diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 9200e5e87b8..2ef05fe989b 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -12,38 +12,11 @@ jobs: ReleasePublish: runs-on: [self-hosted, style-checker] steps: - - name: Set envs + - name: Deploy packages and assets run: | - cat >> "$GITHUB_ENV" << 'EOF' - JFROG_API_KEY=${{ secrets.JFROG_ARTIFACTORY_API_KEY }} - TEMP_PATH=${{runner.temp}}/release_packages - REPO_COPY=${{runner.temp}}/release_packages/ClickHouse - EOF - - name: Check out repository code - uses: ClickHouse/checkout@v1 - with: - # Always use the most recent script version - ref: master - - name: Download packages and push to Artifactory - run: | - rm -rf "$TEMP_PATH" && mkdir -p "$TEMP_PATH" - cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY" - # Download and push packages to artifactory - python3 ./tests/ci/push_to_artifactory.py --release '${{ github.ref }}' \ - --commit '${{ github.sha }}' --artifactory-url '${{ secrets.JFROG_ARTIFACTORY_URL }}' --all - # Download macos binaries to ${{runner.temp}}/download_binary - python3 ./tests/ci/download_binary.py --version '${{ github.ref }}' \ - --commit '${{ github.sha }}' binary_darwin binary_darwin_aarch64 - mv '${{runner.temp}}/download_binary/'clickhouse-* '${{runner.temp}}/push_to_artifactory' - - name: Upload packages to release assets - uses: svenstaro/upload-release-action@v2 - with: - repo_token: ${{ secrets.GITHUB_TOKEN }} - file: ${{runner.temp}}/push_to_artifactory/* - overwrite: true - tag: ${{ github.ref }} - file_glob: true + GITHUB_TAG="${GITHUB_REF#refs/tags/}" + curl --silent --data '' \ + '${{ secrets.PACKAGES_RELEASE_URL }}/release/'"${GITHUB_TAG}"'?binary=binary_darwin&binary=binary_darwin_aarch64&sync=true' ############################################################################################ ##################################### Docker images ####################################### ############################################################################################ diff --git a/.gitmodules b/.gitmodules index b4673f113b7..e395860d957 100644 --- a/.gitmodules +++ b/.gitmodules @@ -330,3 +330,6 @@ [submodule "contrib/crc32-vpmsum"] path = contrib/crc32-vpmsum url = https://github.com/antonblanchard/crc32-vpmsum.git +[submodule "contrib/liburing"] + path = contrib/liburing + url = https://github.com/axboe/liburing diff --git a/CHANGELOG.md b/CHANGELOG.md index ddc10c1eb2f..a89619aa7ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,1857 +1,197 @@ ### Table of Contents -**[ClickHouse release v22.12, 2022-12-15](#2212)**
-**[ClickHouse release v22.11, 2022-11-17](#2211)**
-**[ClickHouse release v22.10, 2022-10-25](#2210)**
-**[ClickHouse release v22.9, 2022-09-22](#229)**
-**[ClickHouse release v22.8-lts, 2022-08-18](#228)**
-**[ClickHouse release v22.7, 2022-07-21](#227)**
-**[ClickHouse release v22.6, 2022-06-16](#226)**
-**[ClickHouse release v22.5, 2022-05-19](#225)**
-**[ClickHouse release v22.4, 2022-04-20](#224)**
-**[ClickHouse release v22.3-lts, 2022-03-17](#223)**
-**[ClickHouse release v22.2, 2022-02-17](#222)**
-**[ClickHouse release v22.1, 2022-01-18](#221)**
-**[Changelog for 2021](https://clickhouse.com/docs/en/whats-new/changelog/2021/)**
+**[ClickHouse release v23.1, 2023-01-26](#231)**<br/>
+**[Changelog for 2022](https://clickhouse.com/docs/en/whats-new/changelog/2022/)**
-# 2022 Changelog +# 2023 Changelog -### ClickHouse release 22.12, 2022-12-15 +### ClickHouse release 23.1, 2023-01-26 -#### Backward Incompatible Change -* Add `GROUP BY ALL` syntax: [#37631](https://github.com/ClickHouse/ClickHouse/issues/37631). [#42265](https://github.com/ClickHouse/ClickHouse/pull/42265) ([刘陶峰](https://github.com/taofengliu)). If you have a column or an alias named `all` and doing `GROUP BY all` without the intention to group by all the columns, the query will have a different semantic. To keep the old semantic, put `all` into backticks or double quotes `"all"` to make it an identifier instead of a keyword. +### ClickHouse release 23.1 #### Upgrade Notes -* Fixed backward incompatibility in (de)serialization of states of `min`, `max`, `any*`, `argMin`, `argMax` aggregate functions with `String` argument. The incompatibility affects 22.9, 22.10 and 22.11 branches (fixed since 22.9.6, 22.10.4 and 22.11.2 correspondingly). Some minor releases of 22.3, 22.7 and 22.8 branches are also affected: 22.3.13...22.3.14 (fixed since 22.3.15), 22.8.6...22.8.9 (fixed since 22.8.10), 22.7.6 and newer (will not be fixed in 22.7, we recommend upgrading from 22.7.* to 22.8.10 or newer). This release note does not concern users that have never used affected versions. Incompatible versions append an extra `'\0'` to strings when reading states of the aggregate functions mentioned above. For example, if an older version saved state of `anyState('foobar')` to `state_column` then the incompatible version will print `'foobar\0'` on `anyMerge(state_column)`. Also incompatible versions write states of the aggregate functions without trailing `'\0'`. Newer versions (that have the fix) can correctly read data written by all versions including incompatible versions, except one corner case. If an incompatible version saved a state with a string that actually ends with null character, then newer version will trim trailing `'\0'` when reading state of affected aggregate function. For example, if an incompatible version saved state of `anyState('abrac\0dabra\0')` to `state_column` then newer versions will print `'abrac\0dabra'` on `anyMerge(state_column)`. The issue also affects distributed queries when an incompatible version works in a cluster together with older or newer versions. [#43038](https://github.com/ClickHouse/ClickHouse/pull/43038) ([Alexander Tokmakov](https://github.com/tavplubix), [Raúl Marín](https://github.com/Algunenano)). Note: all the official ClickHouse builds already include the patches. This is not necessarily true for unofficial third-party builds that should be avoided. +* The `SYSTEM RESTART DISK` query becomes a no-op. [#44647](https://github.com/ClickHouse/ClickHouse/pull/44647) ([alesapin](https://github.com/alesapin)). +* The `PREALLOCATE` option for `HASHED`/`SPARSE_HASHED` dictionaries becomes a no-op. [#45388](https://github.com/ClickHouse/ClickHouse/pull/45388) ([Azat Khuzhin](https://github.com/azat)). It does not give significant advantages anymore. +* Disallow `Gorilla` codec on columns of non-Float32 or non-Float64 type. [#45252](https://github.com/ClickHouse/ClickHouse/pull/45252) ([Robert Schulze](https://github.com/rschu1ze)). It was pointless and led to inconsistencies. +* Parallel quorum inserts might work incorrectly with `*MergeTree` tables created with the deprecated syntax. Therefore, parallel quorum inserts support is completely disabled for such tables. It does not affect tables created with a new syntax. 
[#45430](https://github.com/ClickHouse/ClickHouse/pull/45430) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Use the `GetObjectAttributes` request instead of the `HeadObject` request to get the size of an object in AWS S3. This change fixes handling endpoints without explicit regions after updating the AWS SDK, for example. [#45288](https://github.com/ClickHouse/ClickHouse/pull/45288) ([Vitaly Baranov](https://github.com/vitlibar)). AWS S3 and Minio are tested, but keep in mind that various S3-compatible services (GCS, R2, B2) may have subtle incompatibilities. This change also may require you to adjust the ACL to allow the `GetObjectAttributes` request. +* Forbid paths in timezone names. For example, a timezone name like `/usr/share/zoneinfo/Asia/Aden` is not allowed; the IANA timezone database name like `Asia/Aden` should be used. [#44225](https://github.com/ClickHouse/ClickHouse/pull/44225) ([Kruglov Pavel](https://github.com/Avogar)). +* Queries combining equijoin and constant expressions (e.g., `JOIN ON t1.x = t2.x AND 1 = 1`) are forbidden due to incorrect results. [#44016](https://github.com/ClickHouse/ClickHouse/pull/44016) ([Vladimir C](https://github.com/vdimir)). + #### New Feature -* Add `BSONEachRow` input/output format. In this format, ClickHouse formats/parses each row as a separate BSON document and each column is formatted/parsed as a single BSON field with the column name as the key. [#42033](https://github.com/ClickHouse/ClickHouse/pull/42033) ([mark-polokhov](https://github.com/mark-polokhov)). -* Add `grace_hash` JOIN algorithm, it can be enabled with `SET join_algorithm = 'grace_hash'`. [#38191](https://github.com/ClickHouse/ClickHouse/pull/38191) ([BigRedEye](https://github.com/BigRedEye), [Vladimir C](https://github.com/vdimir)). -* Allow configuring password complexity rules and checks for creating and changing users. [#43719](https://github.com/ClickHouse/ClickHouse/pull/43719) ([Nikolay Degterinsky](https://github.com/evillique)). -* Mask sensitive information in logs; mask secret parts in the output of queries `SHOW CREATE TABLE` and `SELECT FROM system.tables`. Also resolves [#41418](https://github.com/ClickHouse/ClickHouse/issues/41418). [#43227](https://github.com/ClickHouse/ClickHouse/pull/43227) ([Vitaly Baranov](https://github.com/vitlibar)). -* Add `GROUP BY ALL` syntax: [#37631](https://github.com/ClickHouse/ClickHouse/issues/37631). [#42265](https://github.com/ClickHouse/ClickHouse/pull/42265) ([刘陶峰](https://github.com/taofengliu)). -* Add `FROM table SELECT column` syntax. [#41095](https://github.com/ClickHouse/ClickHouse/pull/41095) ([Nikolay Degterinsky](https://github.com/evillique)). -* Added function `concatWithSeparator` and `concat_ws` as an alias for Spark SQL compatibility. A function `concatWithSeparatorAssumeInjective` added as a variant to enable GROUP BY optimization, similarly to `concatAssumeInjective`. [#43749](https://github.com/ClickHouse/ClickHouse/pull/43749) ([李扬](https://github.com/taiyang-li)). -* Added `multiplyDecimal` and `divideDecimal` functions for decimal operations with fixed precision. [#42438](https://github.com/ClickHouse/ClickHouse/pull/42438) ([Andrey Zvonov](https://github.com/zvonand)). -* Added `system.moves` table with list of currently moving parts. [#42660](https://github.com/ClickHouse/ClickHouse/pull/42660) ([Sergei Trifonov](https://github.com/serxa)). -* Add support for embedded Prometheus endpoint for ClickHouse Keeper. 
[#43087](https://github.com/ClickHouse/ClickHouse/pull/43087) ([Antonio Andelic](https://github.com/antonio2368)). -* Support numeric literals with `_` as the separator, for example, `1_000_000`. [#43925](https://github.com/ClickHouse/ClickHouse/pull/43925) ([jh0x](https://github.com/jh0x)). -* Added possibility to use an array as a second parameter for `cutURLParameter` function. It will cut multiple parameters. Close [#6827](https://github.com/ClickHouse/ClickHouse/issues/6827). [#43788](https://github.com/ClickHouse/ClickHouse/pull/43788) ([Roman Vasin](https://github.com/rvasin)). -* Add a column with the expression of the index in the `system.data_skipping_indices` table. [#43308](https://github.com/ClickHouse/ClickHouse/pull/43308) ([Guillaume Tassery](https://github.com/YiuRULE)). -* Add column `engine_full` to system table `databases` so that users can access the entire engine definition of a database via system tables. [#43468](https://github.com/ClickHouse/ClickHouse/pull/43468) ([凌涛](https://github.com/lingtaolf)). -* New hash function [xxh3](https://github.com/Cyan4973/xxHash) added. Also, the performance of `xxHash32` and `xxHash64` are improved on ARM thanks to a library update. [#43411](https://github.com/ClickHouse/ClickHouse/pull/43411) ([Nikita Taranov](https://github.com/nickitat)). -* Added support to define constraints for merge tree settings. For example you can forbid overriding the `storage_policy` by users. [#43903](https://github.com/ClickHouse/ClickHouse/pull/43903) ([Sergei Trifonov](https://github.com/serxa)). -* Add a new setting `input_format_json_read_objects_as_strings` that allows the parsing of nested JSON objects into Strings in all JSON input formats. This setting is disabled by default. [#44052](https://github.com/ClickHouse/ClickHouse/pull/44052) ([Kruglov Pavel](https://github.com/Avogar)). +* Dictionary source for extracting keys by traversing regular expressions tree. It can be used for User-Agent parsing. [#40878](https://github.com/ClickHouse/ClickHouse/pull/40878) ([Vage Ogannisian](https://github.com/nooblose)). [#43858](https://github.com/ClickHouse/ClickHouse/pull/43858) ([Han Fei](https://github.com/hanfei1991)). +* Added parametrized view functionality, now it's possible to specify query parameters for the View table engine. resolves [#40907](https://github.com/ClickHouse/ClickHouse/issues/40907). [#41687](https://github.com/ClickHouse/ClickHouse/pull/41687) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Add `quantileInterpolatedWeighted`/`quantilesInterpolatedWeighted` functions. [#38252](https://github.com/ClickHouse/ClickHouse/pull/38252) ([Bharat Nallan](https://github.com/bharatnc)). +* Array join support for the `Map` type, like the function "explode" in Spark. [#43239](https://github.com/ClickHouse/ClickHouse/pull/43239) ([李扬](https://github.com/taiyang-li)). +* Support SQL standard binary and hex string literals. [#43785](https://github.com/ClickHouse/ClickHouse/pull/43785) ([Mo Xuan](https://github.com/mo-avatar)). +* Allow formatting `DateTime` in Joda-Time style. Refer to [the Joda-Time docs](https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html). [#43818](https://github.com/ClickHouse/ClickHouse/pull/43818) ([李扬](https://github.com/taiyang-li)). +* Implemented a fractional second formatter (`%f`) for `formatDateTime`. [#44060](https://github.com/ClickHouse/ClickHouse/pull/44060) ([ltrk2](https://github.com/ltrk2)). 
[#44497](https://github.com/ClickHouse/ClickHouse/pull/44497) ([Alexander Gololobov](https://github.com/davenger)). +* Added `age` function to calculate the difference between two dates or dates with time values expressed as the number of full units. Closes [#41115](https://github.com/ClickHouse/ClickHouse/issues/41115). [#44421](https://github.com/ClickHouse/ClickHouse/pull/44421) ([Robert Schulze](https://github.com/rschu1ze)). +* Add `Null` source for dictionaries. Closes [#44240](https://github.com/ClickHouse/ClickHouse/issues/44240). [#44502](https://github.com/ClickHouse/ClickHouse/pull/44502) ([mayamika](https://github.com/mayamika)). +* Allow configuring the S3 storage class with the `s3_storage_class` configuration option. Such as `STANDARD/INTELLIGENT_TIERING` Closes [#44443](https://github.com/ClickHouse/ClickHouse/issues/44443). [#44707](https://github.com/ClickHouse/ClickHouse/pull/44707) ([chen](https://github.com/xiedeyantu)). +* Insert default values in case of missing elements in JSON object while parsing named tuple. Add setting `input_format_json_defaults_for_missing_elements_in_named_tuple` that controls this behaviour. Closes [#45142](https://github.com/ClickHouse/ClickHouse/issues/45142)#issuecomment-1380153217. [#45231](https://github.com/ClickHouse/ClickHouse/pull/45231) ([Kruglov Pavel](https://github.com/Avogar)). +* Record server startup time in ProfileEvents (`ServerStartupMilliseconds`). Resolves [#43188](https://github.com/ClickHouse/ClickHouse/issues/43188). [#45250](https://github.com/ClickHouse/ClickHouse/pull/45250) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Refactor and Improve streaming engines Kafka/RabbitMQ/NATS and add support for all formats, also refactor formats a bit: - Fix producing messages in row-based formats with suffixes/prefixes. Now every message is formatted completely with all delimiters and can be parsed back using input format. - Support block-based formats like Native, Parquet, ORC, etc. Every block is formatted as a separate message. The number of rows in one message depends on the block size, so you can control it via the setting `max_block_size`. - Add new engine settings `kafka_max_rows_per_message/rabbitmq_max_rows_per_message/nats_max_rows_per_message`. They control the number of rows formatted in one message in row-based formats. Default value: 1. - Fix high memory consumption in the NATS table engine. - Support arbitrary binary data in NATS producer (previously it worked only with strings contained \0 at the end) - Add missing Kafka/RabbitMQ/NATS engine settings in the documentation. - Refactor producing and consuming in Kafka/RabbitMQ/NATS, separate it from WriteBuffers/ReadBuffers semantic. - Refactor output formats: remove callbacks on each row used in Kafka/RabbitMQ/NATS (now we don't use callbacks there), allow to use IRowOutputFormat directly, clarify row end and row between delimiters, make it possible to reset output format to start formatting again - Add proper implementation in formatRow function (bonus after formats refactoring). [#42777](https://github.com/ClickHouse/ClickHouse/pull/42777) ([Kruglov Pavel](https://github.com/Avogar)). +* Support reading/writing `Nested` tables as `List` of `Struct` in `CapnProto` format. Read/write `Decimal32/64` as `Int32/64`. Closes [#43319](https://github.com/ClickHouse/ClickHouse/issues/43319). [#43379](https://github.com/ClickHouse/ClickHouse/pull/43379) ([Kruglov Pavel](https://github.com/Avogar)). +* Added a `message_format_string` column to `system.text_log`. 
The column contains a pattern that was used to format the message. [#44543](https://github.com/ClickHouse/ClickHouse/pull/44543) ([Alexander Tokmakov](https://github.com/tavplubix)). This allows various analytics over the ClickHouse logs. +* Try to autodetect headers with column names (and maybe types) for CSV/TSV/CustomSeparated input formats. +Add settings input_format_tsv/csv/custom_detect_header that enable this behaviour (enabled by default). Closes [#44640](https://github.com/ClickHouse/ClickHouse/issues/44640). [#44953](https://github.com/ClickHouse/ClickHouse/pull/44953) ([Kruglov Pavel](https://github.com/Avogar)). #### Experimental Feature -* Support deduplication for asynchronous inserts. Before this change, async inserts did not support deduplication, because multiple small inserts coexisted in one inserted batch. Closes [#38075](https://github.com/ClickHouse/ClickHouse/issues/38075). [#43304](https://github.com/ClickHouse/ClickHouse/pull/43304) ([Han Fei](https://github.com/hanfei1991)). -* Add support for cosine distance for the experimental Annoy (vector similarity search) index. [#42778](https://github.com/ClickHouse/ClickHouse/pull/42778) ([Filatenkov Artur](https://github.com/FArthur-cmd)). -* Add `CREATE / ALTER / DROP NAMED COLLECTION` queries. [#43252](https://github.com/ClickHouse/ClickHouse/pull/43252) ([Kseniia Sumarokova](https://github.com/kssenii)). This feature is under development and the queries are not effective as of version 22.12. This changelog entry is added only to avoid confusion. Restrict default access to named collections to the user defined in config. This requires that `show_named_collections = 1` is set to be able to see them. [#43325](https://github.com/ClickHouse/ClickHouse/pull/43325) ([Kseniia Sumarokova](https://github.com/kssenii)). The `system.named_collections` table is introduced [#43147](https://github.com/ClickHouse/ClickHouse/pull/43147) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add an experimental inverted index as a new secondary index type for efficient text search. [#38667](https://github.com/ClickHouse/ClickHouse/pull/38667) ([larryluogit](https://github.com/larryluogit)). +* Add experimental query result cache. [#43797](https://github.com/ClickHouse/ClickHouse/pull/43797) ([Robert Schulze](https://github.com/rschu1ze)). +* Added extendable and configurable scheduling subsystem for IO requests (not yet integrated with IO code itself). [#41840](https://github.com/ClickHouse/ClickHouse/pull/41840) ([Sergei Trifonov](https://github.com/serxa)). This feature does nothing at all, enjoy. +* Added `SYSTEM DROP DATABASE REPLICA` that removes metadata of a dead replica of a `Replicated` database. Resolves [#41794](https://github.com/ClickHouse/ClickHouse/issues/41794). [#42807](https://github.com/ClickHouse/ClickHouse/pull/42807) ([Alexander Tokmakov](https://github.com/tavplubix)). #### Performance Improvement -* Add settings `max_streams_for_merge_tree_reading` and `allow_asynchronous_read_from_io_pool_for_merge_tree`. Setting `max_streams_for_merge_tree_reading` limits the number of reading streams for MergeTree tables. Setting `allow_asynchronous_read_from_io_pool_for_merge_tree` enables a background I/O pool to read from `MergeTree` tables. This may increase performance for I/O bound queries if used together with `max_streams_to_max_threads_ratio` or `max_streams_for_merge_tree_reading`. [#43260](https://github.com/ClickHouse/ClickHouse/pull/43260) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
This improves performance up to 100 times in case of high latency storage, low number of CPU and high number of data parts. -* Settings `merge_tree_min_rows_for_concurrent_read_for_remote_filesystem/merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem` did not respect adaptive granularity. Fat rows did not decrease the number of read rows (as it was done for `merge_tree_min_rows_for_concurrent_read/merge_tree_min_bytes_for_concurrent_read`, which could lead to high memory usage when using remote filesystems. [#43965](https://github.com/ClickHouse/ClickHouse/pull/43965) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Optimized the number of list requests to ZooKeeper or ClickHouse Keeper when selecting a part to merge. Previously it could produce thousands of requests in some cases. Fixes [#43647](https://github.com/ClickHouse/ClickHouse/issues/43647). [#43675](https://github.com/ClickHouse/ClickHouse/pull/43675) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Optimization is getting skipped now if `max_size_to_preallocate_for_aggregation` has too small a value. The default value of this setting increased to `10^8`. [#43945](https://github.com/ClickHouse/ClickHouse/pull/43945) ([Nikita Taranov](https://github.com/nickitat)). -* Speed-up server shutdown by avoiding cleaning up of old data parts. Because it is unnecessary after https://github.com/ClickHouse/ClickHouse/pull/41145. [#43760](https://github.com/ClickHouse/ClickHouse/pull/43760) ([Sema Checherinda](https://github.com/CheSema)). -* Merging on initiator now uses the same memory bound approach as merging of local aggregation results if `enable_memory_bound_merging_of_aggregation_results` is set. [#40879](https://github.com/ClickHouse/ClickHouse/pull/40879) ([Nikita Taranov](https://github.com/nickitat)). -* Keeper improvement: try syncing logs to disk in parallel with replication. [#43450](https://github.com/ClickHouse/ClickHouse/pull/43450) ([Antonio Andelic](https://github.com/antonio2368)). -* Keeper improvement: requests are batched more often. The batching can be controlled with the new setting `max_requests_quick_batch_size`. [#43686](https://github.com/ClickHouse/ClickHouse/pull/43686) ([Antonio Andelic](https://github.com/antonio2368)). - -#### Improvement -* Implement referential dependencies and use them to create tables in the correct order while restoring from a backup. [#43834](https://github.com/ClickHouse/ClickHouse/pull/43834) ([Vitaly Baranov](https://github.com/vitlibar)). -* Substitute UDFs in `CREATE` query to avoid failures during loading at startup. Additionally, UDFs can now be used as `DEFAULT` expressions for columns. [#43539](https://github.com/ClickHouse/ClickHouse/pull/43539) ([Antonio Andelic](https://github.com/antonio2368)). -* Change how the following queries delete parts: TRUNCATE TABLE, ALTER TABLE DROP PART, ALTER TABLE DROP PARTITION. Now, these queries make empty parts which cover the old parts. This makes the TRUNCATE query work without a followedexclusive lock which means concurrent reads aren't locked. Also achieved durability in all those queries. If the request succeeds, then no resurrected parts appear later. Note that atomicity is achieved only with transaction scope. [#41145](https://github.com/ClickHouse/ClickHouse/pull/41145) ([Sema Checherinda](https://github.com/CheSema)). -* `SET param_x` query no longer requires manual string serialization for the value of the parameter. 
For example, query `SET param_a = '[\'a\', \'b\']'` can now be written like `SET param_a = ['a', 'b']`. [#41874](https://github.com/ClickHouse/ClickHouse/pull/41874) ([Nikolay Degterinsky](https://github.com/evillique)). -* Show read rows in the progress indication while reading from STDIN from client. Closes [#43423](https://github.com/ClickHouse/ClickHouse/issues/43423). [#43442](https://github.com/ClickHouse/ClickHouse/pull/43442) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Show progress bar while reading from s3 table function / engine. [#43454](https://github.com/ClickHouse/ClickHouse/pull/43454) ([Kseniia Sumarokova](https://github.com/kssenii)). -* `filesystemAvailable` and related functions support one optional argument with disk name, and change `filesystemFree` to `filesystemUnreserved`. Closes [#35076](https://github.com/ClickHouse/ClickHouse/issues/35076). [#42064](https://github.com/ClickHouse/ClickHouse/pull/42064) ([flynn](https://github.com/ucasfl)). -* Integration with LDAP: increased the default value of search_limit to 256, and added LDAP server config option to change that to an arbitrary value. Closes: [#42276](https://github.com/ClickHouse/ClickHouse/issues/42276). [#42461](https://github.com/ClickHouse/ClickHouse/pull/42461) ([Vasily Nemkov](https://github.com/Enmk)). -* Allow the removal of sensitive information (see the `query_masking_rules` in the configuration file) from the exception messages as well. Resolves [#41418](https://github.com/ClickHouse/ClickHouse/issues/41418). [#42940](https://github.com/ClickHouse/ClickHouse/pull/42940) ([filimonov](https://github.com/filimonov)). -* Support queries like `SHOW FULL TABLES ...` for MySQL compatibility. [#43910](https://github.com/ClickHouse/ClickHouse/pull/43910) ([Filatenkov Artur](https://github.com/FArthur-cmd)). -* Keeper improvement: Add 4lw command `rqld` which can manually assign a node as leader. [#43026](https://github.com/ClickHouse/ClickHouse/pull/43026) ([JackyWoo](https://github.com/JackyWoo)). -* Apply connection timeout settings for Distributed async INSERT from the query. [#43156](https://github.com/ClickHouse/ClickHouse/pull/43156) ([Azat Khuzhin](https://github.com/azat)). -* The `unhex` function now supports `FixedString` arguments. [issue42369](https://github.com/ClickHouse/ClickHouse/issues/42369). [#43207](https://github.com/ClickHouse/ClickHouse/pull/43207) ([DR](https://github.com/freedomDR)). -* Priority is given to deleting completely expired parts according to the TTL rules, see [#42869](https://github.com/ClickHouse/ClickHouse/issues/42869). [#43222](https://github.com/ClickHouse/ClickHouse/pull/43222) ([zhongyuankai](https://github.com/zhongyuankai)). -* More precise and reactive CPU load indication in clickhouse-client. [#43307](https://github.com/ClickHouse/ClickHouse/pull/43307) ([Sergei Trifonov](https://github.com/serxa)). -* Support reading of subcolumns of nested types from storage `S3` and table function `s3` with formats `Parquet`, `Arrow` and `ORC`. [#43329](https://github.com/ClickHouse/ClickHouse/pull/43329) ([chen](https://github.com/xiedeyantu)). -* Add `table_uuid` column to the `system.parts` table. [#43404](https://github.com/ClickHouse/ClickHouse/pull/43404) ([Azat Khuzhin](https://github.com/azat)). -* Added client option to display the number of locally processed rows in non-interactive mode (`--print-num-processed-rows`). [#43407](https://github.com/ClickHouse/ClickHouse/pull/43407) ([jh0x](https://github.com/jh0x)). 
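As a quick illustration of the `SET param_x` improvement described above, a minimal sketch of setting a typed query parameter directly and referencing it with the usual `{name:Type}` substitution; the table `events` and column `tag` are hypothetical:

```sql
-- Set an array-typed query parameter without manual string serialization,
-- then reference it with the {name:Type} substitution syntax.
SET param_tags = ['a', 'b'];

SELECT count()
FROM events                          -- hypothetical table
WHERE tag IN {tags:Array(String)};
```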
-* Implement `aggregation-in-order` optimization on top of a query plan. It is enabled by default (but works only together with `optimize_aggregation_in_order`, which is disabled by default). Set `query_plan_aggregation_in_order = 0` to use the previous AST-based version. [#43592](https://github.com/ClickHouse/ClickHouse/pull/43592) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Allow to collect profile events with `trace_type = 'ProfileEvent'` to `system.trace_log` on each increment with current stack, profile event name and value of the increment. It can be enabled by the setting `trace_profile_events` and used to investigate performance of queries. [#43639](https://github.com/ClickHouse/ClickHouse/pull/43639) ([Anton Popov](https://github.com/CurtizJ)). -* Add a new setting `input_format_max_binary_string_size` to limit string size in RowBinary format. [#43842](https://github.com/ClickHouse/ClickHouse/pull/43842) ([Kruglov Pavel](https://github.com/Avogar)). -* When ClickHouse requests a remote HTTP server, and it returns an error, the numeric HTTP code was not displayed correctly in the exception message. Closes [#43919](https://github.com/ClickHouse/ClickHouse/issues/43919). [#43920](https://github.com/ClickHouse/ClickHouse/pull/43920) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Correctly report errors in queries even when multiple JOINs optimization is taking place. [#43583](https://github.com/ClickHouse/ClickHouse/pull/43583) ([Salvatore](https://github.com/tbsal)). - -#### Build/Testing/Packaging Improvement - -* Systemd integration now correctly notifies systemd that the service is really started and is ready to serve requests. [#43400](https://github.com/ClickHouse/ClickHouse/pull/43400) ([Коренберг Марк](https://github.com/socketpair)). -* Added the option to build ClickHouse with OpenSSL using the [OpenSSL FIPS Module](https://www.openssl.org/docs/man3.0/man7/fips_module.html). This build type has not been tested to validate security and is not supported. [#43991](https://github.com/ClickHouse/ClickHouse/pull/43991) ([Boris Kuschel](https://github.com/bkuschel)). -* Upgrade to the new `DeflateQpl` compression codec which has been implemented in a previous PR (details: https://github.com/ClickHouse/ClickHouse/pull/39494). This patch improves codec on below aspects: 1. QPL v0.2.0 to QPL v0.3.0 [Intel® Query Processing Library (QPL)](https://github.com/intel/qpl) 2. Improve CMake file for fixing QPL build issues for QPL v0.3.0. 3. Link the QPL library with libaccel-config at build time instead of runtime loading on QPL v0.2.0 (dlopen) 4. Fixed log print issue in CompressionCodecDeflateQpl.cpp. [#44024](https://github.com/ClickHouse/ClickHouse/pull/44024) ([jasperzhu](https://github.com/jinjunzh)). - -#### Bug Fix (user-visible misbehavior in official stable or prestable release) - -* Fixed bug which could lead to deadlock while using asynchronous inserts. [#43233](https://github.com/ClickHouse/ClickHouse/pull/43233) ([Anton Popov](https://github.com/CurtizJ)). -* Fix some incorrect logic in AST level optimization `optimize_normalize_count_variants`. [#43873](https://github.com/ClickHouse/ClickHouse/pull/43873) ([Duc Canh Le](https://github.com/canhld94)). -* Fix a case when mutations are not making progress when checksums do not match between replicas (e.g. caused by a change in data format on an upgrade). [#36877](https://github.com/ClickHouse/ClickHouse/pull/36877) ([nvartolomei](https://github.com/nvartolomei)). 
-* Fix the `skip_unavailable_shards` optimization which did not work with the `hdfsCluster` table function. [#43236](https://github.com/ClickHouse/ClickHouse/pull/43236) ([chen](https://github.com/xiedeyantu)). -* Fix `s3` support for the `?` wildcard. Closes [#42731](https://github.com/ClickHouse/ClickHouse/issues/42731). [#43253](https://github.com/ClickHouse/ClickHouse/pull/43253) ([chen](https://github.com/xiedeyantu)). -* Fix functions `arrayFirstOrNull` and `arrayLastOrNull` or null when the array contains `Nullable` elements. [#43274](https://github.com/ClickHouse/ClickHouse/pull/43274) ([Duc Canh Le](https://github.com/canhld94)). -* Fix incorrect `UserTimeMicroseconds`/`SystemTimeMicroseconds` accounting related to Kafka tables. [#42791](https://github.com/ClickHouse/ClickHouse/pull/42791) ([Azat Khuzhin](https://github.com/azat)). -* Do not suppress exceptions in `web` disks. Fix retries for the `web` disk. [#42800](https://github.com/ClickHouse/ClickHouse/pull/42800) ([Azat Khuzhin](https://github.com/azat)). -* Fixed (logical) race condition between inserts and dropping materialized views. A race condition happened when a Materialized View was dropped at the same time as an INSERT, where the MVs were present as a dependency of the insert at the begining of the execution, but the table has been dropped by the time the insert chain tries to access it, producing either an `UNKNOWN_TABLE` or `TABLE_IS_DROPPED` exception, and stopping the insertion. After this change, we avoid these exceptions and just continue with the insert if the dependency is gone. [#43161](https://github.com/ClickHouse/ClickHouse/pull/43161) ([AlfVII](https://github.com/AlfVII)). -* Fix undefined behavior in the `quantiles` function, which might lead to uninitialized memory. Found by fuzzer. This closes [#44066](https://github.com/ClickHouse/ClickHouse/issues/44066). [#44067](https://github.com/ClickHouse/ClickHouse/pull/44067) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Additional check on zero uncompressed size is added to `CompressionCodecDelta`. [#43255](https://github.com/ClickHouse/ClickHouse/pull/43255) ([Nikita Taranov](https://github.com/nickitat)). -* Flatten arrays from Parquet to avoid an issue with inconsistent data in arrays. These incorrect files can be generated by Apache Iceberg. [#43297](https://github.com/ClickHouse/ClickHouse/pull/43297) ([Arthur Passos](https://github.com/arthurpassos)). -* Fix bad cast from `LowCardinality` column when using short circuit function execution. [#43311](https://github.com/ClickHouse/ClickHouse/pull/43311) ([Kruglov Pavel](https://github.com/Avogar)). -* Fixed queries with `SAMPLE BY` with prewhere optimization on tables using `Merge` engine. [#43315](https://github.com/ClickHouse/ClickHouse/pull/43315) ([Antonio Andelic](https://github.com/antonio2368)). -* Check and compare the content of the `format_version` file in `MergeTreeData` so that tables can be loaded even if the storage policy was changed. [#43328](https://github.com/ClickHouse/ClickHouse/pull/43328) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix possible (very unlikely) "No column to rollback" logical error during INSERT into `Buffer` tables. [#43336](https://github.com/ClickHouse/ClickHouse/pull/43336) ([Azat Khuzhin](https://github.com/azat)). -* Fix a bug that allowed the parser to parse an unlimited amount of round brackets into one function if `allow_function_parameters` is set. 
[#43350](https://github.com/ClickHouse/ClickHouse/pull/43350) ([Nikolay Degterinsky](https://github.com/evillique)). -* `MaterializeMySQL` (experimental feature) support DDL: `drop table t1, t2` and compatible with most of MySQL DROP DDL. [#43366](https://github.com/ClickHouse/ClickHouse/pull/43366) ([zzsmdfj](https://github.com/zzsmdfj)). -* `session_log` (experimental feature): Fixed the inability to log in (because of failure to create the session_log entry) in a very rare case of messed up setting profiles. [#42641](https://github.com/ClickHouse/ClickHouse/pull/42641) ([Vasily Nemkov](https://github.com/Enmk)). -* Fix possible `Cannot create non-empty column with type Nothing` in functions `if`/`multiIf`. Closes [#43356](https://github.com/ClickHouse/ClickHouse/issues/43356). [#43368](https://github.com/ClickHouse/ClickHouse/pull/43368) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix a bug when a row level filter uses the default value of a column. [#43387](https://github.com/ClickHouse/ClickHouse/pull/43387) ([Alexander Gololobov](https://github.com/davenger)). -* Query with `DISTINCT` + `LIMIT BY` + `LIMIT` can return fewer rows than expected. Fixes [#43377](https://github.com/ClickHouse/ClickHouse/issues/43377). [#43410](https://github.com/ClickHouse/ClickHouse/pull/43410) ([Igor Nikonov](https://github.com/devcrafter)). -* Fix `sumMap` for `Nullable(Decimal(...))`. [#43414](https://github.com/ClickHouse/ClickHouse/pull/43414) ([Azat Khuzhin](https://github.com/azat)). -* Fix `date_diff` for hour/minute on macOS. Close [#42742](https://github.com/ClickHouse/ClickHouse/issues/42742). [#43466](https://github.com/ClickHouse/ClickHouse/pull/43466) ([zzsmdfj](https://github.com/zzsmdfj)). -* Fix incorrect memory accounting because of merges/mutations. [#43516](https://github.com/ClickHouse/ClickHouse/pull/43516) ([Azat Khuzhin](https://github.com/azat)). -* Fixed primary key analysis with conditions involving `toString(enum)`. [#43596](https://github.com/ClickHouse/ClickHouse/pull/43596) ([Nikita Taranov](https://github.com/nickitat)). This error has been found by @tisonkun. -* Ensure consistency when `clickhouse-copier` updates status and `attach_is_done` in Keeper after partition attach is done. [#43602](https://github.com/ClickHouse/ClickHouse/pull/43602) ([lzydmxy](https://github.com/lzydmxy)). -* During the recovery of a lost replica of a `Replicated` database (experimental feature), there could a situation where we need to atomically swap two table names (use EXCHANGE). Previously we tried to use two RENAME queries, which was obviously failing and moreover, failed the whole recovery process of the database replica. [#43628](https://github.com/ClickHouse/ClickHouse/pull/43628) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Fix the case when the `s3Cluster` function throws `NOT_FOUND_COLUMN_IN_BLOCK` error. Closes [#43534](https://github.com/ClickHouse/ClickHouse/issues/43534). [#43629](https://github.com/ClickHouse/ClickHouse/pull/43629) ([chen](https://github.com/xiedeyantu)). -* Fix possible logical error `Array sizes mismatched` while parsing JSON object with arrays with same key names but with different nesting level. Closes [#43569](https://github.com/ClickHouse/ClickHouse/issues/43569). [#43693](https://github.com/ClickHouse/ClickHouse/pull/43693) ([Kruglov Pavel](https://github.com/Avogar)). -* Fixed possible exception in the case of distributed `GROUP BY` with an `ALIAS` column among aggregation keys. 
[#43709](https://github.com/ClickHouse/ClickHouse/pull/43709) ([Nikita Taranov](https://github.com/nickitat)). -* Fix bug which can lead to broken projections if zero-copy replication (experimental feature) is enabled and used. [#43764](https://github.com/ClickHouse/ClickHouse/pull/43764) ([alesapin](https://github.com/alesapin)). -* Fix using multipart upload for very large S3 objects in AWS S3. [#43824](https://github.com/ClickHouse/ClickHouse/pull/43824) ([ianton-ru](https://github.com/ianton-ru)). -* Fixed `ALTER ... RESET SETTING` with `ON CLUSTER`. It could have been applied to one replica only. Fixes [#43843](https://github.com/ClickHouse/ClickHouse/issues/43843). [#43848](https://github.com/ClickHouse/ClickHouse/pull/43848) ([Elena Torró](https://github.com/elenatorro)). -* Fix a logical error in JOIN with `Join` table engine at right hand side, if `USING` is being used. [#43963](https://github.com/ClickHouse/ClickHouse/pull/43963) ([Vladimir C](https://github.com/vdimir)). Fix a bug with wrong order of keys in `Join` table engine. [#44012](https://github.com/ClickHouse/ClickHouse/pull/44012) ([Vladimir C](https://github.com/vdimir)). -* Keeper fix: throw if the interserver port for Raft is already in use. [#43984](https://github.com/ClickHouse/ClickHouse/pull/43984) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix ORDER BY positional argument (example: `ORDER BY 1, 2`) in case of unneeded columns pruning from subqueries. Closes [#43964](https://github.com/ClickHouse/ClickHouse/issues/43964). [#43987](https://github.com/ClickHouse/ClickHouse/pull/43987) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fixed exception when a subquery contains HAVING but doesn't contain an actual aggregation. [#44051](https://github.com/ClickHouse/ClickHouse/pull/44051) ([Nikita Taranov](https://github.com/nickitat)). -* Fix race in s3 multipart upload. This race could cause the error `Part number must be an integer between 1 and 10000, inclusive. (S3_ERROR)` while restoring from a backup. [#44065](https://github.com/ClickHouse/ClickHouse/pull/44065) ([Vitaly Baranov](https://github.com/vitlibar)). - - -### ClickHouse release 22.11, 2022-11-17 - -#### Backward Incompatible Change -* `JSONExtract` family of functions will now attempt to coerce to the requested type. [#41502](https://github.com/ClickHouse/ClickHouse/pull/41502) ([Márcio Martins](https://github.com/marcioapm)). - -#### New Feature -* Adds support for retries during INSERTs into ReplicatedMergeTree when a session with ClickHouse Keeper is lost. Apart from fault tolerance, it aims to provide better user experience, - avoid returning a user an error during insert if keeper is restarted (for example, due to upgrade). This is controlled by the `insert_keeper_max_retries` setting, which is disabled by default. [#42607](https://github.com/ClickHouse/ClickHouse/pull/42607) ([Igor Nikonov](https://github.com/devcrafter)). -* Add `Hudi` and `DeltaLake` table engines, read-only, only for tables on S3. [#41054](https://github.com/ClickHouse/ClickHouse/pull/41054) ([Daniil Rubin](https://github.com/rubin-do), [Kseniia Sumarokova](https://github.com/kssenii)). -* Add table function `hudi` and `deltaLake`. [#43080](https://github.com/ClickHouse/ClickHouse/pull/43080) ([flynn](https://github.com/ucasfl)). -* Support for composite time intervals. 1. Add, subtract and negate operations are now available on Intervals. In the case where the types of Intervals are different, they will be transformed into the Tuple of those types. 2. 
A tuple of intervals can be added to or subtracted from a Date/DateTime field. 3. Added parsing of Intervals with different types, for example: `INTERVAL '1 HOUR 1 MINUTE 1 SECOND'`. [#42195](https://github.com/ClickHouse/ClickHouse/pull/42195) ([Nikolay Degterinsky](https://github.com/evillique)). -* Added `**` glob support for recursive directory traversal of the filesystem and S3. Resolves [#36316](https://github.com/ClickHouse/ClickHouse/issues/36316). [#42376](https://github.com/ClickHouse/ClickHouse/pull/42376) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Introduce `s3_plain` disk type for write-once-read-many operations. Implement `ATTACH` of `MergeTree` table for `s3_plain` disk. [#42628](https://github.com/ClickHouse/ClickHouse/pull/42628) ([Azat Khuzhin](https://github.com/azat)). -* Added applied row-level policies to `system.query_log`. [#39819](https://github.com/ClickHouse/ClickHouse/pull/39819) ([Vladimir Chebotaryov](https://github.com/quickhouse)). -* Add four-letter command `csnp` for manually creating snapshots in ClickHouse Keeper. Additionally, `lgif` was added to get Raft information for a specific node (e.g. index of last created snapshot, last committed log index). [#41766](https://github.com/ClickHouse/ClickHouse/pull/41766) ([JackyWoo](https://github.com/JackyWoo)). -* Add function `ascii` like in Apache Spark: https://spark.apache.org/docs/latest/api/sql/#ascii. [#42670](https://github.com/ClickHouse/ClickHouse/pull/42670) ([李扬](https://github.com/taiyang-li)). -* Add function `positive_modulo` (`pmod`) which returns non-negative result based on modulo. [#42755](https://github.com/ClickHouse/ClickHouse/pull/42755) ([李扬](https://github.com/taiyang-li)). -* Add function `formatReadableDecimalSize`. [#42774](https://github.com/ClickHouse/ClickHouse/pull/42774) ([Alejandro](https://github.com/alexon1234)). -* Add function `randCanonical`, which is similar to the `rand` function in Apache Spark or Impala. The function generates pseudo random results with independent and identically distributed uniformly distributed values in [0, 1). [#43124](https://github.com/ClickHouse/ClickHouse/pull/43124) ([李扬](https://github.com/taiyang-li)). -* Add function `displayName`, closes [#36770](https://github.com/ClickHouse/ClickHouse/issues/36770). [#37681](https://github.com/ClickHouse/ClickHouse/pull/37681) ([hongbin](https://github.com/xlwh)). -* Add `min_age_to_force_merge_on_partition_only` setting to optimize old parts for the entire partition only. [#42659](https://github.com/ClickHouse/ClickHouse/pull/42659) ([Antonio Andelic](https://github.com/antonio2368)). -* Add generic implementation for arbitrary structured named collections, access type and `system.named_collections`. [#43147](https://github.com/ClickHouse/ClickHouse/pull/43147) ([Kseniia Sumarokova](https://github.com/kssenii)). - -#### Performance Improvement -* Parallelized merging of `uniqExact` states for aggregation without key, i.e. queries like `SELECT uniqExact(number) FROM table`. The improvement becomes noticeable when the number of unique keys approaches 10^6. Also `uniq` performance is slightly optimized. [#43072](https://github.com/ClickHouse/ClickHouse/pull/43072) ([Nikita Taranov](https://github.com/nickitat)). -* `match` function can use the index if it's a condition on string prefix. This closes [#37333](https://github.com/ClickHouse/ClickHouse/issues/37333). [#42458](https://github.com/ClickHouse/ClickHouse/pull/42458) ([clarkcaoliu](https://github.com/Clark0)). 
-* Speed up AND and OR operators when they are sequenced. [#42214](https://github.com/ClickHouse/ClickHouse/pull/42214) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). -* Support parallel parsing for `LineAsString` input format. This improves performance just slightly. This closes [#42502](https://github.com/ClickHouse/ClickHouse/issues/42502). [#42780](https://github.com/ClickHouse/ClickHouse/pull/42780) ([Kruglov Pavel](https://github.com/Avogar)). -* ClickHouse Keeper performance improvement: improve commit performance for cases when many different nodes have uncommitted states. This should help with cases when a follower node can't sync fast enough. [#42926](https://github.com/ClickHouse/ClickHouse/pull/42926) ([Antonio Andelic](https://github.com/antonio2368)). -* A condition like `NOT LIKE 'prefix%'` can use the primary index. [#42209](https://github.com/ClickHouse/ClickHouse/pull/42209) ([Duc Canh Le](https://github.com/canhld94)). - -#### Experimental Feature -* Support type `Object` inside other types, e.g. `Array(JSON)`. [#36969](https://github.com/ClickHouse/ClickHouse/pull/36969) ([Anton Popov](https://github.com/CurtizJ)). -* Ignore MySQL binlog SAVEPOINT event for MaterializedMySQL. [#42931](https://github.com/ClickHouse/ClickHouse/pull/42931) ([zzsmdfj](https://github.com/zzsmdfj)). Handle (ignore) SAVEPOINT queries in MaterializedMySQL. [#43086](https://github.com/ClickHouse/ClickHouse/pull/43086) ([Stig Bakken](https://github.com/stigsb)). - -#### Improvement -* Trivial queries with small LIMIT will properly determine the number of estimated rows to read, so that the threshold will be checked properly. Closes [#7071](https://github.com/ClickHouse/ClickHouse/issues/7071). [#42580](https://github.com/ClickHouse/ClickHouse/pull/42580) ([Han Fei](https://github.com/hanfei1991)). -* Add support for interactive parameters in INSERT VALUES queries. [#43077](https://github.com/ClickHouse/ClickHouse/pull/43077) ([Nikolay Degterinsky](https://github.com/evillique)). -* Added new field `allow_readonly` in `system.table_functions` to allow using table functions in readonly mode. Resolves [#42414](https://github.com/ClickHouse/ClickHouse/issues/42414) Implementation: * Added a new field allow_readonly to table system.table_functions. * Updated to use new field allow_readonly to allow using table functions in readonly mode. Testing: * Added a test for filesystem tests/queries/0_stateless/02473_functions_in_readonly_mode.sh Documentation: * Updated the english documentation for Table Functions. [#42708](https://github.com/ClickHouse/ClickHouse/pull/42708) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* The `system.asynchronous_metrics` gets embedded documentation. This documentation is also exported to Prometheus. Fixed an error with the metrics about `cache` disks - they were calculated only for one arbitrary cache disk instead all of them. This closes [#7644](https://github.com/ClickHouse/ClickHouse/issues/7644). [#43194](https://github.com/ClickHouse/ClickHouse/pull/43194) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Throttling algorithm changed to token bucket. [#42665](https://github.com/ClickHouse/ClickHouse/pull/42665) ([Sergei Trifonov](https://github.com/serxa)). -* Mask passwords and secret keys both in `system.query_log` and `/var/log/clickhouse-server/*.log` and also in error messages. [#42484](https://github.com/ClickHouse/ClickHouse/pull/42484) ([Vitaly Baranov](https://github.com/vitlibar)). 
-* Remove covered parts for fetched part (to avoid possible replication delay grows). [#39737](https://github.com/ClickHouse/ClickHouse/pull/39737) ([Azat Khuzhin](https://github.com/azat)). -* If `/dev/tty` is available, the progress in clickhouse-client and clickhouse-local will be rendered directly to the terminal, without writing to STDERR. It allows getting progress even if STDERR is redirected to a file, and the file will not be polluted by terminal escape sequences. The progress can be disabled by `--progress false`. This closes [#32238](https://github.com/ClickHouse/ClickHouse/issues/32238). [#42003](https://github.com/ClickHouse/ClickHouse/pull/42003) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Add support for `FixedString` input to base64 coding functions. [#42285](https://github.com/ClickHouse/ClickHouse/pull/42285) ([ltrk2](https://github.com/ltrk2)). -* Add columns `bytes_on_disk` and `path` to `system.detached_parts`. Closes [#42264](https://github.com/ClickHouse/ClickHouse/issues/42264). [#42303](https://github.com/ClickHouse/ClickHouse/pull/42303) ([chen](https://github.com/xiedeyantu)). -* Improve using structure from insertion table in table functions, now setting `use_structure_from_insertion_table_in_table_functions` has new possible value - `2` that means that ClickHouse will try to determine if we can use structure from insertion table or not automatically. Closes [#40028](https://github.com/ClickHouse/ClickHouse/issues/40028). [#42320](https://github.com/ClickHouse/ClickHouse/pull/42320) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix no progress indication on INSERT FROM INFILE. Closes [#42548](https://github.com/ClickHouse/ClickHouse/issues/42548). [#42634](https://github.com/ClickHouse/ClickHouse/pull/42634) ([chen](https://github.com/xiedeyantu)). -* Refactor function `tokens` to enable max tokens returned for related functions (disabled by default). [#42673](https://github.com/ClickHouse/ClickHouse/pull/42673) ([李扬](https://github.com/taiyang-li)). -* Allow to use `Date32` arguments for `formatDateTime` and `FROM_UNIXTIME` functions. [#42737](https://github.com/ClickHouse/ClickHouse/pull/42737) ([Roman Vasin](https://github.com/rvasin)). -* Update tzdata to 2022f. Mexico will no longer observe DST except near the US border: https://www.timeanddate.com/news/time/mexico-abolishes-dst-2022.html. Chihuahua moves to year-round UTC-6 on 2022-10-30. Fiji no longer observes DST. See https://github.com/google/cctz/pull/235 and https://bugs.launchpad.net/ubuntu/+source/tzdata/+bug/1995209. [#42796](https://github.com/ClickHouse/ClickHouse/pull/42796) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Add `FailedAsyncInsertQuery` event metric for async inserts. [#42814](https://github.com/ClickHouse/ClickHouse/pull/42814) ([Krzysztof Góralski](https://github.com/kgoralski)). -* Implement `read-in-order` optimization on top of query plan. It is enabled by default. Set `query_plan_read_in_order = 0` to use previous AST-based version. [#42829](https://github.com/ClickHouse/ClickHouse/pull/42829) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Increase the size of upload part exponentially for backup to S3 to avoid errors about max 10 000 parts limit of the multipart upload to s3. [#42833](https://github.com/ClickHouse/ClickHouse/pull/42833) ([Vitaly Baranov](https://github.com/vitlibar)). 
-* When the merge task is continuously busy and the disk space is insufficient, the completely expired parts cannot be selected and dropped, resulting in insufficient disk space. My idea is that when the entire Part expires, there is no need for additional disk space to guarantee, ensure the normal execution of TTL. [#42869](https://github.com/ClickHouse/ClickHouse/pull/42869) ([zhongyuankai](https://github.com/zhongyuankai)). -* Add `oss` function and `OSS` table engine (this is convenient for users). oss is fully compatible with s3. [#43155](https://github.com/ClickHouse/ClickHouse/pull/43155) ([zzsmdfj](https://github.com/zzsmdfj)). -* Improve error reporting in the collection of OS-related info for the `system.asynchronous_metrics` table. [#43192](https://github.com/ClickHouse/ClickHouse/pull/43192) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Modify the `INFORMATION_SCHEMA` tables in a way so that ClickHouse can connect to itself using the MySQL compatibility protocol. Add columns instead of aliases (related to [#9769](https://github.com/ClickHouse/ClickHouse/issues/9769)). It will improve the compatibility with various MySQL clients. [#43198](https://github.com/ClickHouse/ClickHouse/pull/43198) ([Filatenkov Artur](https://github.com/FArthur-cmd)). -* Add some functions for compatibility with PowerBI, when it connects using MySQL protocol [#42612](https://github.com/ClickHouse/ClickHouse/pull/42612) ([Filatenkov Artur](https://github.com/FArthur-cmd)). -* Better usability for Dashboard on changes [#42872](https://github.com/ClickHouse/ClickHouse/pull/42872) ([Vladimir C](https://github.com/vdimir)). - -#### Build/Testing/Packaging Improvement -* Run SQLancer for each pull request and commit to master. [SQLancer](https://github.com/sqlancer/sqlancer) is an OpenSource fuzzer that focuses on automatic detection of logical bugs. [#42397](https://github.com/ClickHouse/ClickHouse/pull/42397) ([Ilya Yatsishin](https://github.com/qoega)). -* Update to latest zlib-ng. [#42463](https://github.com/ClickHouse/ClickHouse/pull/42463) ([Boris Kuschel](https://github.com/bkuschel)). -* Add support for testing ClickHouse server with Jepsen. By the way, we already have support for testing ClickHouse Keeper with Jepsen. This pull request extends it to Replicated tables. [#42619](https://github.com/ClickHouse/ClickHouse/pull/42619) ([Antonio Andelic](https://github.com/antonio2368)). -* Use https://github.com/matus-chochlik/ctcache for clang-tidy results caching. [#42913](https://github.com/ClickHouse/ClickHouse/pull/42913) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Before the fix, the user-defined config was preserved by RPM in `$file.rpmsave`. The PR fixes it and won't replace the user's files from packages. [#42936](https://github.com/ClickHouse/ClickHouse/pull/42936) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Remove some libraries from Ubuntu Docker image. [#42622](https://github.com/ClickHouse/ClickHouse/pull/42622) ([Alexey Milovidov](https://github.com/alexey-milovidov)). - -#### Bug Fix (user-visible misbehavior in official stable or prestable release) - -* Updated normaliser to clone the alias ast. Resolves [#42452](https://github.com/ClickHouse/ClickHouse/issues/42452) Implementation: * Updated QueryNormalizer to clone alias ast, when its replaced. Previously just assigning the same leads to exception in LogicalExpressinsOptimizer as it would be the same parent being inserted again. 
* This bug is not seen with new analyser (allow_experimental_analyzer), so no changes for it. I added a test for the same. [#42827](https://github.com/ClickHouse/ClickHouse/pull/42827) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Fix race for backup of tables in `Lazy` databases. [#43104](https://github.com/ClickHouse/ClickHouse/pull/43104) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix for `skip_unavailable_shards`: it did not work with the `s3Cluster` table function. [#43131](https://github.com/ClickHouse/ClickHouse/pull/43131) ([chen](https://github.com/xiedeyantu)). -* Fix schema inference in `s3Cluster` and improvement in `hdfsCluster`. [#41979](https://github.com/ClickHouse/ClickHouse/pull/41979) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix retries while reading from URL table engines / table function. (retriable errors could be retries more times than needed, non-retriable errors resulted in failed assertion in code). [#42224](https://github.com/ClickHouse/ClickHouse/pull/42224) ([Kseniia Sumarokova](https://github.com/kssenii)). -* A segmentation fault related to DNS & c-ares has been reported and fixed. [#42234](https://github.com/ClickHouse/ClickHouse/pull/42234) ([Arthur Passos](https://github.com/arthurpassos)). -* Fix `LOGICAL_ERROR` `Arguments of 'plus' have incorrect data types` which may happen in PK analysis (monotonicity check). Fix invalid PK analysis for monotonic binary functions with first constant argument. [#42410](https://github.com/ClickHouse/ClickHouse/pull/42410) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix incorrect key analysis when key types cannot be inside Nullable. This fixes [#42456](https://github.com/ClickHouse/ClickHouse/issues/42456). [#42469](https://github.com/ClickHouse/ClickHouse/pull/42469) ([Amos Bird](https://github.com/amosbird)). -* Fix typo in a setting name that led to bad usage of schema inference cache while using setting `input_format_csv_use_best_effort_in_schema_inference`. Closes [#41735](https://github.com/ClickHouse/ClickHouse/issues/41735). [#42536](https://github.com/ClickHouse/ClickHouse/pull/42536) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix creating a Set with wrong header when data type is LowCardinality. Closes [#42460](https://github.com/ClickHouse/ClickHouse/issues/42460). [#42579](https://github.com/ClickHouse/ClickHouse/pull/42579) ([flynn](https://github.com/ucasfl)). -* `(U)Int128` and `(U)Int256` values were correctly checked in `PREWHERE`. [#42605](https://github.com/ClickHouse/ClickHouse/pull/42605) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix a bug in functions parser that could have led to a segmentation fault. [#42724](https://github.com/ClickHouse/ClickHouse/pull/42724) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix the locking in `truncate table`. [#42728](https://github.com/ClickHouse/ClickHouse/pull/42728) ([flynn](https://github.com/ucasfl)). -* Fix possible crash in `web` disks when file does not exist (or `OPTIMIZE TABLE FINAL`, that also can got the same error eventually). [#42767](https://github.com/ClickHouse/ClickHouse/pull/42767) ([Azat Khuzhin](https://github.com/azat)). -* Fix `auth_type` mapping in `system.session_log`, by including `SSL_CERTIFICATE` for the enum values. [#42782](https://github.com/ClickHouse/ClickHouse/pull/42782) ([Miel Donkers](https://github.com/mdonkers)). -* Fix stack-use-after-return under ASAN build in the Create User query parser. 
[#42804](https://github.com/ClickHouse/ClickHouse/pull/42804) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix `lowerUTF8`/`upperUTF8` in case of symbol was in between 16-byte boundary (very frequent case of you have strings > 16 bytes long). [#42812](https://github.com/ClickHouse/ClickHouse/pull/42812) ([Azat Khuzhin](https://github.com/azat)). -* Additional bound check was added to LZ4 decompression routine to fix misbehaviour in case of malformed input. [#42868](https://github.com/ClickHouse/ClickHouse/pull/42868) ([Nikita Taranov](https://github.com/nickitat)). -* Fix rare possible hang on query cancellation. [#42874](https://github.com/ClickHouse/ClickHouse/pull/42874) ([Azat Khuzhin](https://github.com/azat)). -* Fix incorrect behavior with multiple disjuncts in hash join, close [#42832](https://github.com/ClickHouse/ClickHouse/issues/42832). [#42876](https://github.com/ClickHouse/ClickHouse/pull/42876) ([Vladimir C](https://github.com/vdimir)). -* A null pointer will be generated when select if as from ‘three table join’ , For example, this SQL query: [#42883](https://github.com/ClickHouse/ClickHouse/pull/42883) ([zzsmdfj](https://github.com/zzsmdfj)). -* Fix memory sanitizer report in Cluster Discovery, close [#42763](https://github.com/ClickHouse/ClickHouse/issues/42763). [#42905](https://github.com/ClickHouse/ClickHouse/pull/42905) ([Vladimir C](https://github.com/vdimir)). -* Improve DateTime schema inference in case of empty string. [#42911](https://github.com/ClickHouse/ClickHouse/pull/42911) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix rare NOT_FOUND_COLUMN_IN_BLOCK error when projection is possible to use but there is no projection available. This fixes [#42771](https://github.com/ClickHouse/ClickHouse/issues/42771) . The bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/25563. [#42938](https://github.com/ClickHouse/ClickHouse/pull/42938) ([Amos Bird](https://github.com/amosbird)). -* Fix ATTACH TABLE in `PostgreSQL` database engine if the table contains DATETIME data type. Closes [#42817](https://github.com/ClickHouse/ClickHouse/issues/42817). [#42960](https://github.com/ClickHouse/ClickHouse/pull/42960) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix lambda parsing. Closes [#41848](https://github.com/ClickHouse/ClickHouse/issues/41848). [#42979](https://github.com/ClickHouse/ClickHouse/pull/42979) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix incorrect key analysis when nullable keys appear in the middle of a hyperrectangle. This fixes [#43111](https://github.com/ClickHouse/ClickHouse/issues/43111) . [#43133](https://github.com/ClickHouse/ClickHouse/pull/43133) ([Amos Bird](https://github.com/amosbird)). -* Fix several buffer over-reads in deserialization of carefully crafted aggregate function states. [#43159](https://github.com/ClickHouse/ClickHouse/pull/43159) ([Raúl Marín](https://github.com/Algunenano)). -* Fix function `if` in case of NULL and const Nullable arguments. Closes [#43069](https://github.com/ClickHouse/ClickHouse/issues/43069). [#43178](https://github.com/ClickHouse/ClickHouse/pull/43178) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix decimal math overflow in parsing DateTime with the 'best effort' algorithm. Closes [#43061](https://github.com/ClickHouse/ClickHouse/issues/43061). [#43180](https://github.com/ClickHouse/ClickHouse/pull/43180) ([Kruglov Pavel](https://github.com/Avogar)). -* The `indent` field produced by the `git-import` tool was miscalculated. 
See https://clickhouse.com/docs/en/getting-started/example-datasets/github/. [#43191](https://github.com/ClickHouse/ClickHouse/pull/43191) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fixed unexpected behaviour of `Interval` types with subquery and casting. [#43193](https://github.com/ClickHouse/ClickHouse/pull/43193) ([jh0x](https://github.com/jh0x)). - -### ClickHouse release 22.10, 2022-10-26 - -#### Backward Incompatible Change -* Rename cache commands: `show caches` -> `show filesystem caches`, `describe cache` -> `describe filesystem cache`. [#41508](https://github.com/ClickHouse/ClickHouse/pull/41508) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Remove support for the `WITH TIMEOUT` section for `LIVE VIEW`. This closes [#40557](https://github.com/ClickHouse/ClickHouse/issues/40557). [#42173](https://github.com/ClickHouse/ClickHouse/pull/42173) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Remove support for the `{database}` macro from the client's prompt. It was displayed incorrectly if the database was unspecified and it was not updated on `USE` statements. This closes [#25891](https://github.com/ClickHouse/ClickHouse/issues/25891). [#42508](https://github.com/ClickHouse/ClickHouse/pull/42508) ([Alexey Milovidov](https://github.com/alexey-milovidov)). - -#### New Feature -* Composable protocol configuration is added. Now different protocols can be set up with different listen hosts. Protocol wrappers such as PROXYv1 can be set up over any other protocols (TCP, TCP secure, MySQL, Postgres). [#41198](https://github.com/ClickHouse/ClickHouse/pull/41198) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Add `S3` as a new type of the destination of backups. Support BACKUP to S3 with as-is path/data structure. [#42333](https://github.com/ClickHouse/ClickHouse/pull/42333) ([Vitaly Baranov](https://github.com/vitlibar)), [#42232](https://github.com/ClickHouse/ClickHouse/pull/42232) ([Azat Khuzhin](https://github.com/azat)). -* Added functions (`randUniform`, `randNormal`, `randLogNormal`, `randExponential`, `randChiSquared`, `randStudentT`, `randFisherF`, `randBernoulli`, `randBinomial`, `randNegativeBinomial`, `randPoisson`) to generate random values according to the specified distributions. This closes [#21834](https://github.com/ClickHouse/ClickHouse/issues/21834). [#42411](https://github.com/ClickHouse/ClickHouse/pull/42411) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* An improvement for ClickHouse Keeper: add support for uploading snapshots to S3. S3 information can be defined inside `keeper_server.s3_snapshot`. [#41342](https://github.com/ClickHouse/ClickHouse/pull/41342) ([Antonio Andelic](https://github.com/antonio2368)). -* Added an aggregate function `analysisOfVariance` (`anova`) to perform a statistical test over several groups of normally distributed observations to find out whether all groups have the same mean or not. Original PR [#37872](https://github.com/ClickHouse/ClickHouse/issues/37872). [#42131](https://github.com/ClickHouse/ClickHouse/pull/42131) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Support limiting of temporary data stored on disk using settings `max_temporary_data_on_disk_size_for_user`/`max_temporary_data_on_disk_size_for_query` . [#40893](https://github.com/ClickHouse/ClickHouse/pull/40893) ([Vladimir C](https://github.com/vdimir)). -* Add setting `format_json_object_each_row_column_for_object_name` to write/parse object name as column value in JSONObjectEachRow format. 
[#41703](https://github.com/ClickHouse/ClickHouse/pull/41703) ([Kruglov Pavel](https://github.com/Avogar)). -* Add BLAKE3 hash-function to SQL. [#33435](https://github.com/ClickHouse/ClickHouse/pull/33435) ([BoloniniD](https://github.com/BoloniniD)). -* The function `javaHash` has been extended to integers. [#41131](https://github.com/ClickHouse/ClickHouse/pull/41131) ([JackyWoo](https://github.com/JackyWoo)). -* Add OpenTelemetry support to ON CLUSTER DDL (require `distributed_ddl_entry_format_version` to be set to 4). [#41484](https://github.com/ClickHouse/ClickHouse/pull/41484) ([Frank Chen](https://github.com/FrankChen021)). -* Added system table `asynchronous_insert_log`. It contains information about asynchronous inserts (including results of queries in fire-and-forget mode (with `wait_for_async_insert=0`)) for better introspection. [#42040](https://github.com/ClickHouse/ClickHouse/pull/42040) ([Anton Popov](https://github.com/CurtizJ)). -* Add support for methods `lz4`, `bz2`, `snappy` in HTTP's `Accept-Encoding` which is a non-standard extension to HTTP protocol. [#42071](https://github.com/ClickHouse/ClickHouse/pull/42071) ([Nikolay Degterinsky](https://github.com/evillique)). -* Adds Morton Coding (ZCurve) encode/decode functions. [#41753](https://github.com/ClickHouse/ClickHouse/pull/41753) ([Constantine Peresypkin](https://github.com/pkit)). -* Add support for `SET setting_name = DEFAULT`. [#42187](https://github.com/ClickHouse/ClickHouse/pull/42187) ([Filatenkov Artur](https://github.com/FArthur-cmd)). - -#### Experimental Feature -* Added new infrastructure for query analysis and planning under the `allow_experimental_analyzer` setting. [#31796](https://github.com/ClickHouse/ClickHouse/pull/31796) ([Maksim Kita](https://github.com/kitaisreal)). -* Initial implementation of Kusto Query Language. Please don't use it. [#37961](https://github.com/ClickHouse/ClickHouse/pull/37961) ([Yong Wang](https://github.com/kashwy)). - -#### Performance Improvement -* Relax the "Too many parts" threshold. This closes [#6551](https://github.com/ClickHouse/ClickHouse/issues/6551). Now ClickHouse will allow more parts in a partition if the average part size is large enough (at least 10 GiB). This allows to have up to petabytes of data in a single partition of a single table on a single server, which is possible using disk shelves or object storage. [#42002](https://github.com/ClickHouse/ClickHouse/pull/42002) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Implement operator precedence element parser to make the required stack size smaller. [#34892](https://github.com/ClickHouse/ClickHouse/pull/34892) ([Nikolay Degterinsky](https://github.com/evillique)). -* DISTINCT in order optimization leverage sorting properties of data streams. This improvement will enable reading in order for DISTINCT if applicable (before it was necessary to provide ORDER BY for columns in DISTINCT). [#41014](https://github.com/ClickHouse/ClickHouse/pull/41014) ([Igor Nikonov](https://github.com/devcrafter)). -* ColumnVector: optimize UInt8 index with AVX512VBMI. [#41247](https://github.com/ClickHouse/ClickHouse/pull/41247) ([Guo Wangyang](https://github.com/guowangy)). -* Optimize the lock contentions for `ThreadGroupStatus::mutex`. The performance experiments of **SSB** (Star Schema Benchmark) on the ICX device (Intel Xeon Platinum 8380 CPU, 80 cores, 160 threads) shows that this change could bring a **2.95x** improvement of the geomean of all subcases' QPS. 
[#41675](https://github.com/ClickHouse/ClickHouse/pull/41675) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). -* Add `ldapr` capabilities to AArch64 builds. This is supported from Graviton 2+, Azure and GCP instances. Only appeared in clang-15 [not so long ago](https://github.com/llvm/llvm-project/commit/9609b5daffe9fd28d83d83da895abc5113f76c24). [#41778](https://github.com/ClickHouse/ClickHouse/pull/41778) ([Daniel Kutenin](https://github.com/danlark1)). -* Improve performance when comparing strings and one argument is an empty constant string. [#41870](https://github.com/ClickHouse/ClickHouse/pull/41870) ([Jiebin Sun](https://github.com/jiebinn)). -* Optimize `insertFrom` of ColumnAggregateFunction to share Aggregate State in some cases. [#41960](https://github.com/ClickHouse/ClickHouse/pull/41960) ([flynn](https://github.com/ucasfl)). -* Make writing to `azure_blob_storage` disks faster (respect `max_single_part_upload_size` instead of writing a block per each buffer size). Inefficiency mentioned in [#41754](https://github.com/ClickHouse/ClickHouse/issues/41754). [#42041](https://github.com/ClickHouse/ClickHouse/pull/42041) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Make thread ids in the process list and query_log unique to avoid waste. [#42180](https://github.com/ClickHouse/ClickHouse/pull/42180) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Support skipping cache completely (both download to cache and reading cached data) in case the requested read range exceeds the threshold defined by cache setting `bypass_cache_threashold`, requires to be enabled with `enable_bypass_cache_with_threshold`). [#42418](https://github.com/ClickHouse/ClickHouse/pull/42418) ([Han Shukai](https://github.com/KinderRiven)). This helps on slow local disks. - -#### Improvement -* Add setting `allow_implicit_no_password`: in combination with `allow_no_password` it forbids creating a user with no password unless `IDENTIFIED WITH no_password` is explicitly specified. [#41341](https://github.com/ClickHouse/ClickHouse/pull/41341) ([Nikolay Degterinsky](https://github.com/evillique)). -* Embedded Keeper will always start in the background allowing ClickHouse to start without achieving quorum. [#40991](https://github.com/ClickHouse/ClickHouse/pull/40991) ([Antonio Andelic](https://github.com/antonio2368)). -* Made reestablishing a new connection to ZooKeeper more reactive in case of expiration of the previous one. Previously there was a task which spawns every minute by default and thus a table could be in readonly state for about this time. [#41092](https://github.com/ClickHouse/ClickHouse/pull/41092) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Now projections can be used with zero copy replication (zero-copy replication is a non-production feature). [#41147](https://github.com/ClickHouse/ClickHouse/pull/41147) ([alesapin](https://github.com/alesapin)). -* Support expression `(EXPLAIN SELECT ...)` in a subquery. Queries like `SELECT * FROM (EXPLAIN PIPELINE SELECT col FROM TABLE ORDER BY col)` became valid. [#40630](https://github.com/ClickHouse/ClickHouse/pull/40630) ([Vladimir C](https://github.com/vdimir)). -* Allow changing `async_insert_max_data_size` or `async_insert_busy_timeout_ms` in scope of query. E.g. user wants to insert data rarely and she doesn't have access to the server config to tune default settings. [#40668](https://github.com/ClickHouse/ClickHouse/pull/40668) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). 
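For illustration, a minimal sketch of the per-query asynchronous-insert overrides described in the entry above (the table, columns, and values are made up for this example and are not taken from the pull request):

```sql
-- Hypothetical table, used only for this sketch.
CREATE TABLE example_events (id UInt64, message String) ENGINE = MergeTree ORDER BY id;

-- Override asynchronous-insert behaviour for this single INSERT only;
-- the values are illustrative, not recommended defaults.
INSERT INTO example_events
SETTINGS
    async_insert = 1,
    async_insert_max_data_size = 1048576,
    async_insert_busy_timeout_ms = 100
VALUES (1, 'hello'), (2, 'world');
```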
-* Improvements for reading from remote filesystems, made threadpool size for reads/writes configurable. Closes [#41070](https://github.com/ClickHouse/ClickHouse/issues/41070). [#41011](https://github.com/ClickHouse/ClickHouse/pull/41011) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Support all combinations of combinators in WindowTransform/arrayReduce*/initializeAggregation/aggregate functions versioning. Previously combinators like `ForEach/Resample/Map` didn't work in these places, and using them led to an exception like `State function ... inserts results into non-state column`. [#41107](https://github.com/ClickHouse/ClickHouse/pull/41107) ([Kruglov Pavel](https://github.com/Avogar)). -* Add function `tryDecrypt` that returns NULL when decryption fails (e.g. decrypt with an incorrect key) instead of throwing an exception. [#41206](https://github.com/ClickHouse/ClickHouse/pull/41206) ([Duc Canh Le](https://github.com/canhld94)). -* Add the `unreserved_space` column to the `system.disks` table to check how much space is not taken by reservations per disk. [#41254](https://github.com/ClickHouse/ClickHouse/pull/41254) ([filimonov](https://github.com/filimonov)). -* Support s3 authorization headers in table function arguments. [#41261](https://github.com/ClickHouse/ClickHouse/pull/41261) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Add support for MultiRead in Keeper and internal ZooKeeper client (this is an extension to the ZooKeeper protocol, only available in ClickHouse Keeper). [#41410](https://github.com/ClickHouse/ClickHouse/pull/41410) ([Antonio Andelic](https://github.com/antonio2368)). -* Add support for comparing a decimal type with a floating point literal in the IN operator. [#41544](https://github.com/ClickHouse/ClickHouse/pull/41544) ([liang.huang](https://github.com/lhuang09287750)). -* Allow readable size values (like `1TB`) in cache config. [#41688](https://github.com/ClickHouse/ClickHouse/pull/41688) ([Kseniia Sumarokova](https://github.com/kssenii)). -* ClickHouse could cache stale DNS entries for some period of time (15 seconds by default) until the cache was updated asynchronously. During these periods ClickHouse could nevertheless try to establish a connection and produce errors. This behavior is fixed. [#41707](https://github.com/ClickHouse/ClickHouse/pull/41707) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Add interactive history search with an fzf-like utility (fzf/sk) for `clickhouse-client`/`clickhouse-local` (note you can use `FZF_DEFAULT_OPTS`/`SKIM_DEFAULT_OPTIONS` to additionally configure the behavior). [#41730](https://github.com/ClickHouse/ClickHouse/pull/41730) ([Azat Khuzhin](https://github.com/azat)). -* Clients connecting to a secure server with an invalid certificate are now only allowed to proceed with the '--accept-certificate' flag. [#41743](https://github.com/ClickHouse/ClickHouse/pull/41743) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Add function `tryBase58Decode`, similar to the existing function `tryBase64Decode`. [#41824](https://github.com/ClickHouse/ClickHouse/pull/41824) ([Robert Schulze](https://github.com/rschu1ze)). -* Improve feedback when replacing a partition with a different primary key. Fixes [#34798](https://github.com/ClickHouse/ClickHouse/issues/34798). [#41838](https://github.com/ClickHouse/ClickHouse/pull/41838) ([Salvatore](https://github.com/tbsal)). -* Fix parallel parsing: the segmentator now checks `max_block_size`. This fixed memory overallocation in case of parallel parsing and a small LIMIT. 
[#41852](https://github.com/ClickHouse/ClickHouse/pull/41852) ([Vitaly Baranov](https://github.com/vitlibar)). -* Don't add "TABLE_IS_DROPPED" exception to `system.errors` if it's happened during SELECT from a system table and was ignored. [#41908](https://github.com/ClickHouse/ClickHouse/pull/41908) ([AlfVII](https://github.com/AlfVII)). -* Improve option `enable_extended_results_for_datetime_functions` to return results of type DateTime64 for functions `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute` and `timeSlot`. [#41910](https://github.com/ClickHouse/ClickHouse/pull/41910) ([Roman Vasin](https://github.com/rvasin)). -* Improve `DateTime` type inference for text formats. Now it respects setting `date_time_input_format` and doesn't try to infer datetimes from numbers as timestamps. Closes [#41389](https://github.com/ClickHouse/ClickHouse/issues/41389) Closes [#42206](https://github.com/ClickHouse/ClickHouse/issues/42206). [#41912](https://github.com/ClickHouse/ClickHouse/pull/41912) ([Kruglov Pavel](https://github.com/Avogar)). -* Remove confusing warning when inserting with `perform_ttl_move_on_insert` = false. [#41980](https://github.com/ClickHouse/ClickHouse/pull/41980) ([Vitaly Baranov](https://github.com/vitlibar)). -* Allow user to write `countState(*)` similar to `count(*)`. This closes [#9338](https://github.com/ClickHouse/ClickHouse/issues/9338). [#41983](https://github.com/ClickHouse/ClickHouse/pull/41983) ([Amos Bird](https://github.com/amosbird)). -* Fix `rankCorr` size overflow. [#42020](https://github.com/ClickHouse/ClickHouse/pull/42020) ([Duc Canh Le](https://github.com/canhld94)). -* Added an option to specify an arbitrary string as an environment name in the Sentry's config for more handy reports. [#42037](https://github.com/ClickHouse/ClickHouse/pull/42037) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Fix parsing out-of-range Date from CSV. [#42044](https://github.com/ClickHouse/ClickHouse/pull/42044) ([Andrey Zvonov](https://github.com/zvonand)). -* `parseDataTimeBestEffort` now supports comma between date and time. Closes [#42038](https://github.com/ClickHouse/ClickHouse/issues/42038). [#42049](https://github.com/ClickHouse/ClickHouse/pull/42049) ([flynn](https://github.com/ucasfl)). -* Improved stale replica recovery process for `ReplicatedMergeTree`. If a lost replica has some parts which are absent from a healthy replica, but these parts should appear in the future according to the replication queue of the healthy replica, then the lost replica will keep such parts instead of detaching them. [#42134](https://github.com/ClickHouse/ClickHouse/pull/42134) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Add a possibility to use `Date32` arguments for date_diff function. Fix issue in date_diff function when using DateTime64 arguments with a start date before Unix epoch and end date after Unix epoch. [#42308](https://github.com/ClickHouse/ClickHouse/pull/42308) ([Roman Vasin](https://github.com/rvasin)). -* When uploading big parts to Minio, 'Complete Multipart Upload' can take a long time. Minio sends heartbeats every 10 seconds (see https://github.com/minio/minio/pull/7198). But clickhouse times out earlier, because the default send/receive timeout is [set](https://github.com/ClickHouse/ClickHouse/blob/cc24fcd6d5dfb67f5f66f5483e986bd1010ad9cf/src/IO/S3/PocoHTTPClient.cpp#L123) to 5 seconds. 
[#42321](https://github.com/ClickHouse/ClickHouse/pull/42321) ([filimonov](https://github.com/filimonov)). -* Fix rarely invalid cast of aggregate state types with complex types such as Decimal. This fixes [#42408](https://github.com/ClickHouse/ClickHouse/issues/42408). [#42417](https://github.com/ClickHouse/ClickHouse/pull/42417) ([Amos Bird](https://github.com/amosbird)). -* Allow to use `Date32` arguments for `dateName` function. [#42554](https://github.com/ClickHouse/ClickHouse/pull/42554) ([Roman Vasin](https://github.com/rvasin)). -* Now filters with NULL literals will be used during index analysis. [#34063](https://github.com/ClickHouse/ClickHouse/issues/34063). [#41842](https://github.com/ClickHouse/ClickHouse/pull/41842) ([Amos Bird](https://github.com/amosbird)). -* Merge parts if every part in the range is older than a certain threshold. The threshold can be set by using `min_age_to_force_merge_seconds`. This closes [#35836](https://github.com/ClickHouse/ClickHouse/issues/35836). [#42423](https://github.com/ClickHouse/ClickHouse/pull/42423) ([Antonio Andelic](https://github.com/antonio2368)). This is continuation of [#39550i](https://github.com/ClickHouse/ClickHouse/pull/39550) by [@fastio](https://github.com/fastio) who implemented most of the logic. -* Improve the time to recover lost keeper connections. [#42541](https://github.com/ClickHouse/ClickHouse/pull/42541) ([Raúl Marín](https://github.com/Algunenano)). - -#### Build/Testing/Packaging Improvement -* Add fuzzer for table definitions [#40096](https://github.com/ClickHouse/ClickHouse/pull/40096) ([Anton Popov](https://github.com/CurtizJ)). This represents the biggest advancement for ClickHouse testing in this year so far. -* Beta version of the ClickHouse Cloud service is released: [https://clickhouse.cloud/](https://clickhouse.cloud/). It provides the easiest way to use ClickHouse (even slightly easier than the single-command installation). -* Added support of WHERE clause generation to AST Fuzzer and possibility to add or remove ORDER BY and WHERE clause. [#38519](https://github.com/ClickHouse/ClickHouse/pull/38519) ([Ilya Yatsishin](https://github.com/qoega)). -* Aarch64 binaries now require at least ARMv8.2, released in 2016. Most notably, this enables use of ARM LSE, i.e. native atomic operations. Also, CMake build option "NO_ARMV81_OR_HIGHER" has been added to allow compilation of binaries for older ARMv8.0 hardware, e.g. Raspberry Pi 4. [#41610](https://github.com/ClickHouse/ClickHouse/pull/41610) ([Robert Schulze](https://github.com/rschu1ze)). -* Allow building ClickHouse with Musl (small changes after it was already supported but broken). [#41987](https://github.com/ClickHouse/ClickHouse/pull/41987) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Add the `$CLICKHOUSE_CRONFILE` file checking to avoid running the `sed` command to get the file not found error on install. [#42081](https://github.com/ClickHouse/ClickHouse/pull/42081) ([Chun-Sheng, Li](https://github.com/peter279k)). -* Update cctz to `2022e` to support the new timezone changes. Palestine transitions are now Saturdays at 02:00. Simplify three Ukraine zones into one. Jordan and Syria switch from +02/+03 with DST to year-round +03. (https://data.iana.org/time-zones/tzdb/NEWS). This closes [#42252](https://github.com/ClickHouse/ClickHouse/issues/42252). [#42327](https://github.com/ClickHouse/ClickHouse/pull/42327) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
[#42273](https://github.com/ClickHouse/ClickHouse/pull/42273) ([Dom Del Nano](https://github.com/ddelnano)). -* Add Rust code support into ClickHouse with BLAKE3 hash-function library as an example. [#33435](https://github.com/ClickHouse/ClickHouse/pull/33435) ([BoloniniD](https://github.com/BoloniniD)). - -#### Bug Fix (user-visible misbehavior in official stable or prestable release) - -* Choose correct aggregation method for `LowCardinality` with big integer types. [#42342](https://github.com/ClickHouse/ClickHouse/pull/42342) ([Duc Canh Le](https://github.com/canhld94)). -* Several fixes for `web` disk. [#41652](https://github.com/ClickHouse/ClickHouse/pull/41652) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fixes an issue that causes docker run to fail if `https_port` is not present in config. [#41693](https://github.com/ClickHouse/ClickHouse/pull/41693) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Mutations were not cancelled properly on server shutdown or `SYSTEM STOP MERGES` query and cancellation might take long time, it's fixed. [#41699](https://github.com/ClickHouse/ClickHouse/pull/41699) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Fix wrong result of queries with `ORDER BY` or `GROUP BY` by columns from prefix of sorting key, wrapped into monotonic functions, with enable "read in order" optimization (settings `optimize_read_in_order` and `optimize_aggregation_in_order`). [#41701](https://github.com/ClickHouse/ClickHouse/pull/41701) ([Anton Popov](https://github.com/CurtizJ)). -* Fix possible crash in `SELECT` from `Merge` table with enabled `optimize_monotonous_functions_in_order_by` setting. Fixes [#41269](https://github.com/ClickHouse/ClickHouse/issues/41269). [#41740](https://github.com/ClickHouse/ClickHouse/pull/41740) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fixed "Part ... intersects part ..." error that might happen in extremely rare cases if replica was restarted just after detaching some part as broken. [#41741](https://github.com/ClickHouse/ClickHouse/pull/41741) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Don't allow to create or alter merge tree tables with column name `_row_exists`, which is reserved for lightweight delete. Fixed [#41716](https://github.com/ClickHouse/ClickHouse/issues/41716). [#41763](https://github.com/ClickHouse/ClickHouse/pull/41763) ([Jianmei Zhang](https://github.com/zhangjmruc)). -* Fix a bug that CORS headers are missing in some HTTP responses. [#41792](https://github.com/ClickHouse/ClickHouse/pull/41792) ([Frank Chen](https://github.com/FrankChen021)). -* 22.9 might fail to startup `ReplicatedMergeTree` table if that table was created by 20.3 or older version and was never altered, it's fixed. Fixes [#41742](https://github.com/ClickHouse/ClickHouse/issues/41742). [#41796](https://github.com/ClickHouse/ClickHouse/pull/41796) ([Alexander Tokmakov](https://github.com/tavplubix)). -* When the batch sending fails for some reason, it cannot be automatically recovered, and if it is not processed in time, it will lead to accumulation, and the printed error message will become longer and longer, which will cause the http thread to block. [#41813](https://github.com/ClickHouse/ClickHouse/pull/41813) ([zhongyuankai](https://github.com/zhongyuankai)). -* Fix compact parts with compressed marks setting. Fixes [#41783](https://github.com/ClickHouse/ClickHouse/issues/41783) and [#41746](https://github.com/ClickHouse/ClickHouse/issues/41746). 
[#41823](https://github.com/ClickHouse/ClickHouse/pull/41823) ([alesapin](https://github.com/alesapin)). -* Old versions of Replicated database don't have a special marker in [Zoo]Keeper. We need to check only whether the node contains some obscure data instead of a special marker. [#41875](https://github.com/ClickHouse/ClickHouse/pull/41875) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Fix possible exception in fs cache. [#41884](https://github.com/ClickHouse/ClickHouse/pull/41884) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix `use_environment_credentials` for the s3 table function. [#41970](https://github.com/ClickHouse/ClickHouse/pull/41970) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fixed "Directory already exists and is not empty" error on detaching a broken part that might prevent a `ReplicatedMergeTree` table from starting replication. Fixes [#40957](https://github.com/ClickHouse/ClickHouse/issues/40957). [#41981](https://github.com/ClickHouse/ClickHouse/pull/41981) ([Alexander Tokmakov](https://github.com/tavplubix)). -* `toDateTime64` now returns the same output with negative integer and float arguments. [#42025](https://github.com/ClickHouse/ClickHouse/pull/42025) ([Robert Schulze](https://github.com/rschu1ze)). -* Fix write into `azure_blob_storage`. Partially closes [#41754](https://github.com/ClickHouse/ClickHouse/issues/41754). [#42034](https://github.com/ClickHouse/ClickHouse/pull/42034) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix the `bzip2` decoding issue for specific `bzip2` files. [#42046](https://github.com/ClickHouse/ClickHouse/pull/42046) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix SQL function `toLastDayOfMonth` with setting "enable_extended_results_for_datetime_functions = 1" at the beginning of the extended range (January 1900). - Fix SQL function "toRelativeWeekNum()" with setting "enable_extended_results_for_datetime_functions = 1" at the end of the extended range (December 2299). - Improve the performance of SQL functions "toISOYear()", "toFirstDayNumOfISOYearIndex()" and "toYearWeekOfNewyearMode()" by avoiding unnecessary index arithmetics. [#42084](https://github.com/ClickHouse/ClickHouse/pull/42084) ([Roman Vasin](https://github.com/rvasin)). -* The maximum size of fetches for each table was accidentally set to 8 while the pool size could be bigger. Now the maximum size of fetches per table is equal to the pool size. [#42090](https://github.com/ClickHouse/ClickHouse/pull/42090) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* A table might be shut down and a dictionary might be detached before checking if it can be dropped without breaking dependencies between tables; it's fixed. Fixes [#41982](https://github.com/ClickHouse/ClickHouse/issues/41982). [#42106](https://github.com/ClickHouse/ClickHouse/pull/42106) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Fix bad inefficiency of `remote_filesystem_read_method=read` with filesystem cache. Closes [#42125](https://github.com/ClickHouse/ClickHouse/issues/42125). [#42129](https://github.com/ClickHouse/ClickHouse/pull/42129) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix possible timeout exception for distributed queries with use_hedged_requests = 0. [#42130](https://github.com/ClickHouse/ClickHouse/pull/42130) ([Azat Khuzhin](https://github.com/azat)). -* Fixed a minor bug inside function `runningDifference` when using it with the `Date32` type. 
Previously `Date` was used and it may cause some logical errors like `Bad cast from type DB::ColumnVector to DB::ColumnVector'`. [#42143](https://github.com/ClickHouse/ClickHouse/pull/42143) ([Alfred Xu](https://github.com/sperlingxx)). -* Fix reusing of files > 4GB from base backup. [#42146](https://github.com/ClickHouse/ClickHouse/pull/42146) ([Azat Khuzhin](https://github.com/azat)). -* DISTINCT in order fails with LOGICAL_ERROR if first column in sorting key contains function. [#42186](https://github.com/ClickHouse/ClickHouse/pull/42186) ([Igor Nikonov](https://github.com/devcrafter)). -* Fix a bug with projections and the `aggregate_functions_null_for_empty` setting. This bug is very rare and appears only if you enable the `aggregate_functions_null_for_empty` setting in the server's config. This closes [#41647](https://github.com/ClickHouse/ClickHouse/issues/41647). [#42198](https://github.com/ClickHouse/ClickHouse/pull/42198) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix read from `Buffer` tables with read in order desc. [#42236](https://github.com/ClickHouse/ClickHouse/pull/42236) ([Duc Canh Le](https://github.com/canhld94)). -* Fix a bug which prevents ClickHouse to start when `background_pool_size setting` is set on default profile but `background_merges_mutations_concurrency_ratio` is not. [#42315](https://github.com/ClickHouse/ClickHouse/pull/42315) ([nvartolomei](https://github.com/nvartolomei)). -* `ALTER UPDATE` of attached part (with columns different from table schema) could create an invalid `columns.txt` metadata on disk. Reading from such part could fail with errors or return invalid data. Fixes [#42161](https://github.com/ClickHouse/ClickHouse/issues/42161). [#42319](https://github.com/ClickHouse/ClickHouse/pull/42319) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Setting `additional_table_filters` were not applied to `Distributed` storage. Fixes [#41692](https://github.com/ClickHouse/ClickHouse/issues/41692). [#42322](https://github.com/ClickHouse/ClickHouse/pull/42322) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix a data race in query finish/cancel. This closes [#42346](https://github.com/ClickHouse/ClickHouse/issues/42346). [#42362](https://github.com/ClickHouse/ClickHouse/pull/42362) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* This reverts [#40217](https://github.com/ClickHouse/ClickHouse/issues/40217) which introduced a regression in date/time functions. [#42367](https://github.com/ClickHouse/ClickHouse/pull/42367) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix assert cast in join on falsy condition, Close [#42380](https://github.com/ClickHouse/ClickHouse/issues/42380). [#42407](https://github.com/ClickHouse/ClickHouse/pull/42407) ([Vladimir C](https://github.com/vdimir)). -* Fix buffer overflow in the processing of Decimal data types. This closes [#42451](https://github.com/ClickHouse/ClickHouse/issues/42451). [#42465](https://github.com/ClickHouse/ClickHouse/pull/42465) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* `AggregateFunctionQuantile` now correctly works with UInt128 columns. Previously, the quantile state interpreted `UInt128` columns as `Int128` which could have led to incorrect results. [#42473](https://github.com/ClickHouse/ClickHouse/pull/42473) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix bad_cast assert during INSERT into `Annoy` indexes over non-Float32 columns. `Annoy` indices is an experimental feature. 
[#42485](https://github.com/ClickHouse/ClickHouse/pull/42485) ([Robert Schulze](https://github.com/rschu1ze)). -* Arithmetic operator with Date or DateTime and 128 or 256-bit integer was referencing uninitialized memory. [#42453](https://github.com/ClickHouse/ClickHouse/issues/42453). [#42573](https://github.com/ClickHouse/ClickHouse/pull/42573) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix unexpected table loading error when partition key contains alias function names during server upgrade. [#36379](https://github.com/ClickHouse/ClickHouse/pull/36379) ([Amos Bird](https://github.com/amosbird)). - - -### ClickHouse release 22.9, 2022-09-22 - -#### Backward Incompatible Change - -* Upgrade from 20.3 and older to 22.9 and newer should be done through an intermediate version if there are any `ReplicatedMergeTree` tables, otherwise server with the new version will not start. [#40641](https://github.com/ClickHouse/ClickHouse/pull/40641) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Remove the functions `accurate_Cast` and `accurate_CastOrNull` (they are different to `accurateCast` and `accurateCastOrNull` by underscore in the name and they are not affected by the value of `cast_keep_nullable` setting). These functions were undocumented, untested, unused, and unneeded. They appeared to be alive due to code generalization. [#40682](https://github.com/ClickHouse/ClickHouse/pull/40682) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Add a test to ensure that every new table function will be documented. See [#40649](https://github.com/ClickHouse/ClickHouse/issues/40649). Rename table function `MeiliSearch` to `meilisearch`. [#40709](https://github.com/ClickHouse/ClickHouse/pull/40709) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Add a test to ensure that every new function will be documented. See [#40649](https://github.com/ClickHouse/ClickHouse/pull/40649). The functions `lemmatize`, `synonyms`, `stem` were case-insensitive by mistake. Now they are case-sensitive. [#40711](https://github.com/ClickHouse/ClickHouse/pull/40711) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Make interpretation of YAML configs to be more conventional. [#41044](https://github.com/ClickHouse/ClickHouse/pull/41044) ([Vitaly Baranov](https://github.com/vitlibar)). - -#### New Feature - -* Support `insert_quorum = 'auto'` to use majority number. [#39970](https://github.com/ClickHouse/ClickHouse/pull/39970) ([Sachin](https://github.com/SachinSetiya)). -* Add embedded dashboards to ClickHouse server. This is a demo project about how to achieve 90% results with 1% effort using ClickHouse features. [#40461](https://github.com/ClickHouse/ClickHouse/pull/40461) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Added new settings constraint writability kind `changeable_in_readonly`. [#40631](https://github.com/ClickHouse/ClickHouse/pull/40631) ([Sergei Trifonov](https://github.com/serxa)). -* Add support for `INTERSECT DISTINCT` and `EXCEPT DISTINCT`. [#40792](https://github.com/ClickHouse/ClickHouse/pull/40792) ([Duc Canh Le](https://github.com/canhld94)). -* Add new input/output format `JSONObjectEachRow` - Support import for formats `JSON/JSONCompact/JSONColumnsWithMetadata`. Add new setting `input_format_json_validate_types_from_metadata` that controls whether we should check if data types from metadata match data types from the header. 
- Add new setting `input_format_json_validate_utf8`: when it's enabled, all `JSON` formats will validate UTF-8 sequences. It is disabled by default. Note that this setting doesn't influence the output formats `JSON/JSONCompact/JSONColumnsWithMetadata`, they always validate UTF-8 sequences (this exception was made for compatibility reasons). - Add new setting `input_format_json_read_numbers_as_strings` that allows parsing numbers in a String column; the setting is disabled by default. - Add new setting `output_format_json_quote_decimals` that allows outputting decimals in double quotes, disabled by default. - Allow parsing decimals in double quotes during data import. [#40910](https://github.com/ClickHouse/ClickHouse/pull/40910) ([Kruglov Pavel](https://github.com/Avogar)). -* Query parameters are supported in the DESCRIBE TABLE query. [#40952](https://github.com/ClickHouse/ClickHouse/pull/40952) ([Nikita Taranov](https://github.com/nickitat)). -* Add support for Parquet Time32/64 by converting it into DateTime64. Parquet Time32/64 represents time elapsed since midnight, while DateTime32/64 represents an actual unix timestamp. The conversion simply offsets from `0`. [#41333](https://github.com/ClickHouse/ClickHouse/pull/41333) ([Arthur Passos](https://github.com/arthurpassos)). -* Implement set operations on Apache Datasketches. [#39919](https://github.com/ClickHouse/ClickHouse/pull/39919) ([Fangyuan Deng](https://github.com/pzhdfy)). Note: there is no point in using Apache Datasketches, they are inferior to ClickHouse and only make sense for integration with other systems. -* Allow recording errors to a specified file while reading text formats (`CSV`, `TSV`). [#40516](https://github.com/ClickHouse/ClickHouse/pull/40516) ([zjial](https://github.com/zjial)). - -#### Experimental Feature - -* Add ANN (approximate nearest neighbor) index based on `Annoy`. [#40818](https://github.com/ClickHouse/ClickHouse/pull/40818) ([Filatenkov Artur](https://github.com/FArthur-cmd)). [#37215](https://github.com/ClickHouse/ClickHouse/pull/37215) ([VVMak](https://github.com/VVMak)). -* Add new storage engine `KeeperMap`, which uses ClickHouse Keeper or ZooKeeper as a key-value store. [#39976](https://github.com/ClickHouse/ClickHouse/pull/39976) ([Antonio Andelic](https://github.com/antonio2368)). This storage engine is intended to store a small amount of metadata. -* Improvement for in-memory data parts: remove completely processed WAL files. [#40592](https://github.com/ClickHouse/ClickHouse/pull/40592) ([Azat Khuzhin](https://github.com/azat)). - -#### Performance Improvement - -* Implement compression of marks and primary key. Close [#34437](https://github.com/ClickHouse/ClickHouse/issues/34437). [#37693](https://github.com/ClickHouse/ClickHouse/pull/37693) ([zhongyuankai](https://github.com/zhongyuankai)). -* Allow loading marks with a threadpool in advance. Regulated by the setting `load_marks_asynchronously` (default: 0). [#40821](https://github.com/ClickHouse/ClickHouse/pull/40821) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Virtual filesystem over s3 will use random object names split into multiple path prefixes for better performance on AWS. [#40968](https://github.com/ClickHouse/ClickHouse/pull/40968) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Account for the `max_block_size` value while producing single-level aggregation results. This allows executing the following query plan steps using more threads. 
[#39138](https://github.com/ClickHouse/ClickHouse/pull/39138) ([Nikita Taranov](https://github.com/nickitat)). -* Software prefetching is used in aggregation to speed up operations with hash tables. Controlled by the setting `enable_software_prefetch_in_aggregation`, enabled by default. [#39304](https://github.com/ClickHouse/ClickHouse/pull/39304) ([Nikita Taranov](https://github.com/nickitat)). -* Better support of `optimize_read_in_order` in case when some of sorting key columns are always constant after applying `WHERE` clause. E.g. query like `SELECT ... FROM table WHERE a = 'x' ORDER BY a, b`, where `table` has storage definition: `MergeTree ORDER BY (a, b)`. [#38715](https://github.com/ClickHouse/ClickHouse/pull/38715) ([Anton Popov](https://github.com/CurtizJ)). -* Filter joined streams for `full_sorting_join` by each other before sorting. [#39418](https://github.com/ClickHouse/ClickHouse/pull/39418) ([Vladimir C](https://github.com/vdimir)). -* LZ4 decompression optimised by skipping empty literals processing. [#40142](https://github.com/ClickHouse/ClickHouse/pull/40142) ([Nikita Taranov](https://github.com/nickitat)). -* Speedup backup process using native `copy` when possible instead of copying through `clickhouse-server` memory. [#40395](https://github.com/ClickHouse/ClickHouse/pull/40395) ([alesapin](https://github.com/alesapin)). -* Do not obtain storage snapshot for each INSERT block (slightly improves performance). [#40638](https://github.com/ClickHouse/ClickHouse/pull/40638) ([Azat Khuzhin](https://github.com/azat)). -* Implement batch processing for aggregate functions with multiple nullable arguments. [#41058](https://github.com/ClickHouse/ClickHouse/pull/41058) ([Raúl Marín](https://github.com/Algunenano)). -* Speed up reading UniquesHashSet (`uniqState` from disk for example). [#41089](https://github.com/ClickHouse/ClickHouse/pull/41089) ([Raúl Marín](https://github.com/Algunenano)). -* Fixed high memory usage while executing mutations of compact parts in tables with huge number of columns. [#41122](https://github.com/ClickHouse/ClickHouse/pull/41122) ([lthaooo](https://github.com/lthaooo)). -* Enable the vectorscan library on ARM, this speeds up regexp evaluation. [#41033](https://github.com/ClickHouse/ClickHouse/pull/41033) ([Robert Schulze](https://github.com/rschu1ze)). -* Upgrade vectorscan to 5.4.8 which has many performance optimizations to speed up regexp evaluation. [#41270](https://github.com/ClickHouse/ClickHouse/pull/41270) ([Robert Schulze](https://github.com/rschu1ze)). -* Fix incorrect fallback to skip the local filesystem cache for VFS (like S3) which happened on very high concurrency level. [#40420](https://github.com/ClickHouse/ClickHouse/pull/40420) ([Kseniia Sumarokova](https://github.com/kssenii)). -* If row policy filter is always false, return empty result immediately without reading any data. This closes [#24012](https://github.com/ClickHouse/ClickHouse/issues/24012). [#40740](https://github.com/ClickHouse/ClickHouse/pull/40740) ([Amos Bird](https://github.com/amosbird)). -* Parallel hash JOIN for Float data types might be suboptimal. Make it better. [#41183](https://github.com/ClickHouse/ClickHouse/pull/41183) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Do not load inactive parts at startup of `MergeTree` tables. [#42181](https://github.com/ClickHouse/ClickHouse/pull/42181) ([Anton Popov](https://github.com/CurtizJ)). +* Improved latency of reading from storage `S3` and table function `s3` with large numbers of small files. 
Now settings `remote_filesystem_read_method` and `remote_filesystem_read_prefetch` take effect while reading from storage `S3`. [#43726](https://github.com/ClickHouse/ClickHouse/pull/43726) ([Anton Popov](https://github.com/CurtizJ)). +* Optimization for reading struct fields in Parquet/ORC files. Only the required fields are loaded. [#44484](https://github.com/ClickHouse/ClickHouse/pull/44484) ([lgbo](https://github.com/lgbo-ustc)). +* The two-level aggregation algorithm was mistakenly disabled for queries over the HTTP interface. It has been re-enabled, and it leads to a major performance improvement. [#45450](https://github.com/ClickHouse/ClickHouse/pull/45450) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Added mmap support for StorageFile, which should improve the performance of clickhouse-local. [#43927](https://github.com/ClickHouse/ClickHouse/pull/43927) ([pufit](https://github.com/pufit)). +* Added sharding support in HashedDictionary to allow parallel load (almost linear scaling based on the number of shards). [#40003](https://github.com/ClickHouse/ClickHouse/pull/40003) ([Azat Khuzhin](https://github.com/azat)). +* Speed up query parsing. [#42284](https://github.com/ClickHouse/ClickHouse/pull/42284) ([Raúl Marín](https://github.com/Algunenano)). +* Always replace the OR chain `expr = x1 OR ... OR expr = xN` with `expr IN (x1, ..., xN)` in the case where `expr` is a `LowCardinality` column. The setting `optimize_min_equality_disjunction_chain_length` is ignored in this case. [#42889](https://github.com/ClickHouse/ClickHouse/pull/42889) ([Guo Wangyang](https://github.com/guowangy)). +* Slightly improve performance by optimizing the code around ThreadStatus. [#43586](https://github.com/ClickHouse/ClickHouse/pull/43586) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Optimize the column-wise ternary logic evaluation by achieving auto-vectorization. In the performance test of this [microbenchmark](https://github.com/ZhiguoZh/ClickHouse/blob/20221123-ternary-logic-opt-example/src/Functions/examples/associative_applier_perf.cpp), we've observed a peak **performance gain** of **21x** on the ICX device (Intel Xeon Platinum 8380 CPU). [#43669](https://github.com/ClickHouse/ClickHouse/pull/43669) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Avoid acquiring read locks in the `system.tables` table if possible. [#43840](https://github.com/ClickHouse/ClickHouse/pull/43840) ([Raúl Marín](https://github.com/Algunenano)). +* Optimize ThreadPool. The performance experiments of SSB (Star Schema Benchmark) on the ICX device (Intel Xeon Platinum 8380 CPU, 80 cores, 160 threads) show that this change could effectively decrease the lock contention for ThreadPoolImpl::mutex by **75%**, increasing the CPU utilization and improving the overall performance by **2.4%**. [#44308](https://github.com/ClickHouse/ClickHouse/pull/44308) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Now the optimisation for predicting the hash table size is applied only if the cached hash table size is sufficiently large (thresholds were determined empirically and hardcoded). [#44455](https://github.com/ClickHouse/ClickHouse/pull/44455) ([Nikita Taranov](https://github.com/nickitat)). +* Small performance improvement for asynchronous reading from remote filesystems. [#44868](https://github.com/ClickHouse/ClickHouse/pull/44868) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add a fast path for: - `col like '%%'`; - `col like '%'`; - `col not like '%%'`; - `col not like '%'`; - `match(col, '.*')`. 
[#45244](https://github.com/ClickHouse/ClickHouse/pull/45244) ([李扬](https://github.com/taiyang-li)). +* Slightly improve happy path optimisation in filtering (WHERE clause). [#45289](https://github.com/ClickHouse/ClickHouse/pull/45289) ([Nikita Taranov](https://github.com/nickitat)). +* Provide monotonicity info for `toUnixTimestamp64*` to enable more algebraic optimizations for index analysis. [#44116](https://github.com/ClickHouse/ClickHouse/pull/44116) ([Nikita Taranov](https://github.com/nickitat)). +* Allow the configuration of temporary data for query processing (spilling to disk) to cooperate with the filesystem cache (taking up the space from the cache disk) [#43972](https://github.com/ClickHouse/ClickHouse/pull/43972) ([Vladimir C](https://github.com/vdimir)). This mainly improves [ClickHouse Cloud](https://clickhouse.cloud/), but can be used for self-managed setups as well, if you know what to do. +* Make `system.replicas` table do parallel fetches of replicas statuses. Closes [#43918](https://github.com/ClickHouse/ClickHouse/issues/43918). [#43998](https://github.com/ClickHouse/ClickHouse/pull/43998) ([Nikolay Degterinsky](https://github.com/evillique)). +* Optimize memory consumption during backup to S3: files to S3 now will be copied directly without using `WriteBufferFromS3` (which could use a lot of memory). [#45188](https://github.com/ClickHouse/ClickHouse/pull/45188) ([Vitaly Baranov](https://github.com/vitlibar)). +* Add a cache for async block ids. This will reduce the number of requests of ZooKeeper when we enable async inserts deduplication. [#45106](https://github.com/ClickHouse/ClickHouse/pull/45106) ([Han Fei](https://github.com/hanfei1991)). #### Improvement -* During startup and ATTACH call, `ReplicatedMergeTree` tables will be readonly until the ZooKeeper connection is made and the setup is finished. [#40148](https://github.com/ClickHouse/ClickHouse/pull/40148) ([Antonio Andelic](https://github.com/antonio2368)). -* Add `enable_extended_results_for_datetime_functions` option to return results of type Date32 for functions toStartOfYear, toStartOfISOYear, toStartOfQuarter, toStartOfMonth, toStartOfWeek, toMonday and toLastDayOfMonth when argument is Date32 or DateTime64, otherwise results of Date type are returned. For compatibility reasons default value is ‘0’. [#41214](https://github.com/ClickHouse/ClickHouse/pull/41214) ([Roman Vasin](https://github.com/rvasin)). -* For security and stability reasons, CatBoost models are no longer evaluated within the ClickHouse server. Instead, the evaluation is now done in the clickhouse-library-bridge, a separate process that loads the catboost library and communicates with the server process via HTTP. [#40897](https://github.com/ClickHouse/ClickHouse/pull/40897) ([Robert Schulze](https://github.com/rschu1ze)). [#39629](https://github.com/ClickHouse/ClickHouse/pull/39629) ([Robert Schulze](https://github.com/rschu1ze)). -* Add more metrics for on-disk temporary data, close [#40206](https://github.com/ClickHouse/ClickHouse/issues/40206). [#40239](https://github.com/ClickHouse/ClickHouse/pull/40239) ([Vladimir C](https://github.com/vdimir)). -* Add config option `warning_supress_regexp`, close [#40330](https://github.com/ClickHouse/ClickHouse/issues/40330). [#40548](https://github.com/ClickHouse/ClickHouse/pull/40548) ([Vladimir C](https://github.com/vdimir)). -* Add setting to disable limit on kafka_num_consumers. Closes [#40331](https://github.com/ClickHouse/ClickHouse/issues/40331). 
[#40670](https://github.com/ClickHouse/ClickHouse/pull/40670) ([Kruglov Pavel](https://github.com/Avogar)). -* Support `SETTINGS` in `DELETE ...` query. [#41533](https://github.com/ClickHouse/ClickHouse/pull/41533) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Detailed S3 profile events `DiskS3*` per S3 API call split for S3 ObjectStorage. [#41532](https://github.com/ClickHouse/ClickHouse/pull/41532) ([Sergei Trifonov](https://github.com/serxa)). -* Two new metrics in `system.asynchronous_metrics`. `NumberOfDetachedParts` and `NumberOfDetachedByUserParts`. [#40779](https://github.com/ClickHouse/ClickHouse/pull/40779) ([Sema Checherinda](https://github.com/CheSema)). -* Allow CONSTRAINTs for ODBC and JDBC tables. [#34551](https://github.com/ClickHouse/ClickHouse/pull/34551) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Don't print `SETTINGS` more than once during query formatting if it didn't appear multiple times in the original query. [#38900](https://github.com/ClickHouse/ClickHouse/pull/38900) ([Raúl Marín](https://github.com/Algunenano)). -* Improve the tracing (OpenTelemetry) context propagation across threads. [#39010](https://github.com/ClickHouse/ClickHouse/pull/39010) ([Frank Chen](https://github.com/FrankChen021)). -* ClickHouse Keeper: add listeners for `interserver_listen_host` only in Keeper if specified. [#39973](https://github.com/ClickHouse/ClickHouse/pull/39973) ([Antonio Andelic](https://github.com/antonio2368)). -* Improve recovery of Replicated user access storage after errors. [#39977](https://github.com/ClickHouse/ClickHouse/pull/39977) ([Vitaly Baranov](https://github.com/vitlibar)). -* Add support for TTL in `EmbeddedRocksDB`. [#39986](https://github.com/ClickHouse/ClickHouse/pull/39986) ([Lloyd-Pottiger](https://github.com/Lloyd-Pottiger)). -* Add schema inference to `clickhouse-obfuscator`, so the `--structure` argument is no longer required. [#40120](https://github.com/ClickHouse/ClickHouse/pull/40120) ([Nikolay Degterinsky](https://github.com/evillique)). -* Improve and fix dictionaries in `Arrow` format. [#40173](https://github.com/ClickHouse/ClickHouse/pull/40173) ([Kruglov Pavel](https://github.com/Avogar)). -* More natural conversion of `Date32`, `DateTime64`, `Date` to narrower types: upper or lower normal value is considered when out of normal range. [#40217](https://github.com/ClickHouse/ClickHouse/pull/40217) ([Andrey Zvonov](https://github.com/zvonand)). -* Fix the case when `Merge` table over `View` cannot use index. [#40233](https://github.com/ClickHouse/ClickHouse/pull/40233) ([Duc Canh Le](https://github.com/canhld94)). -* Custom key names for JSON server logs. [#40251](https://github.com/ClickHouse/ClickHouse/pull/40251) ([Mallik Hassan](https://github.com/SadiHassan)). -* It is now possible to set a custom error code for the exception thrown by function `throwIf`. [#40319](https://github.com/ClickHouse/ClickHouse/pull/40319) ([Robert Schulze](https://github.com/rschu1ze)). -* Improve schema inference cache, respect format settings that can change the schema. [#40414](https://github.com/ClickHouse/ClickHouse/pull/40414) ([Kruglov Pavel](https://github.com/Avogar)). -* Allow parsing `Date` as `DateTime` and `DateTime64`. This implements the enhancement proposed in [#36949](https://github.com/ClickHouse/ClickHouse/issues/36949). [#40474](https://github.com/ClickHouse/ClickHouse/pull/40474) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
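As a hedged illustration of the `Date`-as-`DateTime` parsing entry above (the date literal is an arbitrary example, and the exact rendering depends on the session time zone):

```sql
-- A plain date string can now be parsed into DateTime / DateTime64;
-- the time part is assumed to default to midnight of that day.
SELECT
    CAST('2022-08-22' AS DateTime)      AS date_as_datetime,
    CAST('2022-08-22' AS DateTime64(3)) AS date_as_datetime64;
```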
-* Allow conversion from `String` with `DateTime64` like `2022-08-22 01:02:03.456` to `Date` and `Date32`. Allow conversion from String with DateTime like `2022-08-22 01:02:03` to `Date32`. This closes [#39598](https://github.com/ClickHouse/ClickHouse/issues/39598). [#40475](https://github.com/ClickHouse/ClickHouse/pull/40475) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Better support for nested data structures in Parquet format [#40485](https://github.com/ClickHouse/ClickHouse/pull/40485) ([Arthur Passos](https://github.com/arthurpassos)). -* Support reading Array(Record) into flatten nested table in Avro. [#40534](https://github.com/ClickHouse/ClickHouse/pull/40534) ([Kruglov Pavel](https://github.com/Avogar)). -* Add read-only support for `EmbeddedRocksDB`. [#40543](https://github.com/ClickHouse/ClickHouse/pull/40543) ([Lloyd-Pottiger](https://github.com/Lloyd-Pottiger)). -* Validate the compression method parameter of URL table engine. [#40600](https://github.com/ClickHouse/ClickHouse/pull/40600) ([Frank Chen](https://github.com/FrankChen021)). -* Better format detection for url table function/engine in presence of a query string after a file name. Closes [#40315](https://github.com/ClickHouse/ClickHouse/issues/40315). [#40636](https://github.com/ClickHouse/ClickHouse/pull/40636) ([Kruglov Pavel](https://github.com/Avogar)). -* Disable projection when grouping set is used. It generated wrong result. This fixes [#40635](https://github.com/ClickHouse/ClickHouse/issues/40635). [#40726](https://github.com/ClickHouse/ClickHouse/pull/40726) ([Amos Bird](https://github.com/amosbird)). -* Fix incorrect format of `APPLY` column transformer which can break metadata if used in table definition. This fixes [#37590](https://github.com/ClickHouse/ClickHouse/issues/37590). [#40727](https://github.com/ClickHouse/ClickHouse/pull/40727) ([Amos Bird](https://github.com/amosbird)). -* Support the `%z` descriptor for formatting the timezone offset in `formatDateTime`. [#40736](https://github.com/ClickHouse/ClickHouse/pull/40736) ([Cory Levy](https://github.com/LevyCory)). -* The interactive mode in `clickhouse-client` now interprets `.` and `/` as "run the last command". [#40750](https://github.com/ClickHouse/ClickHouse/pull/40750) ([Robert Schulze](https://github.com/rschu1ze)). -* Fix issue with passing MySQL timeouts for MySQL database engine and MySQL table function. Closes [#34168](https://github.com/ClickHouse/ClickHouse/issues/34168). [#40751](https://github.com/ClickHouse/ClickHouse/pull/40751) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Create status file for filesystem cache directory to make sure that cache directories are not shared between different servers or caches. [#40820](https://github.com/ClickHouse/ClickHouse/pull/40820) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Add support for `DELETE` and `UPDATE` for `EmbeddedRocksDB` storage. [#40853](https://github.com/ClickHouse/ClickHouse/pull/40853) ([Antonio Andelic](https://github.com/antonio2368)). -* ClickHouse Keeper: fix shutdown during long commit and increase allowed request size. [#40941](https://github.com/ClickHouse/ClickHouse/pull/40941) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix race in WriteBufferFromS3, add TSA annotations. [#40950](https://github.com/ClickHouse/ClickHouse/pull/40950) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Grouping sets with group_by_use_nulls should only convert key columns to nullable. 
[#40997](https://github.com/ClickHouse/ClickHouse/pull/40997) ([Duc Canh Le](https://github.com/canhld94)). -* Improve the observability of INSERT on distributed table. [#41034](https://github.com/ClickHouse/ClickHouse/pull/41034) ([Frank Chen](https://github.com/FrankChen021)). -* More low-level metrics for S3 interaction. [#41039](https://github.com/ClickHouse/ClickHouse/pull/41039) ([mateng915](https://github.com/mateng0915)). -* Support relative path in Location header after HTTP redirect. Closes [#40985](https://github.com/ClickHouse/ClickHouse/issues/40985). [#41162](https://github.com/ClickHouse/ClickHouse/pull/41162) ([Kruglov Pavel](https://github.com/Avogar)). -* Apply changes to HTTP handlers on fly without server restart. [#41177](https://github.com/ClickHouse/ClickHouse/pull/41177) ([Azat Khuzhin](https://github.com/azat)). -* ClickHouse Keeper: properly close active sessions during shutdown. [#41215](https://github.com/ClickHouse/ClickHouse/pull/41215) ([Antonio Andelic](https://github.com/antonio2368)). This lowers the period of "table is read-only" errors. -* Add ability to automatically comment SQL queries in clickhouse-client/local (with `Alt-#`, like in readline). [#41224](https://github.com/ClickHouse/ClickHouse/pull/41224) ([Azat Khuzhin](https://github.com/azat)). -* Fix incompatibility of cache after switching setting `do_no_evict_index_and_mark_files` from 1 to 0, 0 to 1. [#41330](https://github.com/ClickHouse/ClickHouse/pull/41330) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Add a setting `allow_suspicious_fixed_string_types` to prevent users from creating columns of type FixedString with size > 256. [#41495](https://github.com/ClickHouse/ClickHouse/pull/41495) ([Duc Canh Le](https://github.com/canhld94)). -* Add `has_lightweight_delete` to system.parts. [#41564](https://github.com/ClickHouse/ClickHouse/pull/41564) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Use the structure from the insertion table in `generateRandom` without arguments. [#45239](https://github.com/ClickHouse/ClickHouse/pull/45239) ([Kruglov Pavel](https://github.com/Avogar)). +* Allow implicit conversion of floats stored in string fields of JSON to integers in `JSONExtract` functions. E.g. `JSONExtract('{"a": "1000.111"}', 'a', 'UInt64')` -> `1000`; previously it returned 0. [#45432](https://github.com/ClickHouse/ClickHouse/pull/45432) ([Anton Popov](https://github.com/CurtizJ)). +* Added fields `supports_parallel_parsing` and `supports_parallel_formatting` to the table `system.formats` for better introspection. [#45499](https://github.com/ClickHouse/ClickHouse/pull/45499) ([Anton Popov](https://github.com/CurtizJ)). +* Improve reading of CSV fields in the CustomSeparated/Template formats. Closes [#42352](https://github.com/ClickHouse/ClickHouse/issues/42352). Closes [#39620](https://github.com/ClickHouse/ClickHouse/issues/39620). [#43332](https://github.com/ClickHouse/ClickHouse/pull/43332) ([Kruglov Pavel](https://github.com/Avogar)). +* Unify query elapsed time measurements. [#43455](https://github.com/ClickHouse/ClickHouse/pull/43455) ([Raúl Marín](https://github.com/Algunenano)). +* Improve automatic usage of structure from the insertion table in the table functions file/hdfs/s3 when virtual columns are present in a select query; this fixes the possible `Block structure mismatch` or `number of columns mismatch` errors. [#43695](https://github.com/ClickHouse/ClickHouse/pull/43695) ([Kruglov Pavel](https://github.com/Avogar)). +* Add support for signed arguments in the function `range`. 
Fixes [#43333](https://github.com/ClickHouse/ClickHouse/issues/43333). [#43733](https://github.com/ClickHouse/ClickHouse/pull/43733) ([sanyu](https://github.com/wineternity)). +* Remove redundant sorting, for example, sorting related ORDER BY clauses in subqueries. Implemented on top of the query plan. It does a similar optimization to `optimize_duplicate_order_by_and_distinct` regarding `ORDER BY` clauses, but is more generic, since it's applied to any redundant sorting steps (not only those caused by an ORDER BY clause) and to subqueries of any depth. Related to [#42648](https://github.com/ClickHouse/ClickHouse/issues/42648). [#43905](https://github.com/ClickHouse/ClickHouse/pull/43905) ([Igor Nikonov](https://github.com/devcrafter)). +* Add the ability to disable deduplication of files for BACKUP (for backups without deduplication, ATTACH can be used instead of a full RESTORE). For example `BACKUP foo TO S3(...) SETTINGS deduplicate_files=0` (default `deduplicate_files=1`). [#43947](https://github.com/ClickHouse/ClickHouse/pull/43947) ([Azat Khuzhin](https://github.com/azat)). +* Refactor and improve schema inference for text formats. Add a new setting `schema_inference_make_columns_nullable` that controls making result types `Nullable` (enabled by default). [#44019](https://github.com/ClickHouse/ClickHouse/pull/44019) ([Kruglov Pavel](https://github.com/Avogar)). +* Better support for the `PROXYv1` protocol. [#44135](https://github.com/ClickHouse/ClickHouse/pull/44135) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Add information about the latest part check by cleanup threads into the `system.parts` table. [#44244](https://github.com/ClickHouse/ClickHouse/pull/44244) ([Dmitry Novik](https://github.com/novikd)). +* Disable table functions in readonly mode for inserts. [#44290](https://github.com/ClickHouse/ClickHouse/pull/44290) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Add a setting `simultaneous_parts_removal_limit` to allow limiting the number of parts being processed by one iteration of CleanupThread. [#44461](https://github.com/ClickHouse/ClickHouse/pull/44461) ([Dmitry Novik](https://github.com/novikd)). +* Do not initialize ReadBufferFromS3 when only virtual columns are needed in a query. This may be helpful for [#44246](https://github.com/ClickHouse/ClickHouse/issues/44246). [#44493](https://github.com/ClickHouse/ClickHouse/pull/44493) ([chen](https://github.com/xiedeyantu)). +* Prevent duplicate column name hints. Closes [#44130](https://github.com/ClickHouse/ClickHouse/issues/44130). [#44519](https://github.com/ClickHouse/ClickHouse/pull/44519) ([Joanna Hulboj](https://github.com/jh0x)). +* Allow macro substitution in the endpoint of disks. Resolves [#40951](https://github.com/ClickHouse/ClickHouse/issues/40951). [#44533](https://github.com/ClickHouse/ClickHouse/pull/44533) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Improve schema inference when `input_format_json_read_object_as_string` is enabled. [#44546](https://github.com/ClickHouse/ClickHouse/pull/44546) ([Kruglov Pavel](https://github.com/Avogar)). +* Add a user-level setting `database_replicated_allow_replicated_engine_arguments` which allows banning the creation of `ReplicatedMergeTree` tables with arguments in `DatabaseReplicated`. [#44566](https://github.com/ClickHouse/ClickHouse/pull/44566) ([alesapin](https://github.com/alesapin)). +* Prevent users from mistakenly specifying a zero (invalid) value for `index_granularity`. This closes [#44536](https://github.com/ClickHouse/ClickHouse/issues/44536). 
[#44578](https://github.com/ClickHouse/ClickHouse/pull/44578) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added the possibility to set the path to the service keytab file via the `keytab` parameter in the `kerberos` section of config.xml. [#44594](https://github.com/ClickHouse/ClickHouse/pull/44594) ([Roman Vasin](https://github.com/rvasin)). +* Use the already written part of the query for fuzzy search (it is passed to the `skim` library, which is written in Rust and statically linked into ClickHouse). [#44600](https://github.com/ClickHouse/ClickHouse/pull/44600) ([Azat Khuzhin](https://github.com/azat)). +* Enable `input_format_json_read_objects_as_strings` by default to be able to read nested JSON objects while the JSON Object type is experimental. [#44657](https://github.com/ClickHouse/ClickHouse/pull/44657) ([Kruglov Pavel](https://github.com/Avogar)). +* Improve deduplication of async inserts: duplicate async inserts are now deduplicated in memory before Keeper is queried. [#44682](https://github.com/ClickHouse/ClickHouse/pull/44682) ([Han Fei](https://github.com/hanfei1991)). +* Input/output `Avro` format will parse the bool type as the ClickHouse `Bool` type. [#44684](https://github.com/ClickHouse/ClickHouse/pull/44684) ([Kruglov Pavel](https://github.com/Avogar)). +* Support Bool type in Arrow/Parquet/ORC. Closes [#43970](https://github.com/ClickHouse/ClickHouse/issues/43970). [#44698](https://github.com/ClickHouse/ClickHouse/pull/44698) ([Kruglov Pavel](https://github.com/Avogar)). +* Don't greedily parse beyond the quotes when reading UUIDs - it may lead to mistakenly successful parsing of incorrect data. [#44686](https://github.com/ClickHouse/ClickHouse/pull/44686) ([Raúl Marín](https://github.com/Algunenano)). +* Infer UInt64 in case of Int64 overflow and fix some transforms in schema inference. [#44696](https://github.com/ClickHouse/ClickHouse/pull/44696) ([Kruglov Pavel](https://github.com/Avogar)). +* Previously, dependency resolution inside a `Replicated` database was done in a hacky way; now it is done properly using an explicit graph. [#44697](https://github.com/ClickHouse/ClickHouse/pull/44697) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix `output_format_pretty_row_numbers` not preserving the row counter across blocks. Closes [#44815](https://github.com/ClickHouse/ClickHouse/issues/44815). [#44832](https://github.com/ClickHouse/ClickHouse/pull/44832) ([flynn](https://github.com/ucasfl)). +* Don't report errors in `system.errors` due to parts being merged concurrently with the background cleanup process. [#44874](https://github.com/ClickHouse/ClickHouse/pull/44874) ([Raúl Marín](https://github.com/Algunenano)). +* Optimize and fix metrics for Distributed async INSERT. [#44922](https://github.com/ClickHouse/ClickHouse/pull/44922) ([Azat Khuzhin](https://github.com/azat)). +* Added settings to disallow concurrent backups and restores. Resolves [#43891](https://github.com/ClickHouse/ClickHouse/issues/43891). Implementation: server-level settings to disallow concurrent backups and restores are read and set when BackupWorker is created in the Context, and are set to true by default; before starting a backup or restore, a check is performed to see whether any other backups/restores are running (for internal requests, the self node is identified using backup_uuid). [#45072](https://github.com/ClickHouse/ClickHouse/pull/45072) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Add `` config parameter for system logs. 
[#45320](https://github.com/ClickHouse/ClickHouse/pull/45320) ([Stig Bakken](https://github.com/stigsb)). #### Build/Testing/Packaging Improvement - -* Enforce documentation for every setting. [#40644](https://github.com/ClickHouse/ClickHouse/pull/40644) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Enforce documentation for every current metric. [#40645](https://github.com/ClickHouse/ClickHouse/pull/40645) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Enforce documentation for every profile event counter. Write the documentation where it was missing. [#40646](https://github.com/ClickHouse/ClickHouse/pull/40646) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Allow minimal `clickhouse-local` build by correcting some dependencies. [#40460](https://github.com/ClickHouse/ClickHouse/pull/40460) ([Alexey Milovidov](https://github.com/alexey-milovidov)). It is less than 50 MiB. -* Calculate and report SQL function coverage in tests. [#40593](https://github.com/ClickHouse/ClickHouse/issues/40593). [#40647](https://github.com/ClickHouse/ClickHouse/pull/40647) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Enforce documentation for every MergeTree setting. [#40648](https://github.com/ClickHouse/ClickHouse/pull/40648) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* A prototype of embedded reference documentation for high-level uniform server components. [#40649](https://github.com/ClickHouse/ClickHouse/pull/40649) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* We will check all queries from the changed perf tests to ensure that all changed queries were tested. [#40322](https://github.com/ClickHouse/ClickHouse/pull/40322) ([Nikita Taranov](https://github.com/nickitat)). -* Fix TGZ packages. [#40681](https://github.com/ClickHouse/ClickHouse/pull/40681) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Fix debug symbols. [#40873](https://github.com/ClickHouse/ClickHouse/pull/40873) ([Azat Khuzhin](https://github.com/azat)). -* Extended the CI configuration to create a x86 SSE2-only build. Useful for old or embedded hardware. [#40999](https://github.com/ClickHouse/ClickHouse/pull/40999) ([Robert Schulze](https://github.com/rschu1ze)). -* Switch to llvm/clang 15. [#41046](https://github.com/ClickHouse/ClickHouse/pull/41046) ([Azat Khuzhin](https://github.com/azat)). -* Continuation of [#40938](https://github.com/ClickHouse/ClickHouse/issues/40938). Fix ODR violation for `Loggers` class. Fixes [#40398](https://github.com/ClickHouse/ClickHouse/issues/40398), [#40937](https://github.com/ClickHouse/ClickHouse/issues/40937). [#41060](https://github.com/ClickHouse/ClickHouse/pull/41060) ([Dmitry Novik](https://github.com/novikd)). -* Add macOS binaries to GitHub release assets, it fixes [#37718](https://github.com/ClickHouse/ClickHouse/issues/37718). [#41088](https://github.com/ClickHouse/ClickHouse/pull/41088) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* The c-ares library is now bundled with ClickHouse's build system. [#41239](https://github.com/ClickHouse/ClickHouse/pull/41239) ([Robert Schulze](https://github.com/rschu1ze)). -* Get rid of `dlopen` from the main ClickHouse code. It remains in the library-bridge and odbc-bridge. [#41428](https://github.com/ClickHouse/ClickHouse/pull/41428) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Don't allow `dlopen` in the main ClickHouse binary, because it is harmful and insecure. We don't use it. 
But it can be used by some libraries for the implementation of "plugins". We absolutely discourage the ancient technique of loading 3rd-party uncontrolled dangerous libraries into the process address space, because it is insane. [#41429](https://github.com/ClickHouse/ClickHouse/pull/41429) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Add `source` field to deb packages, update `nfpm`. [#41531](https://github.com/ClickHouse/ClickHouse/pull/41531) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Support for DWARF-5 in the in-house DWARF parser. [#40710](https://github.com/ClickHouse/ClickHouse/pull/40710) ([Azat Khuzhin](https://github.com/azat)). -* Add fault injection in ZooKeeper client for testing [#30498](https://github.com/ClickHouse/ClickHouse/pull/30498) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Add stateless tests with s3 storage with debug and tsan [#35262](https://github.com/ClickHouse/ClickHouse/pull/35262) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Trying stress on top of S3 [#36837](https://github.com/ClickHouse/ClickHouse/pull/36837) ([alesapin](https://github.com/alesapin)). -* Enable `concurrency-mt-unsafe` in `clang-tidy` [#40224](https://github.com/ClickHouse/ClickHouse/pull/40224) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Statically link with the `skim` library (it is written in Rust) for fuzzy search in clickhouse client/local history. [#44239](https://github.com/ClickHouse/ClickHouse/pull/44239) ([Azat Khuzhin](https://github.com/azat)). +* We removed support for shared linking because of Rust. Actually, Rust is only an excuse for this removal, and we wanted to remove it nevertheless. [#44828](https://github.com/ClickHouse/ClickHouse/pull/44828) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove the dependency on the `adduser` tool from the packages, because we don't use it. This fixes [#44934](https://github.com/ClickHouse/ClickHouse/issues/44934). [#45011](https://github.com/ClickHouse/ClickHouse/pull/45011) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The `SQLite` library is updated to the latest. It is used for the SQLite database and table integration engines. Also, fixed a false-positive TSan report. This closes [#45027](https://github.com/ClickHouse/ClickHouse/issues/45027). [#45031](https://github.com/ClickHouse/ClickHouse/pull/45031) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* CRC-32 changes to address the WeakHash collision issue in PowerPC. [#45144](https://github.com/ClickHouse/ClickHouse/pull/45144) ([MeenaRenganathan22](https://github.com/MeenaRenganathan22)). +* Update aws-c* submodules [#43020](https://github.com/ClickHouse/ClickHouse/pull/43020) ([Vitaly Baranov](https://github.com/vitlibar)). +* Automatically merge green backport PRs and green approved PRs [#41110](https://github.com/ClickHouse/ClickHouse/pull/41110) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Introduce a [website](https://aretestsgreenyet.com/) for the status of ClickHouse CI. [Source](https://github.com/ClickHouse/aretestsgreenyet). #### Bug Fix -* Fix potential dataloss due to [a bug in AWS SDK](https://github.com/aws/aws-sdk-cpp/issues/658). Bug can be triggered only when clickhouse is used over S3. [#40506](https://github.com/ClickHouse/ClickHouse/pull/40506) ([alesapin](https://github.com/alesapin)). This bug has been open for 5 years in AWS SDK and is closed after our report. -* Malicious data in Native format might cause a crash. 
[#41441](https://github.com/ClickHouse/ClickHouse/pull/41441) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* The aggregate function `categorialInformationValue` was having incorrectly defined properties, which might cause a null pointer dereferencing at runtime. This closes [#41443](https://github.com/ClickHouse/ClickHouse/issues/41443). [#41449](https://github.com/ClickHouse/ClickHouse/pull/41449) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Writing data in Apache `ORC` format might lead to a buffer overrun. [#41458](https://github.com/ClickHouse/ClickHouse/pull/41458) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix memory safety issues with functions `encrypt` and `contingency` if Array of Nullable is used as an argument. This fixes [#41004](https://github.com/ClickHouse/ClickHouse/issues/41004). [#40195](https://github.com/ClickHouse/ClickHouse/pull/40195) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix bugs in MergeJoin when 'not_processed' is not null. [#40335](https://github.com/ClickHouse/ClickHouse/pull/40335) ([liql2007](https://github.com/liql2007)). -* Fix incorrect result in case of decimal precision loss in IN operator, ref [#41125](https://github.com/ClickHouse/ClickHouse/issues/41125). [#41130](https://github.com/ClickHouse/ClickHouse/pull/41130) ([Vladimir C](https://github.com/vdimir)). -* Fix filling of missed `Nested` columns with multiple levels. [#37152](https://github.com/ClickHouse/ClickHouse/pull/37152) ([Anton Popov](https://github.com/CurtizJ)). -* Fix SYSTEM UNFREEZE query for Ordinary (deprecated) database. Fix for https://github.com/ClickHouse/ClickHouse/pull/36424. [#38262](https://github.com/ClickHouse/ClickHouse/pull/38262) ([Vadim Volodin](https://github.com/PolyProgrammist)). -* Fix unused unknown columns introduced by WITH statement. This fixes [#37812](https://github.com/ClickHouse/ClickHouse/issues/37812) . [#39131](https://github.com/ClickHouse/ClickHouse/pull/39131) ([Amos Bird](https://github.com/amosbird)). -* Fix query analysis for ORDER BY in presence of window functions. Fixes [#38741](https://github.com/ClickHouse/ClickHouse/issues/38741) Fixes [#24892](https://github.com/ClickHouse/ClickHouse/issues/24892). [#39354](https://github.com/ClickHouse/ClickHouse/pull/39354) ([Dmitry Novik](https://github.com/novikd)). -* Fixed `Unknown identifier (aggregate-function)` exception which appears when a user tries to calculate WINDOW ORDER BY/PARTITION BY expressions over aggregate functions. [#39762](https://github.com/ClickHouse/ClickHouse/pull/39762) ([Vladimir Chebotaryov](https://github.com/quickhouse)). -* Limit number of analyze for one query with setting `max_analyze_depth`. It prevents exponential blow up of analysis time for queries with extraordinarily large number of subqueries. [#40334](https://github.com/ClickHouse/ClickHouse/pull/40334) ([Vladimir C](https://github.com/vdimir)). -* Fix rare bug with column TTL for MergeTree engines family: In case of repeated vertical merge the error `Cannot unlink file ColumnName.bin ... No such file or directory.` could happen. [#40346](https://github.com/ClickHouse/ClickHouse/pull/40346) ([alesapin](https://github.com/alesapin)). -* Use DNS entries for both IPv4 and IPv6 if present. [#40353](https://github.com/ClickHouse/ClickHouse/pull/40353) ([Maksim Kita](https://github.com/kitaisreal)). -* Allow to read snappy compressed files from Hadoop. 
[#40482](https://github.com/ClickHouse/ClickHouse/pull/40482) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix crash while parsing values of type `Object` (experimental feature) that contains arrays of variadic dimension. [#40483](https://github.com/ClickHouse/ClickHouse/pull/40483) ([Duc Canh Le](https://github.com/canhld94)). -* Fix settings `input_format_tsv_skip_first_lines`. [#40491](https://github.com/ClickHouse/ClickHouse/pull/40491) ([mini4](https://github.com/mini4)). -* Fix bug (race condition) when starting up MaterializedPostgreSQL database/table engine. [#40262](https://github.com/ClickHouse/ClickHouse/issues/40262). Fix error with reaching limit of relcache_callback_list slots. [#40511](https://github.com/ClickHouse/ClickHouse/pull/40511) ([Maksim Buren](https://github.com/maks-buren630501)). -* Fix possible error 'Decimal math overflow' while parsing DateTime64. [#40546](https://github.com/ClickHouse/ClickHouse/pull/40546) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix vertical merge of parts with lightweight deleted rows. [#40559](https://github.com/ClickHouse/ClickHouse/pull/40559) ([Alexander Gololobov](https://github.com/davenger)). -* Fix segment fault when writing data to URL table engine if it enables compression. [#40565](https://github.com/ClickHouse/ClickHouse/pull/40565) ([Frank Chen](https://github.com/FrankChen021)). -* Fix possible logical error `'Invalid Field get from type UInt64 to type String'` in arrayElement function with Map. [#40572](https://github.com/ClickHouse/ClickHouse/pull/40572) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix possible race in filesystem cache. [#40586](https://github.com/ClickHouse/ClickHouse/pull/40586) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Removed skipping of mutations in unaffected partitions of `MergeTree` tables, because this feature never worked correctly and might cause resurrection of finished mutations. [#40589](https://github.com/ClickHouse/ClickHouse/pull/40589) ([Alexander Tokmakov](https://github.com/tavplubix)). -* The clickhouse server will crash if we add a grpc port which has been occupied to the configuration in runtime. [#40597](https://github.com/ClickHouse/ClickHouse/pull/40597) ([何李夫](https://github.com/helifu)). -* Fix `base58Encode / base58Decode` handling leading 0 / '1'. [#40620](https://github.com/ClickHouse/ClickHouse/pull/40620) ([Andrey Zvonov](https://github.com/zvonand)). -* keeper-fix: fix race in accessing logs while snapshot is being installed. [#40627](https://github.com/ClickHouse/ClickHouse/pull/40627) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix short circuit execution of toFixedString function. Solves (partially) [#40622](https://github.com/ClickHouse/ClickHouse/issues/40622). [#40628](https://github.com/ClickHouse/ClickHouse/pull/40628) ([Kruglov Pavel](https://github.com/Avogar)). -* Fixes SQLite int8 column conversion to int64 column in ClickHouse. Fixes [#40639](https://github.com/ClickHouse/ClickHouse/issues/40639). [#40642](https://github.com/ClickHouse/ClickHouse/pull/40642) ([Barum Rho](https://github.com/barumrho)). -* Fix stack overflow in recursive `Buffer` tables. This closes [#40637](https://github.com/ClickHouse/ClickHouse/issues/40637). [#40643](https://github.com/ClickHouse/ClickHouse/pull/40643) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* During insertion of a new query to the `ProcessList` allocations happen. 
If we reach the memory limit during these allocations we can not use `OvercommitTracker`, because `ProcessList::mutex` is already acquired. Fixes [#40611](https://github.com/ClickHouse/ClickHouse/issues/40611). [#40677](https://github.com/ClickHouse/ClickHouse/pull/40677) ([Dmitry Novik](https://github.com/novikd)). -* Fix LOGICAL_ERROR with max_read_buffer_size=0 during reading marks. [#40705](https://github.com/ClickHouse/ClickHouse/pull/40705) ([Azat Khuzhin](https://github.com/azat)). -* Fix memory leak while pushing to MVs w/o query context (from Kafka/...). [#40732](https://github.com/ClickHouse/ClickHouse/pull/40732) ([Azat Khuzhin](https://github.com/azat)). -* Fix possible error Attempt to read after eof in CSV schema inference. [#40746](https://github.com/ClickHouse/ClickHouse/pull/40746) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix logical error in write-through cache "File segment completion can be done only by downloader". Closes [#40748](https://github.com/ClickHouse/ClickHouse/issues/40748). [#40759](https://github.com/ClickHouse/ClickHouse/pull/40759) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Make the result of GROUPING function the same as in SQL and other DBMS. [#40762](https://github.com/ClickHouse/ClickHouse/pull/40762) ([Dmitry Novik](https://github.com/novikd)). -* In [#40595](https://github.com/ClickHouse/ClickHouse/issues/40595) it was reported that the `host_regexp` functionality was not working properly with a name to address resolution in `/etc/hosts`. It's fixed. [#40769](https://github.com/ClickHouse/ClickHouse/pull/40769) ([Arthur Passos](https://github.com/arthurpassos)). -* Fix incremental backups for Log family. [#40827](https://github.com/ClickHouse/ClickHouse/pull/40827) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix extremely rare bug which can lead to potential data loss in zero-copy replication. [#40844](https://github.com/ClickHouse/ClickHouse/pull/40844) ([alesapin](https://github.com/alesapin)). -* Fix key condition analyzing crashes when same set expression built from different column(s). [#40850](https://github.com/ClickHouse/ClickHouse/pull/40850) ([Duc Canh Le](https://github.com/canhld94)). -* Fix nested JSON Objects schema inference. [#40851](https://github.com/ClickHouse/ClickHouse/pull/40851) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix 3-digit prefix directory for filesystem cache files not being deleted if empty. Closes [#40797](https://github.com/ClickHouse/ClickHouse/issues/40797). [#40867](https://github.com/ClickHouse/ClickHouse/pull/40867) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix uncaught DNS_ERROR on failed connection to replicas. [#40881](https://github.com/ClickHouse/ClickHouse/pull/40881) ([Robert Coelho](https://github.com/coelho)). -* Fix bug when removing unneeded columns in subquery. [#40884](https://github.com/ClickHouse/ClickHouse/pull/40884) ([luocongkai](https://github.com/TKaxe)). -* Fix extra memory allocation for remote read buffers. [#40896](https://github.com/ClickHouse/ClickHouse/pull/40896) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fixed a behaviour when user with explicitly revoked grant for dropping databases can still drop it. [#40906](https://github.com/ClickHouse/ClickHouse/pull/40906) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* A fix for ClickHouse Keeper: correctly compare paths in write requests to Keeper internal system node paths. 
[#40918](https://github.com/ClickHouse/ClickHouse/pull/40918) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix deadlock in WriteBufferFromS3. [#40943](https://github.com/ClickHouse/ClickHouse/pull/40943) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix access rights for `DESCRIBE TABLE url()` and some other `DESCRIBE TABLE ()`. [#40975](https://github.com/ClickHouse/ClickHouse/pull/40975) ([Vitaly Baranov](https://github.com/vitlibar)). -* Remove wrong parser logic for `WITH GROUPING SETS` which may lead to nullptr dereference. [#41049](https://github.com/ClickHouse/ClickHouse/pull/41049) ([Duc Canh Le](https://github.com/canhld94)). -* A fix for ClickHouse Keeper: fix possible segfault during Keeper shutdown. [#41075](https://github.com/ClickHouse/ClickHouse/pull/41075) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix possible segfaults, use-heap-after-free and memory leak in aggregate function combinators. Closes [#40848](https://github.com/ClickHouse/ClickHouse/issues/40848). [#41083](https://github.com/ClickHouse/ClickHouse/pull/41083) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix query_views_log with Window views. [#41132](https://github.com/ClickHouse/ClickHouse/pull/41132) ([Raúl Marín](https://github.com/Algunenano)). -* Disables optimize_monotonous_functions_in_order_by by default, mitigates: [#40094](https://github.com/ClickHouse/ClickHouse/issues/40094). [#41136](https://github.com/ClickHouse/ClickHouse/pull/41136) ([Denny Crane](https://github.com/den-crane)). -* Fixed "possible deadlock avoided" error on automatic conversion of database engine from Ordinary to Atomic. [#41146](https://github.com/ClickHouse/ClickHouse/pull/41146) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Fix SIGSEGV in SortedBlocksWriter in case of empty block (possible to get with `optimize_aggregation_in_order` and `join_algorithm=auto`). [#41154](https://github.com/ClickHouse/ClickHouse/pull/41154) ([Azat Khuzhin](https://github.com/azat)). -* Fix incorrect query result when trivial count optimization is in effect with array join. This fixes [#39431](https://github.com/ClickHouse/ClickHouse/issues/39431). [#41158](https://github.com/ClickHouse/ClickHouse/pull/41158) ([Denny Crane](https://github.com/den-crane)). -* Fix stack-use-after-return in GetPriorityForLoadBalancing::getPriorityFunc(). [#41159](https://github.com/ClickHouse/ClickHouse/pull/41159) ([Azat Khuzhin](https://github.com/azat)). -* Fix positional arguments exception Positional argument out of bounds. Closes [#40634](https://github.com/ClickHouse/ClickHouse/issues/40634). [#41189](https://github.com/ClickHouse/ClickHouse/pull/41189) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix background clean up of broken detached parts. [#41190](https://github.com/ClickHouse/ClickHouse/pull/41190) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix exponential query rewrite in case of lots of cross joins with where, close [#21557](https://github.com/ClickHouse/ClickHouse/issues/21557). [#41223](https://github.com/ClickHouse/ClickHouse/pull/41223) ([Vladimir C](https://github.com/vdimir)). -* Fix possible logical error in write-through cache, which happened because not all types of exception were handled as needed. Closes [#41208](https://github.com/ClickHouse/ClickHouse/issues/41208). [#41232](https://github.com/ClickHouse/ClickHouse/pull/41232) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix String log entry in system.filesystem_cache_log. 
[#41233](https://github.com/ClickHouse/ClickHouse/pull/41233) ([jmimbrero](https://github.com/josemimbrero-tinybird)). -* Queries with `OFFSET` clause in subquery and `WHERE` clause in outer query might return incorrect result, it's fixed. Fixes [#40416](https://github.com/ClickHouse/ClickHouse/issues/40416). [#41280](https://github.com/ClickHouse/ClickHouse/pull/41280) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Fix possible wrong query result with `query_plan_optimize_primary_key` enabled. Fixes [#40599](https://github.com/ClickHouse/ClickHouse/issues/40599). [#41281](https://github.com/ClickHouse/ClickHouse/pull/41281) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Do not allow invalid sequences influence other rows in lowerUTF8/upperUTF8. [#41286](https://github.com/ClickHouse/ClickHouse/pull/41286) ([Azat Khuzhin](https://github.com/azat)). -* Fix `ALTER ADD COLUMN` queries with columns of type `Object`. [#41290](https://github.com/ClickHouse/ClickHouse/pull/41290) ([Anton Popov](https://github.com/CurtizJ)). -* Fixed "No node" error when selecting from `system.distributed_ddl_queue` when there's no `distributed_ddl.path` in config. Fixes [#41096](https://github.com/ClickHouse/ClickHouse/issues/41096). [#41296](https://github.com/ClickHouse/ClickHouse/pull/41296) ([young scott](https://github.com/young-scott)). -* Fix incorrect logical error `Expected relative path` in disk object storage. Related to [#41246](https://github.com/ClickHouse/ClickHouse/issues/41246). [#41297](https://github.com/ClickHouse/ClickHouse/pull/41297) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Add column type check before UUID insertion in MsgPack format. [#41309](https://github.com/ClickHouse/ClickHouse/pull/41309) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix possible crash after inserting asynchronously (with enabled setting `async_insert`) malformed data to columns of type `Object`. It could happen, if JSONs in all batches of async inserts were invalid and could not be parsed. [#41336](https://github.com/ClickHouse/ClickHouse/pull/41336) ([Anton Popov](https://github.com/CurtizJ)). -* Fix possible deadlock with async_socket_for_remote/use_hedged_requests and parallel KILL. [#41343](https://github.com/ClickHouse/ClickHouse/pull/41343) ([Azat Khuzhin](https://github.com/azat)). -* Disables optimize_rewrite_sum_if_to_count_if by default, mitigates: [#38605](https://github.com/ClickHouse/ClickHouse/issues/38605) [#38683](https://github.com/ClickHouse/ClickHouse/issues/38683). [#41388](https://github.com/ClickHouse/ClickHouse/pull/41388) ([Denny Crane](https://github.com/den-crane)). -* Since 22.8 `ON CLUSTER` clause is ignored if database is `Replicated` and cluster name and database name are the same. Because of this `DROP PARTITION ON CLUSTER` worked unexpected way with `Replicated`. It's fixed, now `ON CLUSTER` clause is ignored only for queries that are replicated on database level. Fixes [#41299](https://github.com/ClickHouse/ClickHouse/issues/41299). [#41390](https://github.com/ClickHouse/ClickHouse/pull/41390) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Fix possible hung/deadlock on query cancellation (`KILL QUERY` or server shutdown). [#41467](https://github.com/ClickHouse/ClickHouse/pull/41467) ([Azat Khuzhin](https://github.com/azat)). -* Fix possible server crash when using the JBOD feature. This fixes [#41365](https://github.com/ClickHouse/ClickHouse/issues/41365). 
[#41483](https://github.com/ClickHouse/ClickHouse/pull/41483) ([Amos Bird](https://github.com/amosbird)). -* Fix conversion from nullable fixed string to string. [#41541](https://github.com/ClickHouse/ClickHouse/pull/41541) ([Duc Canh Le](https://github.com/canhld94)). -* Prevent crash when passing wrong aggregation states to groupBitmap*. [#41563](https://github.com/ClickHouse/ClickHouse/pull/41563) ([Raúl Marín](https://github.com/Algunenano)). -* Queries with `ORDER BY` and `1500 <= LIMIT <= max_block_size` could return incorrect result with missing rows from top. Fixes [#41182](https://github.com/ClickHouse/ClickHouse/issues/41182). [#41576](https://github.com/ClickHouse/ClickHouse/pull/41576) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix read bytes/rows in X-ClickHouse-Summary with materialized views. [#41586](https://github.com/ClickHouse/ClickHouse/pull/41586) ([Raúl Marín](https://github.com/Algunenano)). -* Fix possible `pipeline stuck` exception for queries with `OFFSET`. The error was found with `enable_optimize_predicate_expression = 0` and always false condition in `WHERE`. Fixes [#41383](https://github.com/ClickHouse/ClickHouse/issues/41383). [#41588](https://github.com/ClickHouse/ClickHouse/pull/41588) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). - -### ClickHouse release 22.8-lts, 2022-08-18 - -#### Backward Incompatible Change - -* Extended range of `Date32` and `DateTime64` to support dates from the year 1900 to 2299. In previous versions, the supported interval was only from the year 1925 to 2283. The implementation is using the proleptic Gregorian calendar (which is conformant with [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601):2004 (clause 3.2.1 The Gregorian calendar)) instead of accounting for historical transitions from the Julian to the Gregorian calendar. This change affects implementation-specific behavior for out-of-range arguments. E.g. if in previous versions the value of `1899-01-01` was clamped to `1925-01-01`, in the new version it will be clamped to `1900-01-01`. It changes the behavior of rounding with `toStartOfInterval` if you pass `INTERVAL 3 QUARTER` up to one quarter because the intervals are counted from an implementation-specific point of time. Closes [#28216](https://github.com/ClickHouse/ClickHouse/issues/28216), improves [#38393](https://github.com/ClickHouse/ClickHouse/issues/38393). [#39425](https://github.com/ClickHouse/ClickHouse/pull/39425) ([Roman Vasin](https://github.com/rvasin)). -* Now, all relevant dictionary sources respect `remote_url_allow_hosts` setting. It was already done for HTTP, Cassandra, Redis. Added ClickHouse, MongoDB, MySQL, PostgreSQL. Host is checked only for dictionaries created from DDL. [#39184](https://github.com/ClickHouse/ClickHouse/pull/39184) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Make the remote filesystem cache composable, allow not to evict certain files (regarding idx, mrk, ..), delete old cache version. Now it is possible to configure cache over Azure blob storage disk, over Local disk, over StaticWeb disk, etc. This PR is marked backward incompatible because cache configuration changes and in order for cache to work need to update the config file. Old cache will still be used with new configuration. The server will startup fine with the old cache configuration. Closes https://github.com/ClickHouse/ClickHouse/issues/36140. Closes https://github.com/ClickHouse/ClickHouse/issues/37889. ([Kseniia Sumarokova](https://github.com/kssenii)). 
[#36171](https://github.com/ClickHouse/ClickHouse/pull/36171)) - -#### New Feature - -* Query parameters can be set in interactive mode as `SET param_abc = 'def'` and transferred via the native protocol as settings. [#39906](https://github.com/ClickHouse/ClickHouse/pull/39906) ([Nikita Taranov](https://github.com/nickitat)). -* Quota key can be set in the native protocol ([Yakov Olkhovsky](https://github.com/ClickHouse/ClickHouse/pull/39874)). -* Added a setting `exact_rows_before_limit` (0/1). When enabled, ClickHouse will provide exact value for `rows_before_limit_at_least` statistic, but with the cost that the data before limit will have to be read completely. This closes [#6613](https://github.com/ClickHouse/ClickHouse/issues/6613). [#25333](https://github.com/ClickHouse/ClickHouse/pull/25333) ([kevin wan](https://github.com/MaxWk)). -* Added support for parallel distributed insert select with `s3Cluster` table function into tables with `Distributed` and `Replicated` engine [#34670](https://github.com/ClickHouse/ClickHouse/issues/34670). [#39107](https://github.com/ClickHouse/ClickHouse/pull/39107) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Add new settings to control schema inference from text formats: - `input_format_try_infer_dates` - try infer dates from strings. - `input_format_try_infer_datetimes` - try infer datetimes from strings. - `input_format_try_infer_integers` - try infer `Int64` instead of `Float64`. - `input_format_json_try_infer_numbers_from_strings` - try infer numbers from json strings in JSON formats. [#39186](https://github.com/ClickHouse/ClickHouse/pull/39186) ([Kruglov Pavel](https://github.com/Avogar)). -* An option to provide JSON formatted log output. The purpose is to allow easier ingestion and query in log analysis tools. [#39277](https://github.com/ClickHouse/ClickHouse/pull/39277) ([Mallik Hassan](https://github.com/SadiHassan)). -* Add function `nowInBlock` which allows getting the current time during long-running and continuous queries. Closes [#39522](https://github.com/ClickHouse/ClickHouse/issues/39522). Notes: there are no functions `now64InBlock` neither `todayInBlock`. [#39533](https://github.com/ClickHouse/ClickHouse/pull/39533) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Add ability to specify settings for an `executable()` table function. [#39681](https://github.com/ClickHouse/ClickHouse/pull/39681) ([Constantine Peresypkin](https://github.com/pkit)). -* Implemented automatic conversion of database engine from `Ordinary` to `Atomic`. Create empty `convert_ordinary_to_atomic` file in `flags` directory and all `Ordinary` databases will be converted automatically on next server start. Resolves [#39546](https://github.com/ClickHouse/ClickHouse/issues/39546). [#39933](https://github.com/ClickHouse/ClickHouse/pull/39933) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Support `SELECT ... INTO OUTFILE '...' AND STDOUT`. [#37490](https://github.com/ClickHouse/ClickHouse/issues/37490). [#39054](https://github.com/ClickHouse/ClickHouse/pull/39054) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Add formats `PrettyMonoBlock`, `PrettyNoEscapesMonoBlock`, `PrettyCompactNoEscapes`, `PrettyCompactNoEscapesMonoBlock`, `PrettySpaceNoEscapes`, `PrettySpaceMonoBlock`, `PrettySpaceNoEscapesMonoBlock`. [#39646](https://github.com/ClickHouse/ClickHouse/pull/39646) ([Kruglov Pavel](https://github.com/Avogar)). 
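The new `Pretty*` output formats from the item above are requested like any other output format; any name from that list can be substituted in the sketch below:

```sql
-- Pick any of the format names listed in the entry above.
SELECT number, toString(number) AS s
FROM numbers(3)
FORMAT PrettyCompactNoEscapesMonoBlock;
```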
-* Add new setting schema_inference_hints that allows to specify structure hints in schema inference for specific columns. Closes [#39569](https://github.com/ClickHouse/ClickHouse/issues/39569). [#40068](https://github.com/ClickHouse/ClickHouse/pull/40068) ([Kruglov Pavel](https://github.com/Avogar)). - -#### Experimental Feature - -* Support SQL standard DELETE FROM syntax on merge tree tables and lightweight delete implementation for merge tree families. [#37893](https://github.com/ClickHouse/ClickHouse/pull/37893) ([Jianmei Zhang](https://github.com/zhangjmruc)) ([Alexander Gololobov](https://github.com/davenger)). Note: this new feature does not make ClickHouse an HTAP DBMS. - -#### Performance Improvement - -* Improved memory usage during memory efficient merging of aggregation results. [#39429](https://github.com/ClickHouse/ClickHouse/pull/39429) ([Nikita Taranov](https://github.com/nickitat)). -* Added concurrency control logic to limit total number of concurrent threads created by queries. [#37558](https://github.com/ClickHouse/ClickHouse/pull/37558) ([Sergei Trifonov](https://github.com/serxa)). Add `concurrent_threads_soft_limit parameter` to increase performance in case of high QPS by means of limiting total number of threads for all queries. [#37285](https://github.com/ClickHouse/ClickHouse/pull/37285) ([Roman Vasin](https://github.com/rvasin)). -* Add `SLRU` cache policy for uncompressed cache and marks cache. ([Kseniia Sumarokova](https://github.com/kssenii)). [#34651](https://github.com/ClickHouse/ClickHouse/pull/34651) ([alexX512](https://github.com/alexX512)). Decoupling local cache function and cache algorithm [#38048](https://github.com/ClickHouse/ClickHouse/pull/38048) ([Han Shukai](https://github.com/KinderRiven)). -* Intel® In-Memory Analytics Accelerator (Intel® IAA) is a hardware accelerator available in the upcoming generation of Intel® Xeon® Scalable processors ("Sapphire Rapids"). Its goal is to speed up common operations in analytics like data (de)compression and filtering. ClickHouse gained the new "DeflateQpl" compression codec which utilizes the Intel® IAA offloading technology to provide a high-performance DEFLATE implementation. The codec uses the [Intel® Query Processing Library (QPL)](https://github.com/intel/qpl) which abstracts access to the hardware accelerator, respectively to a software fallback in case the hardware accelerator is not available. DEFLATE provides in general higher compression rates than ClickHouse's LZ4 default codec, and as a result, offers less disk I/O and lower main memory consumption. [#36654](https://github.com/ClickHouse/ClickHouse/pull/36654) ([jasperzhu](https://github.com/jinjunzh)). [#39494](https://github.com/ClickHouse/ClickHouse/pull/39494) ([Robert Schulze](https://github.com/rschu1ze)). -* `DISTINCT` in order with `ORDER BY`: Deduce way to sort based on input stream sort description. Skip sorting if input stream is already sorted. [#38719](https://github.com/ClickHouse/ClickHouse/pull/38719) ([Igor Nikonov](https://github.com/devcrafter)). Improve memory usage (significantly) and query execution time + use `DistinctSortedChunkTransform` for final distinct when `DISTINCT` columns match `ORDER BY` columns, but rename to `DistinctSortedStreamTransform` in `EXPLAIN PIPELINE` → this improves memory usage significantly + remove unnecessary allocations in hot loop in `DistinctSortedChunkTransform`. [#39432](https://github.com/ClickHouse/ClickHouse/pull/39432) ([Igor Nikonov](https://github.com/devcrafter)). 
Use `DistinctSortedTransform` only when sort description is applicable to DISTINCT columns, otherwise fall back to ordinary DISTINCT implementation + it allows making less checks during `DistinctSortedTransform` execution. [#39528](https://github.com/ClickHouse/ClickHouse/pull/39528) ([Igor Nikonov](https://github.com/devcrafter)). Fix: `DistinctSortedTransform` didn't take advantage of sorting. It never cleared HashSet since clearing_columns were detected incorrectly (always empty). So, it basically worked as ordinary `DISTINCT` (`DistinctTransform`). The fix reduces memory usage significantly. [#39538](https://github.com/ClickHouse/ClickHouse/pull/39538) ([Igor Nikonov](https://github.com/devcrafter)). -* Use local node as first priority to get structure of remote table when executing `cluster` and similar table functions. [#39440](https://github.com/ClickHouse/ClickHouse/pull/39440) ([Mingliang Pan](https://github.com/liangliangpan)). -* Optimize filtering by numeric columns with AVX512VBMI2 compress store. [#39633](https://github.com/ClickHouse/ClickHouse/pull/39633) ([Guo Wangyang](https://github.com/guowangy)). For systems with AVX512 VBMI2, this PR improves performance by ca. 6% for SSB benchmark queries queries 3.1, 3.2 and 3.3 (SF=100). Tested on Intel Icelake Xeon 8380 * 2 socket. [#40033](https://github.com/ClickHouse/ClickHouse/pull/40033) ([Robert Schulze](https://github.com/rschu1ze)). -* Optimize index analysis with functional expressions in multi-thread scenario. [#39812](https://github.com/ClickHouse/ClickHouse/pull/39812) ([Guo Wangyang](https://github.com/guowangy)). -* Optimizations for complex queries: Don't visit the AST for UDFs if none are registered. [#40069](https://github.com/ClickHouse/ClickHouse/pull/40069) ([Raúl Marín](https://github.com/Algunenano)). Optimize CurrentMemoryTracker alloc and free. [#40078](https://github.com/ClickHouse/ClickHouse/pull/40078) ([Raúl Marín](https://github.com/Algunenano)). -* Improved Base58 encoding/decoding. [#39292](https://github.com/ClickHouse/ClickHouse/pull/39292) ([Andrey Zvonov](https://github.com/zvonand)). -* Improve bytes to bits mask transform for SSE/AVX/AVX512. [#39586](https://github.com/ClickHouse/ClickHouse/pull/39586) ([Guo Wangyang](https://github.com/guowangy)). - -#### Improvement - -* Normalize `AggregateFunction` types and state representations because optimizations like [#35788](https://github.com/ClickHouse/ClickHouse/pull/35788) will treat `count(not null columns)` as `count()`, which might confuses distributed interpreters with the following error : `Conversion from AggregateFunction(count) to AggregateFunction(count, Int64) is not supported`. [#39420](https://github.com/ClickHouse/ClickHouse/pull/39420) ([Amos Bird](https://github.com/amosbird)). The functions with identical states can be used in materialized views interchangeably. -* Rework and simplify the `system.backups` table, remove the `internal` column, allow user to set the ID of operation, add columns `num_files`, `uncompressed_size`, `compressed_size`, `start_time`, `end_time`. [#39503](https://github.com/ClickHouse/ClickHouse/pull/39503) ([Vitaly Baranov](https://github.com/vitlibar)). -* Improved structure of DDL query result table for `Replicated` database (separate columns with shard and replica name, more clear status) - `CREATE TABLE ... ON CLUSTER` queries can be normalized on initiator first if `distributed_ddl_entry_format_version` is set to 3 (default value). 
It means that `ON CLUSTER` queries may not work if initiator does not belong to the cluster that specified in query. Fixes [#37318](https://github.com/ClickHouse/ClickHouse/issues/37318), [#39500](https://github.com/ClickHouse/ClickHouse/issues/39500) - Ignore `ON CLUSTER` clause if database is `Replicated` and cluster name equals to database name. Related to [#35570](https://github.com/ClickHouse/ClickHouse/issues/35570) - Miscellaneous minor fixes for `Replicated` database engine - Check metadata consistency when starting up `Replicated` database, start replica recovery in case of mismatch of local metadata and metadata in Keeper. Resolves [#24880](https://github.com/ClickHouse/ClickHouse/issues/24880). [#37198](https://github.com/ClickHouse/ClickHouse/pull/37198) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Add result_rows and result_bytes to progress reports (`X-ClickHouse-Summary`). [#39567](https://github.com/ClickHouse/ClickHouse/pull/39567) ([Raúl Marín](https://github.com/Algunenano)). -* Improve primary key analysis for MergeTree. [#25563](https://github.com/ClickHouse/ClickHouse/pull/25563) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* `timeSlots` now works with DateTime64; subsecond duration and slot size available when working with DateTime64. [#37951](https://github.com/ClickHouse/ClickHouse/pull/37951) ([Andrey Zvonov](https://github.com/zvonand)). -* Added support of `LEFT SEMI` and `LEFT ANTI` direct join with `EmbeddedRocksDB` tables. [#38956](https://github.com/ClickHouse/ClickHouse/pull/38956) ([Vladimir C](https://github.com/vdimir)). -* Add profile events for fsync operations. [#39179](https://github.com/ClickHouse/ClickHouse/pull/39179) ([Azat Khuzhin](https://github.com/azat)). -* Add the second argument to the ordinary function `file(path[, default])`, which function returns in the case when a file does not exists. [#39218](https://github.com/ClickHouse/ClickHouse/pull/39218) ([Nikolay Degterinsky](https://github.com/evillique)). -* Some small fixes for reading via http, allow to retry partial content in case if 200 OK. [#39244](https://github.com/ClickHouse/ClickHouse/pull/39244) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Support queries `CREATE TEMPORARY TABLE ... () AS ...`. [#39462](https://github.com/ClickHouse/ClickHouse/pull/39462) ([Kruglov Pavel](https://github.com/Avogar)). -* Add support of `!`/`*` (exclamation/asterisk) in custom TLDs (`cutToFirstSignificantSubdomainCustom()`/`cutToFirstSignificantSubdomainCustomWithWWW()`/`firstSignificantSubdomainCustom()`). [#39496](https://github.com/ClickHouse/ClickHouse/pull/39496) ([Azat Khuzhin](https://github.com/azat)). -* Add support for TLS connections to NATS. Implements [#39525](https://github.com/ClickHouse/ClickHouse/issues/39525). [#39527](https://github.com/ClickHouse/ClickHouse/pull/39527) ([Constantine Peresypkin](https://github.com/pkit)). -* `clickhouse-obfuscator` (a tool for database obfuscation for testing and load generation) now has the new `--save` and `--load` parameters to work with pre-trained models. This closes [#39534](https://github.com/ClickHouse/ClickHouse/issues/39534). [#39541](https://github.com/ClickHouse/ClickHouse/pull/39541) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix incorrect behavior of log rotation during restart. [#39558](https://github.com/ClickHouse/ClickHouse/pull/39558) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix building aggregate projections when external aggregation is on. 
Mark as improvement because the case is rare and there exists easy workaround to fix it via changing settings. This fixes [#39667](https://github.com/ClickHouse/ClickHouse/issues/39667) . [#39671](https://github.com/ClickHouse/ClickHouse/pull/39671) ([Amos Bird](https://github.com/amosbird)). -* Allow to execute hash functions with arguments of type `Map`. [#39685](https://github.com/ClickHouse/ClickHouse/pull/39685) ([Anton Popov](https://github.com/CurtizJ)). -* Add a configuration parameter to hide addresses in stack traces. It may improve security a little but generally, it is harmful and should not be used. [#39690](https://github.com/ClickHouse/ClickHouse/pull/39690) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Change the prefix size of AggregateFunctionDistinct to make sure nested function data memory segment is aligned. [#39696](https://github.com/ClickHouse/ClickHouse/pull/39696) ([Pxl](https://github.com/BiteTheDDDDt)). -* Properly escape credentials passed to the `clickhouse-diagnostic` tool. [#39707](https://github.com/ClickHouse/ClickHouse/pull/39707) ([Dale McDiarmid](https://github.com/gingerwizard)). -* ClickHouse Keeper improvement: create a snapshot on exit. It can be controlled with the config `keeper_server.create_snapshot_on_exit`, `true` by default. [#39755](https://github.com/ClickHouse/ClickHouse/pull/39755) ([Antonio Andelic](https://github.com/antonio2368)). -* Support primary key analysis for `row_policy_filter` and `additional_filter`. It also helps fix issues like [#37454](https://github.com/ClickHouse/ClickHouse/issues/37454) . [#39826](https://github.com/ClickHouse/ClickHouse/pull/39826) ([Amos Bird](https://github.com/amosbird)). -* Fix two usability issues in Play UI: - it was non-pixel-perfect on iPad due to parasitic border radius and margins; - the progress indication did not display after the first query. This closes [#39957](https://github.com/ClickHouse/ClickHouse/issues/39957). This closes [#39960](https://github.com/ClickHouse/ClickHouse/issues/39960). [#39961](https://github.com/ClickHouse/ClickHouse/pull/39961) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Play UI: add row numbers; add cell selection on click; add hysteresis for table cells. [#39962](https://github.com/ClickHouse/ClickHouse/pull/39962) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Play UI: recognize tab key in textarea, but at the same time don't mess up with tab navigation. [#40053](https://github.com/ClickHouse/ClickHouse/pull/40053) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* The client will show server-side elapsed time. This is important for the performance comparison of ClickHouse services in remote datacenters. This closes [#38070](https://github.com/ClickHouse/ClickHouse/issues/38070). See also [this](https://github.com/ClickHouse/ClickBench/blob/main/hardware/benchmark-cloud.sh#L37) for motivation. [#39968](https://github.com/ClickHouse/ClickHouse/pull/39968) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Adds `parseDateTime64BestEffortUS`, `parseDateTime64BestEffortUSOrNull`, `parseDateTime64BestEffortUSOrZero` functions, closing [#37492](https://github.com/ClickHouse/ClickHouse/issues/37492). [#40015](https://github.com/ClickHouse/ClickHouse/pull/40015) ([Tanya Bragin](https://github.com/tbragin)). -* Extend the `system.processors_profile_log` with more information such as input rows. [#40121](https://github.com/ClickHouse/ClickHouse/pull/40121) ([Amos Bird](https://github.com/amosbird)). 
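A rough sketch of inspecting the extended `system.processors_profile_log` mentioned in the item above. The column names used here (`name`, `input_rows`, `output_rows`) and the need to enable processor profiling for the query are assumptions, so verify the actual schema before relying on them:

```sql
-- Assumed columns; verify with DESCRIBE TABLE system.processors_profile_log.
SELECT name, sum(input_rows) AS rows_in, sum(output_rows) AS rows_out
FROM system.processors_profile_log
WHERE event_date = today()
GROUP BY name
ORDER BY rows_in DESC
LIMIT 10;
```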
-* Display server-side time in `clickhouse-benchmark` by default if it is available (since ClickHouse version 22.8). This is needed to correctly compare the performance of clouds. This behavior can be changed with the new `--client-side-time` command line option. Change the `--randomize` command line option from `--randomize 1` to the form without argument. [#40193](https://github.com/ClickHouse/ClickHouse/pull/40193) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Add counters (ProfileEvents) for cases when query complexity limitation has been set and has reached (a separate counter for `overflow_mode` = `break` and `throw`). For example, if you have set up `max_rows_to_read` with `read_overflow_mode = 'break'`, looking at the value of `OverflowBreak` counter will allow distinguishing incomplete results. [#40205](https://github.com/ClickHouse/ClickHouse/pull/40205) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix memory accounting in case of "Memory limit exceeded" errors (previously [peak] memory usage was takes failed allocations into account). [#40249](https://github.com/ClickHouse/ClickHouse/pull/40249) ([Azat Khuzhin](https://github.com/azat)). -* Add metrics for filesystem cache: `FilesystemCacheSize` and `FilesystemCacheElements`. [#40260](https://github.com/ClickHouse/ClickHouse/pull/40260) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Support Hadoop secure RPC transfer (hadoop.rpc.protection=privacy and hadoop.rpc.protection=integrity). [#39411](https://github.com/ClickHouse/ClickHouse/pull/39411) ([michael1589](https://github.com/michael1589)). -* Avoid continuously growing memory consumption of pattern cache when using functions multi(Fuzzy)Match(Any|AllIndices|AnyIndex)(). [#40264](https://github.com/ClickHouse/ClickHouse/pull/40264) ([Robert Schulze](https://github.com/rschu1ze)). -* Add cache for schema inference for file/s3/hdfs/url table functions. Now, schema inference will be performed only on the first query to the file, all subsequent queries to the same file will use the schema from the cache if data has not changed. Add system table system.schema_inference_cache with all current schemas in cache and system queries SYSTEM DROP SCHEMA CACHE [FOR FILE/S3/HDFS/URL] to drop schemas from cache. [#38286](https://github.com/ClickHouse/ClickHouse/pull/38286) ([Kruglov Pavel](https://github.com/Avogar)). -* Add support for LARGE_BINARY/LARGE_STRING with Arrow (Closes [#32401](https://github.com/ClickHouse/ClickHouse/issues/32401)). [#40293](https://github.com/ClickHouse/ClickHouse/pull/40293) ([Josh Taylor](https://github.com/joshuataylor)). - -#### Build/Testing/Packaging Improvement - -* [ClickFiddle](https://fiddle.clickhouse.com/): A new tool for testing ClickHouse versions in read/write mode (**Igor Baliuk**). -* ClickHouse binary is made self-extracting [#35775](https://github.com/ClickHouse/ClickHouse/pull/35775) ([Yakov Olkhovskiy, Arthur Filatenkov](https://github.com/yakov-olkhovskiy)). -* Update `tzdata` to 2022b to support the new timezone changes. See https://github.com/google/cctz/pull/226. Chile's 2022 DST start is delayed from September 4 to September 11. Iran plans to stop observing DST permanently after it falls back on 2022-09-21. There are corrections to the historical time zone of Asia/Tehran in the year 1977: Iran adopted standard time in 1935, not 1946. 
In 1977 it observed DST from 03-21 23:00 to 10-20 24:00; its 1978 transitions were on 03-24 and 08-05, not 03-20 and 10-20; and its spring 1979 transition was on 05-27, not 03-21 (https://data.iana.org/time-zones/tzdb/NEWS). ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Former packages used to install systemd.service file to `/etc`. The files there are marked as `conf` and are not cleaned out, and are not updated automatically. This PR cleans them out. [#39323](https://github.com/ClickHouse/ClickHouse/pull/39323) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Ensure LSan is effective. [#39430](https://github.com/ClickHouse/ClickHouse/pull/39430) ([Azat Khuzhin](https://github.com/azat)). -* TSAN has issues with clang-14 (https://github.com/google/sanitizers/issues/1552, https://github.com/google/sanitizers/issues/1540), so here we build the TSAN binaries with clang-15. [#39450](https://github.com/ClickHouse/ClickHouse/pull/39450) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Remove the option to build ClickHouse tools as separate executable programs. This fixes [#37847](https://github.com/ClickHouse/ClickHouse/issues/37847). [#39520](https://github.com/ClickHouse/ClickHouse/pull/39520) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Small preparations for build on s390x (which is big-endian). [#39627](https://github.com/ClickHouse/ClickHouse/pull/39627) ([Harry Lee](https://github.com/HarryLeeIBM)). [#39656](https://github.com/ClickHouse/ClickHouse/pull/39656) ([Harry Lee](https://github.com/HarryLeeIBM)). Fixed Endian issue in BitHelpers for s390x. [#39656](https://github.com/ClickHouse/ClickHouse/pull/39656) ([Harry Lee](https://github.com/HarryLeeIBM)). Implement a piece of code related to SipHash for s390x architecture (which is not supported by ClickHouse). [#39732](https://github.com/ClickHouse/ClickHouse/pull/39732) ([Harry Lee](https://github.com/HarryLeeIBM)). Fixed an Endian issue in the Coordination snapshot code for s390x architecture (which is not supported by ClickHouse). [#39931](https://github.com/ClickHouse/ClickHouse/pull/39931) ([Harry Lee](https://github.com/HarryLeeIBM)). Fixed Endian issues in Codec code for s390x architecture (which is not supported by ClickHouse). [#40008](https://github.com/ClickHouse/ClickHouse/pull/40008) ([Harry Lee](https://github.com/HarryLeeIBM)). Fixed Endian issues in reading/writing BigEndian binary data in ReadHelpers and WriteHelpers code for s390x architecture (which is not supported by ClickHouse). [#40179](https://github.com/ClickHouse/ClickHouse/pull/40179) ([Harry Lee](https://github.com/HarryLeeIBM)). -* Support build with `clang-16` (trunk). This closes [#39949](https://github.com/ClickHouse/ClickHouse/issues/39949). [#40181](https://github.com/ClickHouse/ClickHouse/pull/40181) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Prepare RISC-V 64 build to run in CI. This is for [#40141](https://github.com/ClickHouse/ClickHouse/issues/40141). [#40197](https://github.com/ClickHouse/ClickHouse/pull/40197) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Simplified function registration macro interface (`FUNCTION_REGISTER*`) to eliminate the step to add and call an extern function in the registerFunctions.cpp, it also makes incremental builds of a new function faster. [#38615](https://github.com/ClickHouse/ClickHouse/pull/38615) ([Li Yin](https://github.com/liyinsg)). 
-* Docker: Now entrypoint.sh in docker image creates and executes chown for all folders it finds in the config for multidisk setup [#17717](https://github.com/ClickHouse/ClickHouse/issues/17717). [#39121](https://github.com/ClickHouse/ClickHouse/pull/39121) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). - -#### Bug Fix - -* Fix possible segfault in `CapnProto` input format. This bug was found and sent in through the ClickHouse bug-bounty [program](https://github.com/ClickHouse/ClickHouse/issues/38986) by *kiojj*. [#40241](https://github.com/ClickHouse/ClickHouse/pull/40241) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix a very rare case of incorrect behavior of the array subscript operator. This closes [#28720](https://github.com/ClickHouse/ClickHouse/issues/28720). [#40185](https://github.com/ClickHouse/ClickHouse/pull/40185) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix insufficient argument check for encryption functions (found by query fuzzer). This closes [#39987](https://github.com/ClickHouse/ClickHouse/issues/39987). [#40194](https://github.com/ClickHouse/ClickHouse/pull/40194) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix the case when the order of columns can be incorrect if the `IN` operator is used with a table with `ENGINE = Set` containing multiple columns. This fixes [#13014](https://github.com/ClickHouse/ClickHouse/issues/13014). [#40225](https://github.com/ClickHouse/ClickHouse/pull/40225) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix seeking while reading from encrypted disk. This PR fixes [#38381](https://github.com/ClickHouse/ClickHouse/issues/38381). [#39687](https://github.com/ClickHouse/ClickHouse/pull/39687) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix duplicate columns in join plan. Finally, solve [#26809](https://github.com/ClickHouse/ClickHouse/issues/26809). [#40009](https://github.com/ClickHouse/ClickHouse/pull/40009) ([Vladimir C](https://github.com/vdimir)). -* Fixed query hanging for SELECT with ORDER BY WITH FILL with different date/time types. [#37849](https://github.com/ClickHouse/ClickHouse/pull/37849) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Fix ORDER BY that matches projections ORDER BY (before it simply returns unsorted result). [#38725](https://github.com/ClickHouse/ClickHouse/pull/38725) ([Azat Khuzhin](https://github.com/azat)). -* Do not optimise functions in GROUP BY statements if they shadow one of the table columns or expressions. Fixes [#37032](https://github.com/ClickHouse/ClickHouse/issues/37032). [#39103](https://github.com/ClickHouse/ClickHouse/pull/39103) ([Anton Kozlov](https://github.com/tonickkozlov)). -* Fix wrong table name in logs after RENAME TABLE. This fixes [#38018](https://github.com/ClickHouse/ClickHouse/issues/38018). [#39227](https://github.com/ClickHouse/ClickHouse/pull/39227) ([Amos Bird](https://github.com/amosbird)). -* Fix positional arguments in case of columns pruning when optimising the query. Closes [#38433](https://github.com/ClickHouse/ClickHouse/issues/38433). [#39293](https://github.com/ClickHouse/ClickHouse/pull/39293) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix bug in schema inference in case of empty messages in Protobuf/CapnProto formats that allowed to create column with empty `Tuple` type. 
Closes [#39051](https://github.com/ClickHouse/ClickHouse/issues/39051) Add 2 new settings `input_format_{protobuf/capnproto}_skip_fields_with_unsupported_types_in_schema_inference` that allow to skip fields with unsupported types while schema inference for Protobuf and CapnProto formats. [#39357](https://github.com/ClickHouse/ClickHouse/pull/39357) ([Kruglov Pavel](https://github.com/Avogar)). -* (Window View is an experimental feature) Fix segmentation fault on `CREATE WINDOW VIEW .. ON CLUSTER ... INNER`. Closes [#39363](https://github.com/ClickHouse/ClickHouse/issues/39363). [#39384](https://github.com/ClickHouse/ClickHouse/pull/39384) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix WriteBuffer finalize when cancelling insert into function (in previous versions it may leat to std::terminate). [#39458](https://github.com/ClickHouse/ClickHouse/pull/39458) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix storing of columns of type `Object` in sparse serialization. [#39464](https://github.com/ClickHouse/ClickHouse/pull/39464) ([Anton Popov](https://github.com/CurtizJ)). -* Fix possible "Not found column in block" exception when using projections. This closes [#39469](https://github.com/ClickHouse/ClickHouse/issues/39469). [#39470](https://github.com/ClickHouse/ClickHouse/pull/39470) ([小路](https://github.com/nicelulu)). -* Fix exception on race between DROP and INSERT with materialized views. [#39477](https://github.com/ClickHouse/ClickHouse/pull/39477) ([Azat Khuzhin](https://github.com/azat)). -* A bug in Apache Avro library: fix data race and possible heap-buffer-overflow in Avro format. Closes [#39094](https://github.com/ClickHouse/ClickHouse/issues/39094) Closes [#33652](https://github.com/ClickHouse/ClickHouse/issues/33652). [#39498](https://github.com/ClickHouse/ClickHouse/pull/39498) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix rare bug in asynchronous reading (with setting `local_filesystem_read_method='pread_threadpool'`) with enabled `O_DIRECT` (enabled by setting `min_bytes_to_use_direct_io`). [#39506](https://github.com/ClickHouse/ClickHouse/pull/39506) ([Anton Popov](https://github.com/CurtizJ)). -* (only on FreeBSD) Fixes "Code: 49. DB::Exception: FunctionFactory: the function name '' is not unique. (LOGICAL_ERROR)" observed on FreeBSD when starting clickhouse. [#39551](https://github.com/ClickHouse/ClickHouse/pull/39551) ([Alexander Gololobov](https://github.com/davenger)). -* Fix bug with the recently introduced "maxsplit" argument for `splitByChar`, which was not working correctly. [#39552](https://github.com/ClickHouse/ClickHouse/pull/39552) ([filimonov](https://github.com/filimonov)). -* Fix bug in ASOF JOIN with `enable_optimize_predicate_expression`, close [#37813](https://github.com/ClickHouse/ClickHouse/issues/37813). [#39556](https://github.com/ClickHouse/ClickHouse/pull/39556) ([Vladimir C](https://github.com/vdimir)). -* Fixed `CREATE/DROP INDEX` query with `ON CLUSTER` or `Replicated` database and `ReplicatedMergeTree`. It used to be executed on all replicas (causing error or DDL queue stuck). Fixes [#39511](https://github.com/ClickHouse/ClickHouse/issues/39511). [#39565](https://github.com/ClickHouse/ClickHouse/pull/39565) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Fix "column not found" error for push down with join, close [#39505](https://github.com/ClickHouse/ClickHouse/issues/39505). [#39575](https://github.com/ClickHouse/ClickHouse/pull/39575) ([Vladimir C](https://github.com/vdimir)). 
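A minimal sketch of the `splitByChar` "maxsplit" argument whose behavior is fixed above; the sample string is arbitrary, and the exact shape of the limited result depends on the server version:

```sql
SELECT splitByChar(',', 'a,b,c,d');     -- ['a', 'b', 'c', 'd']
SELECT splitByChar(',', 'a,b,c,d', 2);  -- the optional third argument limits the number of splits
```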
-* Fix the wrong `REGEXP_REPLACE` alias. This fixes https://github.com/ClickHouse/ClickBench/issues/9. [#39592](https://github.com/ClickHouse/ClickHouse/pull/39592) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fixed point of origin for exponential decay window functions to the last value in window. Previously, decay was calculated by formula `exp((t - curr_row_t) / decay_length)`, which is incorrect when right boundary of window is not `CURRENT ROW`. It was changed to: `exp((t - last_row_t) / decay_length)`. There is no change in results for windows with `ROWS BETWEEN (smth) AND CURRENT ROW`. [#39593](https://github.com/ClickHouse/ClickHouse/pull/39593) ([Vladimir Chebotaryov](https://github.com/quickhouse)). -* Fix Decimal division overflow, which can be detected based on operands scale. [#39600](https://github.com/ClickHouse/ClickHouse/pull/39600) ([Andrey Zvonov](https://github.com/zvonand)). -* Fix settings `output_format_arrow_string_as_string` and `output_format_arrow_low_cardinality_as_dictionary` work in combination. Closes [#39624](https://github.com/ClickHouse/ClickHouse/issues/39624). [#39647](https://github.com/ClickHouse/ClickHouse/pull/39647) ([Kruglov Pavel](https://github.com/Avogar)). -* Fixed a bug in default database resolution in distributed table reads. [#39674](https://github.com/ClickHouse/ClickHouse/pull/39674) ([Anton Kozlov](https://github.com/tonickkozlov)). -* (Only with the obsolete Ordinary databases) Select might read data of dropped table if cache for mmap IO is used and database engine is Ordinary and new tables was created with the same name as dropped one had. It's fixed. [#39708](https://github.com/ClickHouse/ClickHouse/pull/39708) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Fix possible error `Invalid column type for ColumnUnique::insertRangeFrom. Expected String, got ColumnLowCardinality` Fixes [#38460](https://github.com/ClickHouse/ClickHouse/issues/38460). [#39716](https://github.com/ClickHouse/ClickHouse/pull/39716) ([Arthur Passos](https://github.com/arthurpassos)). -* Field names in the `meta` section of JSON format were erroneously double escaped. This closes [#39693](https://github.com/ClickHouse/ClickHouse/issues/39693). [#39747](https://github.com/ClickHouse/ClickHouse/pull/39747) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix wrong index analysis with tuples and operator `IN`, which could lead to wrong query result. [#39752](https://github.com/ClickHouse/ClickHouse/pull/39752) ([Anton Popov](https://github.com/CurtizJ)). -* Fix `EmbeddedRocksDB` tables filtering by key using params. [#39757](https://github.com/ClickHouse/ClickHouse/pull/39757) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix error `Invalid number of columns in chunk pushed to OutputPort` which was caused by ARRAY JOIN optimization. Fixes [#39164](https://github.com/ClickHouse/ClickHouse/issues/39164). [#39799](https://github.com/ClickHouse/ClickHouse/pull/39799) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* A workaround for a bug in Linux kernel. Fix `CANNOT_READ_ALL_DATA` exception with `local_filesystem_read_method=pread_threadpool`. This bug affected only Linux kernel version 5.9 and 5.10 according to [man](https://manpages.debian.org/testing/manpages-dev/preadv2.2.en.html#BUGS). [#39800](https://github.com/ClickHouse/ClickHouse/pull/39800) ([Anton Popov](https://github.com/CurtizJ)). -* (Only on NFS) Fix broken NFS mkdir for root-squashed volumes. 
[#39898](https://github.com/ClickHouse/ClickHouse/pull/39898) ([Constantine Peresypkin](https://github.com/pkit)). -* Remove dictionaries from prometheus metrics on DETACH/DROP. [#39926](https://github.com/ClickHouse/ClickHouse/pull/39926) ([Azat Khuzhin](https://github.com/azat)). -* Fix read of StorageFile with virtual columns. Closes [#39907](https://github.com/ClickHouse/ClickHouse/issues/39907). [#39943](https://github.com/ClickHouse/ClickHouse/pull/39943) ([flynn](https://github.com/ucasfl)). -* Fix big memory usage during fetches. Fixes [#39915](https://github.com/ClickHouse/ClickHouse/issues/39915). [#39990](https://github.com/ClickHouse/ClickHouse/pull/39990) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* (experimental feature) Fix `hashId` crash and salt parameter not being used. [#40002](https://github.com/ClickHouse/ClickHouse/pull/40002) ([Raúl Marín](https://github.com/Algunenano)). -* `EXCEPT` and `INTERSECT` operators may lead to crash if a specific combination of constant and non-constant columns were used. [#40020](https://github.com/ClickHouse/ClickHouse/pull/40020) ([Duc Canh Le](https://github.com/canhld94)). -* Fixed "Part directory doesn't exist" and "`tmp_` ... No such file or directory" errors during too slow INSERT or too long merge/mutation. Also fixed issue that may cause some replication queue entries to stuck without any errors or warnings in logs if previous attempt to fetch part failed, but `tmp-fetch_` directory was not cleaned up. [#40031](https://github.com/ClickHouse/ClickHouse/pull/40031) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Fix rare cases of parsing of arrays of tuples in format `Values`. [#40034](https://github.com/ClickHouse/ClickHouse/pull/40034) ([Anton Popov](https://github.com/CurtizJ)). -* Fixes ArrowColumn format Dictionary(X) & Dictionary(Nullable(X)) conversion to ClickHouse LowCardinality(X) & LowCardinality(Nullable(X)) respectively. [#40037](https://github.com/ClickHouse/ClickHouse/pull/40037) ([Arthur Passos](https://github.com/arthurpassos)). -* Fix potential deadlock in writing to S3 during task scheduling failure. [#40070](https://github.com/ClickHouse/ClickHouse/pull/40070) ([Maksim Kita](https://github.com/kitaisreal)). -* Fix bug in collectFilesToSkip() by adding correct file extension (.idx or idx2) for indexes to be recalculated, avoid wrong hard links. Fixed [#39896](https://github.com/ClickHouse/ClickHouse/issues/39896). [#40095](https://github.com/ClickHouse/ClickHouse/pull/40095) ([Jianmei Zhang](https://github.com/zhangjmruc)). -* A fix for reverse DNS resolution. [#40134](https://github.com/ClickHouse/ClickHouse/pull/40134) ([Arthur Passos](https://github.com/arthurpassos)). -* Fix unexpected result `arrayDifference` of `Array(UInt32). [#40211](https://github.com/ClickHouse/ClickHouse/pull/40211) ([Duc Canh Le](https://github.com/canhld94)). - -### ClickHouse release 22.7, 2022-07-21 - -#### Upgrade Notes - -* Enable setting `enable_positional_arguments` by default. It allows queries like `SELECT ... ORDER BY 1, 2` where 1, 2 are the references to the select clause. If you need to return the old behavior, disable this setting. [#38204](https://github.com/ClickHouse/ClickHouse/pull/38204) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Disable `format_csv_allow_single_quotes` by default. See [#37096](https://github.com/ClickHouse/ClickHouse/issues/37096). ([Kruglov Pavel](https://github.com/Avogar)). 
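Since `enable_positional_arguments` is now on by default, a query like the following (hypothetical table and columns) interprets the integers as references to the SELECT list:

```sql
-- 1, 2 and 3 refer to event_date, user_id and c respectively.
SELECT event_date, user_id, count() AS c
FROM hits
GROUP BY 1, 2
ORDER BY 3 DESC;

-- To restore the old behavior where such integers are treated as constants:
SET enable_positional_arguments = 0;
```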
-* `Ordinary` database engine and old storage definition syntax for `*MergeTree` tables are deprecated. By default it's not possible to create new databases with `Ordinary` engine. If `system` database has `Ordinary` engine it will be automatically converted to `Atomic` on server startup. There are settings to keep old behavior (`allow_deprecated_database_ordinary` and `allow_deprecated_syntax_for_merge_tree`), but these settings may be removed in future releases. [#38335](https://github.com/ClickHouse/ClickHouse/pull/38335) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Force rewriting comma join to inner by default (set default value `cross_to_inner_join_rewrite = 2`). To have old behavior set `cross_to_inner_join_rewrite = 1`. [#39326](https://github.com/ClickHouse/ClickHouse/pull/39326) ([Vladimir C](https://github.com/vdimir)). If you will face any incompatibilities, you can turn this setting back. - -#### New Feature - -* Support expressions with window functions. Closes [#19857](https://github.com/ClickHouse/ClickHouse/issues/19857). [#37848](https://github.com/ClickHouse/ClickHouse/pull/37848) ([Dmitry Novik](https://github.com/novikd)). -* Add new `direct` join algorithm for `EmbeddedRocksDB` tables, see [#33582](https://github.com/ClickHouse/ClickHouse/issues/33582). [#35363](https://github.com/ClickHouse/ClickHouse/pull/35363) ([Vladimir C](https://github.com/vdimir)). -* Added full sorting merge join algorithm. [#35796](https://github.com/ClickHouse/ClickHouse/pull/35796) ([Vladimir C](https://github.com/vdimir)). -* Implement NATS table engine, which allows to pub/sub to NATS. Closes [#32388](https://github.com/ClickHouse/ClickHouse/issues/32388). [#37171](https://github.com/ClickHouse/ClickHouse/pull/37171) ([tchepavel](https://github.com/tchepavel)). ([Kseniia Sumarokova](https://github.com/kssenii)) -* Implement table function `mongodb`. Allow writes into `MongoDB` storage / table function. [#37213](https://github.com/ClickHouse/ClickHouse/pull/37213) ([aaapetrenko](https://github.com/aaapetrenko)). ([Kseniia Sumarokova](https://github.com/kssenii)) -* Add `SQLInsert` output format. Closes [#38441](https://github.com/ClickHouse/ClickHouse/issues/38441). [#38477](https://github.com/ClickHouse/ClickHouse/pull/38477) ([Kruglov Pavel](https://github.com/Avogar)). -* Introduced settings `additional_table_filters`. Using this setting, you can specify additional filtering condition for a table which will be applied directly after reading. Example: `select number, x, y from (select number from system.numbers limit 5) f any left join (select x, y from table_1) s on f.number = s.x settings additional_table_filters={'system.numbers : 'number != 3', 'table_1' : 'x != 2'}`. Introduced setting `additional_result_filter` which specifies additional filtering condition for query result. Closes [#37918](https://github.com/ClickHouse/ClickHouse/issues/37918). [#38475](https://github.com/ClickHouse/ClickHouse/pull/38475) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Add `compatibility` setting and `system.settings_changes` system table that contains information about changes in settings through ClickHouse versions. Closes [#35972](https://github.com/ClickHouse/ClickHouse/issues/35972). [#38957](https://github.com/ClickHouse/ClickHouse/pull/38957) ([Kruglov Pavel](https://github.com/Avogar)). -* Add functions `translate(string, from_string, to_string)` and `translateUTF8(string, from_string, to_string)`. It translates some characters to another. 
[#38935](https://github.com/ClickHouse/ClickHouse/pull/38935) ([Nikolay Degterinsky](https://github.com/evillique)).
-* Support `parseTimeDelta` function. The characters ` ;-+,:` can be used as separators, e.g. `1yr-2mo`, `2m:6s`: `SELECT parseTimeDelta('1yr-2mo-4w + 12 days, 3 hours : 1 minute ; 33 seconds')`. [#39071](https://github.com/ClickHouse/ClickHouse/pull/39071) ([jiahui-97](https://github.com/jiahui-97)).
-* Added `CREATE TABLE ... EMPTY AS SELECT` query. It automatically deduces table structure from the SELECT query, but does not fill the table after creation. Resolves [#38049](https://github.com/ClickHouse/ClickHouse/issues/38049). [#38272](https://github.com/ClickHouse/ClickHouse/pull/38272) ([Alexander Tokmakov](https://github.com/tavplubix)).
-* Added options to limit IO operations with remote storage: `max_remote_read_network_bandwidth_for_server` and `max_remote_write_network_bandwidth_for_server`. [#39095](https://github.com/ClickHouse/ClickHouse/pull/39095) ([Sergei Trifonov](https://github.com/serxa)).
-* Add `group_by_use_nulls` setting to make aggregation key columns nullable in the case of ROLLUP, CUBE and GROUPING SETS. Closes [#37359](https://github.com/ClickHouse/ClickHouse/issues/37359). [#38642](https://github.com/ClickHouse/ClickHouse/pull/38642) ([Dmitry Novik](https://github.com/novikd)).
-* Add the ability to specify compression level during data export. [#38907](https://github.com/ClickHouse/ClickHouse/pull/38907) ([Nikolay Degterinsky](https://github.com/evillique)).
-* Add an option to require explicit grants to SELECT from the `system` database. Details: [#38970](https://github.com/ClickHouse/ClickHouse/pull/38970) ([Vitaly Baranov](https://github.com/vitlibar)).
-* Functions `multiMatchAny`, `multiMatchAnyIndex`, `multiMatchAllIndices` and their fuzzy variants now accept non-const pattern array argument. [#38485](https://github.com/ClickHouse/ClickHouse/pull/38485) ([Robert Schulze](https://github.com/rschu1ze)). SQL function `multiSearchAllPositions` now accepts non-const needle arguments. [#39167](https://github.com/ClickHouse/ClickHouse/pull/39167) ([Robert Schulze](https://github.com/rschu1ze)).
-* Add a setting `zstd_window_log_max` to configure max memory usage on zstd decoding when importing external files. Closes [#35693](https://github.com/ClickHouse/ClickHouse/issues/35693). [#37015](https://github.com/ClickHouse/ClickHouse/pull/37015) ([wuxiaobai24](https://github.com/wuxiaobai24)).
-* Add `send_logs_source_regexp` setting. Send server text logs with specified regexp to match log source name. Empty means all sources. [#39161](https://github.com/ClickHouse/ClickHouse/pull/39161) ([Amos Bird](https://github.com/amosbird)).
-* Support `ALTER` for `Hive` tables. [#38214](https://github.com/ClickHouse/ClickHouse/pull/38214) ([lgbo](https://github.com/lgbo-ustc)).
-* Support `isNullable` function. This function checks whether its argument is nullable and returns 1 or 0. Closes [#38611](https://github.com/ClickHouse/ClickHouse/issues/38611). [#38841](https://github.com/ClickHouse/ClickHouse/pull/38841) ([lokax](https://github.com/lokax)).
-* Added functions for base58 encoding/decoding. [#38159](https://github.com/ClickHouse/ClickHouse/pull/38159) ([Andrey Zvonov](https://github.com/zvonand)).
-* Add chart visualization to Play UI. [#38197](https://github.com/ClickHouse/ClickHouse/pull/38197) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
-* Added L2 Squared distance and norm functions for both arrays and tuples.
[#38545](https://github.com/ClickHouse/ClickHouse/pull/38545) ([Julian Gilyadov](https://github.com/israelg99)). -* Add ability to pass HTTP headers to the `url` table function / storage via SQL. Closes [#37897](https://github.com/ClickHouse/ClickHouse/issues/37897). [#38176](https://github.com/ClickHouse/ClickHouse/pull/38176) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Add `clickhouse-diagnostics` binary to the packages. [#38647](https://github.com/ClickHouse/ClickHouse/pull/38647) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). - -#### Experimental Feature - -* Adds new setting `implicit_transaction` to run standalone queries inside a transaction. It handles both creation and closing (via COMMIT if the query succeeded or ROLLBACK if it didn't) of the transaction automatically. [#38344](https://github.com/ClickHouse/ClickHouse/pull/38344) ([Raúl Marín](https://github.com/Algunenano)). - -#### Performance Improvement - -* Distinct optimization for sorted columns. Use specialized distinct transformation in case input stream is sorted by column(s) in distinct. Optimization can be applied to pre-distinct, final distinct, or both. Initial implementation by @dimarub2000. [#37803](https://github.com/ClickHouse/ClickHouse/pull/37803) ([Igor Nikonov](https://github.com/devcrafter)). -* Improve performance of `ORDER BY`, `MergeTree` merges, window functions using batch version of `BinaryHeap`. [#38022](https://github.com/ClickHouse/ClickHouse/pull/38022) ([Maksim Kita](https://github.com/kitaisreal)). -* More parallel execution for queries with `FINAL` [#36396](https://github.com/ClickHouse/ClickHouse/pull/36396) ([Nikita Taranov](https://github.com/nickitat)). -* Fix significant join performance regression which was introduced in [#35616](https://github.com/ClickHouse/ClickHouse/pull/35616). It's interesting that common join queries such as ssb queries have been 10 times slower for almost 3 months while no one complains. [#38052](https://github.com/ClickHouse/ClickHouse/pull/38052) ([Amos Bird](https://github.com/amosbird)). -* Migrate from the Intel hyperscan library to vectorscan, this speeds up many string matching on non-x86 platforms. [#38171](https://github.com/ClickHouse/ClickHouse/pull/38171) ([Robert Schulze](https://github.com/rschu1ze)). -* Increased parallelism of query plan steps executed after aggregation. [#38295](https://github.com/ClickHouse/ClickHouse/pull/38295) ([Nikita Taranov](https://github.com/nickitat)). -* Improve performance of insertion to columns of type `JSON`. [#38320](https://github.com/ClickHouse/ClickHouse/pull/38320) ([Anton Popov](https://github.com/CurtizJ)). -* Optimized insertion and lookups in the HashTable. [#38413](https://github.com/ClickHouse/ClickHouse/pull/38413) ([Nikita Taranov](https://github.com/nickitat)). -* Fix performance degradation from [#32493](https://github.com/ClickHouse/ClickHouse/issues/32493). [#38417](https://github.com/ClickHouse/ClickHouse/pull/38417) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Improve performance of joining with numeric columns using SIMD instructions. [#37235](https://github.com/ClickHouse/ClickHouse/pull/37235) ([zzachimed](https://github.com/zzachimed)). [#38565](https://github.com/ClickHouse/ClickHouse/pull/38565) ([Maksim Kita](https://github.com/kitaisreal)). -* Norm and Distance functions for arrays speed up 1.2-2 times. [#38740](https://github.com/ClickHouse/ClickHouse/pull/38740) ([Alexander Gololobov](https://github.com/davenger)). 
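A small sketch of the array norm/distance functions referenced in the entries above, assuming the spellings `L2Norm`, `L2Distance` and `L2SquaredDistance` (the input vectors are arbitrary):

```sql
SELECT
    L2Norm([3.0, 4.0]),                        -- 5
    L2Distance([1.0, 2.0], [4.0, 6.0]),        -- 5
    L2SquaredDistance([1.0, 2.0], [4.0, 6.0]); -- 25
```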
-* Add AVX-512 VBMI optimized `copyOverlap32Shuffle` for LZ4 decompression. In other words, LZ4 decompression performance is improved. [#37891](https://github.com/ClickHouse/ClickHouse/pull/37891) ([Guo Wangyang](https://github.com/guowangy)). -* `ORDER BY (a, b)` will use all the same benefits as `ORDER BY a, b`. [#38873](https://github.com/ClickHouse/ClickHouse/pull/38873) ([Igor Nikonov](https://github.com/devcrafter)). -* Align branches within a 32B boundary to make benchmark more stable. [#38988](https://github.com/ClickHouse/ClickHouse/pull/38988) ([Guo Wangyang](https://github.com/guowangy)). It improves performance 1..2% on average for Intel. -* Executable UDF, executable dictionaries, and Executable tables will avoid wasting one second during wait for subprocess termination. [#38929](https://github.com/ClickHouse/ClickHouse/pull/38929) ([Constantine Peresypkin](https://github.com/pkit)). -* Optimize accesses to `system.stack_trace` table if not all columns are selected. [#39177](https://github.com/ClickHouse/ClickHouse/pull/39177) ([Azat Khuzhin](https://github.com/azat)). -* Improve isNullable/isConstant/isNull/isNotNull performance for LowCardinality argument. [#39192](https://github.com/ClickHouse/ClickHouse/pull/39192) ([Kruglov Pavel](https://github.com/Avogar)). -* Optimized processing of ORDER BY in window functions. [#34632](https://github.com/ClickHouse/ClickHouse/pull/34632) ([Vladimir Chebotarev](https://github.com/excitoon)). -* The table `system.asynchronous_metric_log` is further optimized for storage space. This closes [#38134](https://github.com/ClickHouse/ClickHouse/issues/38134). See the [YouTube video](https://www.youtube.com/watch?v=0fSp9SF8N8A). [#38428](https://github.com/ClickHouse/ClickHouse/pull/38428) ([Alexey Milovidov](https://github.com/alexey-milovidov)). - -#### Improvement - -* Support SQL standard CREATE INDEX and DROP INDEX syntax. [#35166](https://github.com/ClickHouse/ClickHouse/pull/35166) ([Jianmei Zhang](https://github.com/zhangjmruc)). -* Send profile events for INSERT queries (previously only SELECT was supported). [#37391](https://github.com/ClickHouse/ClickHouse/pull/37391) ([Azat Khuzhin](https://github.com/azat)). -* Implement in order aggregation (`optimize_aggregation_in_order`) for fully materialized projections. [#37469](https://github.com/ClickHouse/ClickHouse/pull/37469) ([Azat Khuzhin](https://github.com/azat)). -* Remove subprocess run for Kerberos initialization. Added new integration test. Closes [#27651](https://github.com/ClickHouse/ClickHouse/issues/27651). [#38105](https://github.com/ClickHouse/ClickHouse/pull/38105) ([Roman Vasin](https://github.com/rvasin)). -* * Add setting `multiple_joins_try_to_keep_original_names` to not rewrite identifier name on multiple JOINs rewrite, close [#34697](https://github.com/ClickHouse/ClickHouse/issues/34697). [#38149](https://github.com/ClickHouse/ClickHouse/pull/38149) ([Vladimir C](https://github.com/vdimir)). -* Improved trace-visualizer UX. [#38169](https://github.com/ClickHouse/ClickHouse/pull/38169) ([Sergei Trifonov](https://github.com/serxa)). -* Enable stack trace collection and query profiler for AArch64. [#38181](https://github.com/ClickHouse/ClickHouse/pull/38181) ([Maksim Kita](https://github.com/kitaisreal)). -* Do not skip symlinks in `user_defined` directory during SQL user defined functions loading. Closes [#38042](https://github.com/ClickHouse/ClickHouse/issues/38042). 
[#38184](https://github.com/ClickHouse/ClickHouse/pull/38184) ([Maksim Kita](https://github.com/kitaisreal)). -* Added background cleanup of subdirectories in `store/`. In some cases clickhouse-server might left garbage subdirectories in `store/` (for example, on unsuccessful table creation) and those dirs were never been removed. Fixes [#33710](https://github.com/ClickHouse/ClickHouse/issues/33710). [#38265](https://github.com/ClickHouse/ClickHouse/pull/38265) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Add `DESCRIBE CACHE` query to show cache settings from config. Add `SHOW CACHES` query to show available filesystem caches list. [#38279](https://github.com/ClickHouse/ClickHouse/pull/38279) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Add access check for `system drop filesystem cache`. Support ON CLUSTER. [#38319](https://github.com/ClickHouse/ClickHouse/pull/38319) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix PostgreSQL database engine incompatibility on upgrade from 21.3 to 22.3. Closes [#36659](https://github.com/ClickHouse/ClickHouse/issues/36659). [#38369](https://github.com/ClickHouse/ClickHouse/pull/38369) ([Kseniia Sumarokova](https://github.com/kssenii)). -* `filesystemAvailable` and similar functions now work in `clickhouse-local`. This closes [#38423](https://github.com/ClickHouse/ClickHouse/issues/38423). [#38424](https://github.com/ClickHouse/ClickHouse/pull/38424) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Add `revision` function. [#38555](https://github.com/ClickHouse/ClickHouse/pull/38555) ([Azat Khuzhin](https://github.com/azat)). -* Fix GCS via proxy tunnel usage. [#38726](https://github.com/ClickHouse/ClickHouse/pull/38726) ([Azat Khuzhin](https://github.com/azat)). -* Support `\i file` in clickhouse client / local (similar to psql \i). [#38813](https://github.com/ClickHouse/ClickHouse/pull/38813) ([Kseniia Sumarokova](https://github.com/kssenii)). -* New option `optimize = 1` in `EXPLAIN AST`. If enabled, it shows AST after it's rewritten, otherwise AST of original query. Disabled by default. [#38910](https://github.com/ClickHouse/ClickHouse/pull/38910) ([Igor Nikonov](https://github.com/devcrafter)). -* Allow trailing comma in columns list. closes [#38425](https://github.com/ClickHouse/ClickHouse/issues/38425). [#38440](https://github.com/ClickHouse/ClickHouse/pull/38440) ([chen](https://github.com/xiedeyantu)). -* Bugfixes and performance improvements for `parallel_hash` JOIN method. [#37648](https://github.com/ClickHouse/ClickHouse/pull/37648) ([Vladimir C](https://github.com/vdimir)). -* Support hadoop secure RPC transfer (hadoop.rpc.protection=privacy and hadoop.rpc.protection=integrity). [#37852](https://github.com/ClickHouse/ClickHouse/pull/37852) ([Peng Liu](https://github.com/michael1589)). -* Add struct type support in `StorageHive`. [#38118](https://github.com/ClickHouse/ClickHouse/pull/38118) ([lgbo](https://github.com/lgbo-ustc)). -* S3 single objects are now removed with `RemoveObjectRequest`. Implement compatibility with GCP which did not allow to use `removeFileIfExists` effectively breaking approximately half of `remove` functionality. Automatic detection for `DeleteObjects` S3 API, that is not supported by GCS. This will allow to use GCS without explicit `support_batch_delete=0` in configuration. [#37882](https://github.com/ClickHouse/ClickHouse/pull/37882) ([Vladimir Chebotarev](https://github.com/excitoon)). 
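The two filesystem-cache introspection queries mentioned earlier in this list can be used roughly as follows; `'s3_cache'` is a hypothetical cache name from the server configuration, and the exact argument syntax may differ between versions:

```sql
SHOW CACHES;               -- list the filesystem caches defined in the config
DESCRIBE CACHE 's3_cache'; -- show the settings of one of them
```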
-* Expose basic ClickHouse Keeper related monitoring data (via ProfileEvents and CurrentMetrics). [#38072](https://github.com/ClickHouse/ClickHouse/pull/38072) ([lingpeng0314](https://github.com/lingpeng0314)). -* Support `auto_close` option for PostgreSQL engine connection. Closes [#31486](https://github.com/ClickHouse/ClickHouse/issues/31486). [#38363](https://github.com/ClickHouse/ClickHouse/pull/38363) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Allow `NULL` modifier in columns declaration for table functions. [#38816](https://github.com/ClickHouse/ClickHouse/pull/38816) ([Kruglov Pavel](https://github.com/Avogar)). -* Deactivate `mutations_finalizing_task` before shutdown to avoid benign `TABLE_IS_READ_ONLY` errors during shutdown. [#38851](https://github.com/ClickHouse/ClickHouse/pull/38851) ([Raúl Marín](https://github.com/Algunenano)). -* Eliminate unnecessary waiting of SELECT queries after ALTER queries in presence of INSERT queries if you use deprecated Ordinary databases. [#38864](https://github.com/ClickHouse/ClickHouse/pull/38864) ([Azat Khuzhin](https://github.com/azat)). -* New option `rewrite` in `EXPLAIN AST`. If enabled, it shows AST after it's rewritten, otherwise AST of original query. Disabled by default. [#38910](https://github.com/ClickHouse/ClickHouse/pull/38910) ([Igor Nikonov](https://github.com/devcrafter)). -* Stop reporting Zookeeper "Node exists" exceptions in system.errors when they are expected. [#38961](https://github.com/ClickHouse/ClickHouse/pull/38961) ([Raúl Marín](https://github.com/Algunenano)). -* `clickhouse-keeper`: add support for real-time digest calculation and verification. It is disabled by default. [#37555](https://github.com/ClickHouse/ClickHouse/pull/37555) ([Antonio Andelic](https://github.com/antonio2368)). -* Allow to specify globs `* or {expr1, expr2, expr3}` inside a key for `clickhouse-extract-from-config` tool. [#38966](https://github.com/ClickHouse/ClickHouse/pull/38966) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* clearOldLogs: Don't report KEEPER_EXCEPTION on concurrent deletes. [#39016](https://github.com/ClickHouse/ClickHouse/pull/39016) ([Raúl Marín](https://github.com/Algunenano)). -* clickhouse-keeper improvement: persist meta-information about keeper servers to disk. [#39069](https://github.com/ClickHouse/ClickHouse/pull/39069) ([Antonio Andelic](https://github.com/antonio2368)). This will make it easier to operate if you shutdown or restart all keeper nodes at the same time. -* Continue without exception when running out of disk space when using filesystem cache. [#39106](https://github.com/ClickHouse/ClickHouse/pull/39106) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Handling SIGTERM signals from k8s. [#39130](https://github.com/ClickHouse/ClickHouse/pull/39130) ([Timur Solodovnikov](https://github.com/tsolodov)). -* Add `merge_algorithm` column (Undecided, Horizontal, Vertical) to system.part_log. [#39181](https://github.com/ClickHouse/ClickHouse/pull/39181) ([Azat Khuzhin](https://github.com/azat)). -* Don't increment a counter in `system.errors` when the disk is not rotational. [#39216](https://github.com/ClickHouse/ClickHouse/pull/39216) ([Raúl Marín](https://github.com/Algunenano)). -* The metric `result_bytes` for `INSERT` queries in `system.query_log` shows number of bytes inserted. Previously value was incorrect and stored the same value as `result_rows`. [#39225](https://github.com/ClickHouse/ClickHouse/pull/39225) ([Ilya Yatsishin](https://github.com/qoega)). 
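One way to look at the new `merge_algorithm` column (`Undecided`, `Horizontal`, `Vertical`) mentioned above, assuming the usual `system.part_log` layout; treat the exact filter as illustrative:

```sql
SELECT event_time, database, table, part_name, merge_algorithm
FROM system.part_log
WHERE event_type = 'MergeParts'
ORDER BY event_time DESC
LIMIT 10;
```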
-* The CPU usage metric in clickhouse-client will be displayed in a better way. Fixes [#38756](https://github.com/ClickHouse/ClickHouse/issues/38756). [#39280](https://github.com/ClickHouse/ClickHouse/pull/39280) ([Sergei Trifonov](https://github.com/serxa)). -* Rethrow exception on filesystem cache initialization on server startup, better error message. [#39386](https://github.com/ClickHouse/ClickHouse/pull/39386) ([Kseniia Sumarokova](https://github.com/kssenii)). -* OpenTelemetry now collects traces without Processors spans by default (there are too many). To enable Processors spans collection `opentelemetry_trace_processors` setting. [#39170](https://github.com/ClickHouse/ClickHouse/pull/39170) ([Ilya Yatsishin](https://github.com/qoega)). -* Functions `multiMatch[Fuzzy](AllIndices/Any/AnyIndex)` - don't throw a logical error if the needle argument is empty. [#39012](https://github.com/ClickHouse/ClickHouse/pull/39012) ([Robert Schulze](https://github.com/rschu1ze)). -* Allow to declare `RabbitMQ` queue without default arguments `x-max-length` and `x-overflow`. [#39259](https://github.com/ClickHouse/ClickHouse/pull/39259) ([rnbondarenko](https://github.com/rnbondarenko)). - -#### Build/Testing/Packaging Improvement - -* Apply Clang Thread Safety Analysis (TSA) annotations to ClickHouse. [#38068](https://github.com/ClickHouse/ClickHouse/pull/38068) ([Robert Schulze](https://github.com/rschu1ze)). -* Adapt universal installation script for FreeBSD. [#39302](https://github.com/ClickHouse/ClickHouse/pull/39302) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Preparation for building on `s390x` platform. [#39193](https://github.com/ClickHouse/ClickHouse/pull/39193) ([Harry Lee](https://github.com/HarryLeeIBM)). -* Fix a bug in `jemalloc` library [#38757](https://github.com/ClickHouse/ClickHouse/pull/38757) ([Azat Khuzhin](https://github.com/azat)). -* Hardware benchmark now has support for automatic results uploading. [#38427](https://github.com/ClickHouse/ClickHouse/pull/38427) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* System table "system.licenses" is now correctly populated on Mac (Darwin). [#38294](https://github.com/ClickHouse/ClickHouse/pull/38294) ([Robert Schulze](https://github.com/rschu1ze)). -* Change `all|noarch` packages to architecture-dependent - Fix some documentation for it - Push aarch64|arm64 packages to artifactory and release assets - Fixes [#36443](https://github.com/ClickHouse/ClickHouse/issues/36443). [#38580](https://github.com/ClickHouse/ClickHouse/pull/38580) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). - -#### Bug Fix (user-visible misbehavior in official stable or prestable release) - -* Fix rounding for `Decimal128/Decimal256` with more than 19-digits long scale. [#38027](https://github.com/ClickHouse/ClickHouse/pull/38027) ([Igor Nikonov](https://github.com/devcrafter)). -* Fixed crash caused by data race in storage `Hive` (integration table engine). [#38887](https://github.com/ClickHouse/ClickHouse/pull/38887) ([lgbo](https://github.com/lgbo-ustc)). -* Fix crash when executing GRANT ALL ON *.* with ON CLUSTER. It was broken in https://github.com/ClickHouse/ClickHouse/pull/35767. This closes [#38618](https://github.com/ClickHouse/ClickHouse/issues/38618). [#38674](https://github.com/ClickHouse/ClickHouse/pull/38674) ([Vitaly Baranov](https://github.com/vitlibar)). -* Correct glob expansion in case of `{0..10}` forms. 
Fixes [#38498](https://github.com/ClickHouse/ClickHouse/issues/38498). The current implementation is similar to what the shell does, as mentioned by @rschu1ze [here](https://github.com/ClickHouse/ClickHouse/pull/38502#issuecomment-1169057723). [#38502](https://github.com/ClickHouse/ClickHouse/pull/38502) ([Heena Bansal](https://github.com/HeenaBansal2009)).
-* Fix crash for `mapUpdate`, `mapFilter` functions when used with a constant map argument. Closes [#38547](https://github.com/ClickHouse/ClickHouse/issues/38547). [#38553](https://github.com/ClickHouse/ClickHouse/pull/38553) ([hexiaoting](https://github.com/hexiaoting)).
-* Fix `toHour` monotonicity information for query optimization which can lead to incorrect query result (incorrect index analysis). This fixes [#38333](https://github.com/ClickHouse/ClickHouse/issues/38333). [#38675](https://github.com/ClickHouse/ClickHouse/pull/38675) ([Amos Bird](https://github.com/amosbird)).
-* Fix checking whether s3 storage supports parallel writes. It resulted in s3 parallel writes not working. [#38792](https://github.com/ClickHouse/ClickHouse/pull/38792) ([chen](https://github.com/xiedeyantu)).
-* Fix s3 seekable reads with parallel read buffer. (Affected memory usage during query). Closes [#38258](https://github.com/ClickHouse/ClickHouse/issues/38258). [#38802](https://github.com/ClickHouse/ClickHouse/pull/38802) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Update `simdjson`. This fixes [#38621](https://github.com/ClickHouse/ClickHouse/issues/38621) - a buffer overflow on machines with the latest Intel CPUs with AVX-512 VBMI. [#38838](https://github.com/ClickHouse/ClickHouse/pull/38838) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
-* Fix possible logical error for Vertical merges. [#38859](https://github.com/ClickHouse/ClickHouse/pull/38859) ([Maksim Kita](https://github.com/kitaisreal)).
-* Fix settings profile with seconds unit. [#38896](https://github.com/ClickHouse/ClickHouse/pull/38896) ([Raúl Marín](https://github.com/Algunenano)).
-* Fix incorrect partition pruning when there is a nullable partition key. Note: most likely you don't use nullable partition keys - this is an obscure feature you should not use. Nullable keys are nonsense and this feature is only needed for some crazy use-cases. This fixes [#38941](https://github.com/ClickHouse/ClickHouse/issues/38941). [#38946](https://github.com/ClickHouse/ClickHouse/pull/38946) ([Amos Bird](https://github.com/amosbird)).
-* Improve `fsync_part_directory` for fetches. [#38993](https://github.com/ClickHouse/ClickHouse/pull/38993) ([Azat Khuzhin](https://github.com/azat)).
-* Fix possible deadlock inside `OvercommitTracker`. Fixes [#37794](https://github.com/ClickHouse/ClickHouse/issues/37794). [#39030](https://github.com/ClickHouse/ClickHouse/pull/39030) ([Dmitry Novik](https://github.com/novikd)).
-* Fix bug in filesystem cache that could happen in some corner case which coincided with cache capacity hitting the limit. Closes [#39066](https://github.com/ClickHouse/ClickHouse/issues/39066). [#39070](https://github.com/ClickHouse/ClickHouse/pull/39070) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Fix some corner cases of interpretation of the arguments of window expressions. Fixes [#38538](https://github.com/ClickHouse/ClickHouse/issues/38538). Allow using higher-order functions in window expressions. [#39112](https://github.com/ClickHouse/ClickHouse/pull/39112) ([Dmitry Novik](https://github.com/novikd)).
-* Keep `LowCardinality` type in `tuple` function.
Previously `LowCardinality` type was dropped and elements of created tuple had underlying type of `LowCardinality`. [#39113](https://github.com/ClickHouse/ClickHouse/pull/39113) ([Anton Popov](https://github.com/CurtizJ)). -* Fix error `Block structure mismatch` which could happen for INSERT into table with attached MATERIALIZED VIEW and enabled setting `extremes = 1`. Closes [#29759](https://github.com/ClickHouse/ClickHouse/issues/29759) and [#38729](https://github.com/ClickHouse/ClickHouse/issues/38729). [#39125](https://github.com/ClickHouse/ClickHouse/pull/39125) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix unexpected query result when both `optimize_trivial_count_query` and `empty_result_for_aggregation_by_empty_set` are set to true. This fixes [#39140](https://github.com/ClickHouse/ClickHouse/issues/39140). [#39155](https://github.com/ClickHouse/ClickHouse/pull/39155) ([Amos Bird](https://github.com/amosbird)). -* Fixed error `Not found column Type in block` in selects with `PREWHERE` and read-in-order optimizations. [#39157](https://github.com/ClickHouse/ClickHouse/pull/39157) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Fix extremely rare race condition in during hardlinks for remote filesystem. The only way to reproduce it is concurrent run of backups. [#39190](https://github.com/ClickHouse/ClickHouse/pull/39190) ([alesapin](https://github.com/alesapin)). -* (zero-copy replication is an experimental feature that should not be used in production) Fix fetch of in-memory part with `allow_remote_fs_zero_copy_replication`. [#39214](https://github.com/ClickHouse/ClickHouse/pull/39214) ([Azat Khuzhin](https://github.com/azat)). -* (MaterializedPostgreSQL - experimental feature). Fix segmentation fault in MaterializedPostgreSQL database engine, which could happen if some exception occurred at replication initialisation. Closes [#36939](https://github.com/ClickHouse/ClickHouse/issues/36939). [#39272](https://github.com/ClickHouse/ClickHouse/pull/39272) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix incorrect fetch of table metadata from PostgreSQL database engine. Closes [#33502](https://github.com/ClickHouse/ClickHouse/issues/33502). [#39283](https://github.com/ClickHouse/ClickHouse/pull/39283) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix projection exception when aggregation keys are wrapped inside other functions. This fixes [#37151](https://github.com/ClickHouse/ClickHouse/issues/37151). [#37155](https://github.com/ClickHouse/ClickHouse/pull/37155) ([Amos Bird](https://github.com/amosbird)). -* Fix possible logical error `... with argument with type Nothing and default implementation for Nothing is expected to return result with type Nothing, got ...` in some functions. Closes: [#37610](https://github.com/ClickHouse/ClickHouse/issues/37610) Closes: [#37741](https://github.com/ClickHouse/ClickHouse/issues/37741). [#37759](https://github.com/ClickHouse/ClickHouse/pull/37759) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix incorrect columns order in subqueries of UNION (in case of duplicated columns in subselects may produce incorrect result). [#37887](https://github.com/ClickHouse/ClickHouse/pull/37887) ([Azat Khuzhin](https://github.com/azat)). -* Fix incorrect work of MODIFY ALTER Column with column names that contain dots. Closes [#37907](https://github.com/ClickHouse/ClickHouse/issues/37907). [#37971](https://github.com/ClickHouse/ClickHouse/pull/37971) ([Kruglov Pavel](https://github.com/Avogar)). 
-* Fix reading of sparse columns from `MergeTree` tables that store their data in S3. [#37978](https://github.com/ClickHouse/ClickHouse/pull/37978) ([Anton Popov](https://github.com/CurtizJ)).
-* Fix possible crash in `Distributed` async insert in case of removing a replica from config. [#38029](https://github.com/ClickHouse/ClickHouse/pull/38029) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
-* Fix "Missing columns" for GLOBAL JOIN with CTE without alias. [#38056](https://github.com/ClickHouse/ClickHouse/pull/38056) ([Azat Khuzhin](https://github.com/azat)).
-* Rewrite tuple functions as literals in backwards-compatibility mode. [#38096](https://github.com/ClickHouse/ClickHouse/pull/38096) ([Anton Kozlov](https://github.com/tonickkozlov)).
-* Fix redundant memory reservation for output block during `ORDER BY`. [#38127](https://github.com/ClickHouse/ClickHouse/pull/38127) ([iyupeng](https://github.com/iyupeng)).
-* Fix possible logical error `Bad cast from type DB::IColumn* to DB::ColumnNullable*` in array mapped functions. Closes [#38006](https://github.com/ClickHouse/ClickHouse/issues/38006). [#38132](https://github.com/ClickHouse/ClickHouse/pull/38132) ([Kruglov Pavel](https://github.com/Avogar)).
-* Fix temporary name clash in partial merge join, close [#37928](https://github.com/ClickHouse/ClickHouse/issues/37928). [#38135](https://github.com/ClickHouse/ClickHouse/pull/38135) ([Vladimir C](https://github.com/vdimir)).
-* Fix a minor issue with queries like `CREATE TABLE nested_name_tuples (`a` Tuple(x String, y Tuple(i Int32, j String))) ENGINE = Memory;` [#38136](https://github.com/ClickHouse/ClickHouse/pull/38136) ([lgbo](https://github.com/lgbo-ustc)).
-* Fix bug with nested short-circuit functions that led to execution of arguments even if condition is false. Closes [#38040](https://github.com/ClickHouse/ClickHouse/issues/38040). [#38173](https://github.com/ClickHouse/ClickHouse/pull/38173) ([Kruglov Pavel](https://github.com/Avogar)).
-* (Window View is an experimental feature) Fix LOGICAL_ERROR for WINDOW VIEW with incorrect structure. [#38205](https://github.com/ClickHouse/ClickHouse/pull/38205) ([Azat Khuzhin](https://github.com/azat)).
-* Update librdkafka submodule to fix crash when an OAUTHBEARER refresh callback is set. [#38225](https://github.com/ClickHouse/ClickHouse/pull/38225) ([Rafael Acevedo](https://github.com/racevedoo)).
-* Fix INSERT into Distributed hanging due to ProfileEvents. [#38307](https://github.com/ClickHouse/ClickHouse/pull/38307) ([Azat Khuzhin](https://github.com/azat)).
-* Fix retries in PostgreSQL engine. [#38310](https://github.com/ClickHouse/ClickHouse/pull/38310) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Fix optimization in PartialSortingTransform (SIGSEGV and possible incorrect result). [#38324](https://github.com/ClickHouse/ClickHouse/pull/38324) ([Azat Khuzhin](https://github.com/azat)).
-* Fix RabbitMQ with formats based on PeekableReadBuffer. Closes [#38061](https://github.com/ClickHouse/ClickHouse/issues/38061). [#38356](https://github.com/ClickHouse/ClickHouse/pull/38356) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* MaterializedPostgreSQL - experimental feature. Fix possible `Invalid number of rows in Chunk` in MaterializedPostgreSQL. Closes [#37323](https://github.com/ClickHouse/ClickHouse/issues/37323). [#38360](https://github.com/ClickHouse/ClickHouse/pull/38360) ([Kseniia Sumarokova](https://github.com/kssenii)).
-* Fix RabbitMQ configuration with connection string setting.
Closes [#36531](https://github.com/ClickHouse/ClickHouse/issues/36531). [#38365](https://github.com/ClickHouse/ClickHouse/pull/38365) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix PostgreSQL engine not using PostgreSQL schema when retrieving array dimension size. Closes [#36755](https://github.com/ClickHouse/ClickHouse/issues/36755). Closes [#36772](https://github.com/ClickHouse/ClickHouse/issues/36772). [#38366](https://github.com/ClickHouse/ClickHouse/pull/38366) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix possibly incorrect result of distributed queries with `DISTINCT` and `LIMIT`. Fixes [#38282](https://github.com/ClickHouse/ClickHouse/issues/38282). [#38371](https://github.com/ClickHouse/ClickHouse/pull/38371) ([Anton Popov](https://github.com/CurtizJ)). -* Fix wrong results of countSubstrings() & position() on patterns with 0-bytes. [#38589](https://github.com/ClickHouse/ClickHouse/pull/38589) ([Robert Schulze](https://github.com/rschu1ze)). -* Now it's possible to start a clickhouse-server and attach/detach tables even for tables with the incorrect values of IPv4/IPv6 representation. Proper fix for issue [#35156](https://github.com/ClickHouse/ClickHouse/issues/35156). [#38590](https://github.com/ClickHouse/ClickHouse/pull/38590) ([alesapin](https://github.com/alesapin)). -* `rankCorr` function will work correctly if some arguments are NaNs. This closes [#38396](https://github.com/ClickHouse/ClickHouse/issues/38396). [#38722](https://github.com/ClickHouse/ClickHouse/pull/38722) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix `parallel_view_processing=1` with `optimize_trivial_insert_select=1`. Fix `max_insert_threads` while pushing to views. [#38731](https://github.com/ClickHouse/ClickHouse/pull/38731) ([Azat Khuzhin](https://github.com/azat)). -* Fix use-after-free for aggregate functions with `Map` combinator that leads to incorrect result. [#38748](https://github.com/ClickHouse/ClickHouse/pull/38748) ([Azat Khuzhin](https://github.com/azat)). - -### ClickHouse release 22.6, 2022-06-16 - -#### Backward Incompatible Change - -* Remove support for octal number literals in SQL. In previous versions they were parsed as Float64. [#37765](https://github.com/ClickHouse/ClickHouse/pull/37765) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Changes how settings using `seconds` as type are parsed to support floating point values (for example: `max_execution_time=0.5`). Infinity or NaN values will throw an exception. [#37187](https://github.com/ClickHouse/ClickHouse/pull/37187) ([Raúl Marín](https://github.com/Algunenano)). -* Changed format of binary serialization of columns of experimental type `Object`. New format is more convenient to implement by third-party clients. [#37482](https://github.com/ClickHouse/ClickHouse/pull/37482) ([Anton Popov](https://github.com/CurtizJ)). -* Turn on setting `output_format_json_named_tuples_as_objects` by default. It allows to serialize named tuples as JSON objects in JSON formats. [#37756](https://github.com/ClickHouse/ClickHouse/pull/37756) ([Anton Popov](https://github.com/CurtizJ)). -* LIKE patterns with trailing escape symbol ('\\') are now disallowed (as mandated by the SQL standard). [#37764](https://github.com/ClickHouse/ClickHouse/pull/37764) ([Robert Schulze](https://github.com/rschu1ze)). 
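A tiny sketch of the new fractional-seconds parsing for second-typed settings described above (the query itself is an arbitrary long-running example):

```sql
SET max_execution_time = 0.5;        -- now accepted and interpreted as half a second; inf/nan throw an exception
SELECT count() FROM system.numbers;  -- fails with a timeout error once the limit is exceeded
```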
-* If you run different ClickHouse versions on a cluster with AArch64 CPU or mix AArch64 and amd64 on a cluster, and use distributed queries with GROUP BY multiple keys of fixed-size type that fit in 256 bits but don't fit in 64 bits, and the size of the result is huge, the data will not be fully aggregated in the result of these queries during upgrade. Workaround: upgrade with downtime instead of a rolling upgrade. - -#### New Feature - -* Add `GROUPING` function. It allows to disambiguate the records in the queries with `ROLLUP`, `CUBE` or `GROUPING SETS`. Closes [#19426](https://github.com/ClickHouse/ClickHouse/issues/19426). [#37163](https://github.com/ClickHouse/ClickHouse/pull/37163) ([Dmitry Novik](https://github.com/novikd)). -* A new codec [FPC](https://userweb.cs.txstate.edu/~burtscher/papers/dcc07a.pdf) algorithm for floating point data compression. [#37553](https://github.com/ClickHouse/ClickHouse/pull/37553) ([Mikhail Guzov](https://github.com/koloshmet)). -* Add new columnar JSON formats: `JSONColumns`, `JSONCompactColumns`, `JSONColumnsWithMetadata`. Closes [#36338](https://github.com/ClickHouse/ClickHouse/issues/36338) Closes [#34509](https://github.com/ClickHouse/ClickHouse/issues/34509). [#36975](https://github.com/ClickHouse/ClickHouse/pull/36975) ([Kruglov Pavel](https://github.com/Avogar)). -* Added open telemetry traces visualizing tool based on d3js. [#37810](https://github.com/ClickHouse/ClickHouse/pull/37810) ([Sergei Trifonov](https://github.com/serxa)). -* Support INSERTs into `system.zookeeper` table. Closes [#22130](https://github.com/ClickHouse/ClickHouse/issues/22130). [#37596](https://github.com/ClickHouse/ClickHouse/pull/37596) ([Han Fei](https://github.com/hanfei1991)). -* Support non-constant pattern argument for `LIKE`, `ILIKE` and `match` functions. [#37251](https://github.com/ClickHouse/ClickHouse/pull/37251) ([Robert Schulze](https://github.com/rschu1ze)). -* Executable user defined functions now support parameters. Example: `SELECT test_function(parameters)(arguments)`. Closes [#37578](https://github.com/ClickHouse/ClickHouse/issues/37578). [#37720](https://github.com/ClickHouse/ClickHouse/pull/37720) ([Maksim Kita](https://github.com/kitaisreal)). -* Add `merge_reason` column to system.part_log table. [#36912](https://github.com/ClickHouse/ClickHouse/pull/36912) ([Sema Checherinda](https://github.com/CheSema)). -* Add support for Maps and Records in Avro format. Add new setting `input_format_avro_null_as_default ` that allow to insert null as default in Avro format. Closes [#18925](https://github.com/ClickHouse/ClickHouse/issues/18925) Closes [#37378](https://github.com/ClickHouse/ClickHouse/issues/37378) Closes [#32899](https://github.com/ClickHouse/ClickHouse/issues/32899). [#37525](https://github.com/ClickHouse/ClickHouse/pull/37525) ([Kruglov Pavel](https://github.com/Avogar)). -* Add `clickhouse-disks` tool to introspect and operate on virtual filesystems configured for ClickHouse. [#36060](https://github.com/ClickHouse/ClickHouse/pull/36060) ([Artyom Yurkov](https://github.com/Varinara)). -* Adds H3 unidirectional edge functions. [#36843](https://github.com/ClickHouse/ClickHouse/pull/36843) ([Bharat Nallan](https://github.com/bharatnc)). -* Add support for calculating [hashids](https://hashids.org/) from unsigned integers. [#37013](https://github.com/ClickHouse/ClickHouse/pull/37013) ([Michael Nutt](https://github.com/mnutt)). -* Explicit `SALT` specification is allowed for `CREATE USER IDENTIFIED WITH sha256_hash`. 
[#37377](https://github.com/ClickHouse/ClickHouse/pull/37377) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Add two new settings `input_format_csv_skip_first_lines/input_format_tsv_skip_first_lines` to allow skipping specified number of lines in the beginning of the file in CSV/TSV formats. [#37537](https://github.com/ClickHouse/ClickHouse/pull/37537) ([Kruglov Pavel](https://github.com/Avogar)). -* `showCertificate` function shows current server's SSL certificate. [#37540](https://github.com/ClickHouse/ClickHouse/pull/37540) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* HTTP source for Data Dictionaries in Named Collections is supported. [#37581](https://github.com/ClickHouse/ClickHouse/pull/37581) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Implemented changing the comment for `ReplicatedMergeTree` tables. [#37416](https://github.com/ClickHouse/ClickHouse/pull/37416) ([Vasily Nemkov](https://github.com/Enmk)). -* Added `SYSTEM UNFREEZE` query that deletes the whole backup regardless if the corresponding table is deleted or not. [#36424](https://github.com/ClickHouse/ClickHouse/pull/36424) ([Vadim Volodin](https://github.com/PolyProgrammist)). - -#### Experimental Feature - -* Enables `POPULATE` for `WINDOW VIEW`. [#36945](https://github.com/ClickHouse/ClickHouse/pull/36945) ([vxider](https://github.com/Vxider)). -* `ALTER TABLE ... MODIFY QUERY` support for `WINDOW VIEW`. [#37188](https://github.com/ClickHouse/ClickHouse/pull/37188) ([vxider](https://github.com/Vxider)). -* This PR changes the behavior of the `ENGINE` syntax in `WINDOW VIEW`, to make it like in `MATERIALIZED VIEW`. [#37214](https://github.com/ClickHouse/ClickHouse/pull/37214) ([vxider](https://github.com/Vxider)). - -#### Performance Improvement - -* Added numerous optimizations for ARM NEON [#38093](https://github.com/ClickHouse/ClickHouse/pull/38093)([Daniel Kutenin](https://github.com/danlark1)), ([Alexandra Pilipyuk](https://github.com/chalice19)) Note: if you run different ClickHouse versions on a cluster with ARM CPU and use distributed queries with GROUP BY multiple keys of fixed-size type that fit in 256 bits but don't fit in 64 bits, the result of the aggregation query will be wrong during upgrade. Workaround: upgrade with downtime instead of a rolling upgrade. -* Improve performance and memory usage for select of subset of columns for formats Native, Protobuf, CapnProto, JSONEachRow, TSKV, all formats with suffixes WithNames/WithNamesAndTypes. Previously while selecting only subset of columns from files in these formats all columns were read and stored in memory. Now only required columns are read. This PR enables setting `input_format_skip_unknown_fields` by default, because otherwise in case of select of subset of columns exception will be thrown. [#37192](https://github.com/ClickHouse/ClickHouse/pull/37192) ([Kruglov Pavel](https://github.com/Avogar)). -* Now more filters can be pushed down for join. [#37472](https://github.com/ClickHouse/ClickHouse/pull/37472) ([Amos Bird](https://github.com/amosbird)). -* Load marks for only necessary columns when reading wide parts. [#36879](https://github.com/ClickHouse/ClickHouse/pull/36879) ([Anton Kozlov](https://github.com/tonickkozlov)). -* Improved performance of aggregation in case, when sparse columns (can be enabled by experimental setting `ratio_of_defaults_for_sparse_serialization` in `MergeTree` tables) are used as arguments in aggregate functions. 
[#37617](https://github.com/ClickHouse/ClickHouse/pull/37617) ([Anton Popov](https://github.com/CurtizJ)). -* Optimize function `COALESCE` with two arguments. [#37666](https://github.com/ClickHouse/ClickHouse/pull/37666) ([Anton Popov](https://github.com/CurtizJ)). -* Replace `multiIf` to `if` in case when `multiIf` has only one condition, because function `if` is more performant. [#37695](https://github.com/ClickHouse/ClickHouse/pull/37695) ([Anton Popov](https://github.com/CurtizJ)). -* Improve performance of `dictGetDescendants`, `dictGetChildren` functions, create temporary parent to children hierarchical index per query, not per function call during query. Allow to specify `BIDIRECTIONAL` for `HIERARHICAL` attributes, dictionary will maintain parent to children index in memory, that way functions `dictGetDescendants`, `dictGetChildren` will not create temporary index per query. Closes [#32481](https://github.com/ClickHouse/ClickHouse/issues/32481). [#37148](https://github.com/ClickHouse/ClickHouse/pull/37148) ([Maksim Kita](https://github.com/kitaisreal)). -* Aggregates state destruction now may be posted on a thread pool. For queries with LIMIT and big state it provides significant speedup, e.g. `select uniq(number) from numbers_mt(1e7) group by number limit 100` became around 2.5x faster. [#37855](https://github.com/ClickHouse/ClickHouse/pull/37855) ([Nikita Taranov](https://github.com/nickitat)). -* Improve sort performance by single column. [#37195](https://github.com/ClickHouse/ClickHouse/pull/37195) ([Maksim Kita](https://github.com/kitaisreal)). -* Improve performance of single column sorting using sorting queue specializations. [#37990](https://github.com/ClickHouse/ClickHouse/pull/37990) ([Maksim Kita](https://github.com/kitaisreal)). -* Improved performance on array norm and distance functions 2x-4x times. [#37394](https://github.com/ClickHouse/ClickHouse/pull/37394) ([Alexander Gololobov](https://github.com/davenger)). -* Improve performance of number comparison functions using dynamic dispatch. [#37399](https://github.com/ClickHouse/ClickHouse/pull/37399) ([Maksim Kita](https://github.com/kitaisreal)). -* Improve performance of ORDER BY with LIMIT. [#37481](https://github.com/ClickHouse/ClickHouse/pull/37481) ([Maksim Kita](https://github.com/kitaisreal)). -* Improve performance of `hasAll` function using dynamic dispatch infrastructure. [#37484](https://github.com/ClickHouse/ClickHouse/pull/37484) ([Maksim Kita](https://github.com/kitaisreal)). -* Improve performance of `greatCircleAngle`, `greatCircleDistance`, `geoDistance` functions. [#37524](https://github.com/ClickHouse/ClickHouse/pull/37524) ([Maksim Kita](https://github.com/kitaisreal)). -* Improve performance of insert into MergeTree if there are multiple columns in ORDER BY. [#35762](https://github.com/ClickHouse/ClickHouse/pull/35762) ([Maksim Kita](https://github.com/kitaisreal)). -* Fix excessive CPU usage in background when there are a lot of tables. [#38028](https://github.com/ClickHouse/ClickHouse/pull/38028) ([Maksim Kita](https://github.com/kitaisreal)). -* Improve performance of `not` function using dynamic dispatch. [#38058](https://github.com/ClickHouse/ClickHouse/pull/38058) ([Maksim Kita](https://github.com/kitaisreal)). -* Optimized the internal caching of re2 patterns which occur e.g. in LIKE and MATCH functions. [#37544](https://github.com/ClickHouse/ClickHouse/pull/37544) ([Robert Schulze](https://github.com/rschu1ze)). 
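To make the `BIDIRECTIONAL` hierarchical-dictionary entry above more concrete, here is a hedged sketch; the dictionary name `regions_dict`, the source table `regions`, and the exact placement of the attribute keywords are illustrative assumptions, not text from the changelog:

```sql
-- Hypothetical dictionary: declaring the parent key as HIERARCHICAL BIDIRECTIONAL is expected
-- to keep a parent-to-children index in memory, so dictGetChildren/dictGetDescendants
-- no longer build a temporary index on every query.
CREATE DICTIONARY regions_dict
(
    id        UInt64,
    parent_id UInt64 HIERARCHICAL BIDIRECTIONAL
)
PRIMARY KEY id
SOURCE(CLICKHOUSE(TABLE 'regions'))
LAYOUT(HASHED())
LIFETIME(MIN 0 MAX 300);

SELECT dictGetChildren('regions_dict', toUInt64(1)) AS direct_children;
```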
-* Improve filter bitmask generator function all in one with AVX-512 instructions. [#37588](https://github.com/ClickHouse/ClickHouse/pull/37588) ([yaqi-zhao](https://github.com/yaqi-zhao)). -* Apply read method `threadpool` for Hive integration engine. This will significantly speed up reading. [#36328](https://github.com/ClickHouse/ClickHouse/pull/36328) ([李扬](https://github.com/taiyang-li)). -* When all the columns to read are partition keys, construct columns by the file's row number without real reading the Hive file. [#37103](https://github.com/ClickHouse/ClickHouse/pull/37103) ([lgbo](https://github.com/lgbo-ustc)). -* Support multi disks for caching hive files. [#37279](https://github.com/ClickHouse/ClickHouse/pull/37279) ([lgbo](https://github.com/lgbo-ustc)). -* Limiting the maximum cache usage per query can effectively prevent cache pool contamination. [Related Issues](https://github.com/ClickHouse/ClickHouse/issues/28961). [#37859](https://github.com/ClickHouse/ClickHouse/pull/37859) ([Han Shukai](https://github.com/KinderRiven)). -* Currently clickhouse directly downloads all remote files to the local cache (even if they are only read once), which will frequently cause IO of the local hard disk. In some scenarios, these IOs may not be necessary and may easily cause negative optimization. As shown in the figure below, when we run SSB Q1-Q4, the performance of the cache has caused negative optimization. [#37516](https://github.com/ClickHouse/ClickHouse/pull/37516) ([Han Shukai](https://github.com/KinderRiven)). -* Allow to prune the list of files via virtual columns such as `_file` and `_path` when reading from S3. This is for [#37174](https://github.com/ClickHouse/ClickHouse/issues/37174) , [#23494](https://github.com/ClickHouse/ClickHouse/issues/23494). [#37356](https://github.com/ClickHouse/ClickHouse/pull/37356) ([Amos Bird](https://github.com/amosbird)). -* In function: CompressedWriteBuffer::nextImpl(), there is an unnecessary write-copy step that would happen frequently during inserting data. Below shows the differentiation with this patch: - Before: 1. Compress "working_buffer" into "compressed_buffer" 2. write-copy into "out" - After: Directly Compress "working_buffer" into "out". [#37242](https://github.com/ClickHouse/ClickHouse/pull/37242) ([jasperzhu](https://github.com/jinjunzh)). - -#### Improvement - -* Support types with non-standard defaults in ROLLUP, CUBE, GROUPING SETS. Closes [#37360](https://github.com/ClickHouse/ClickHouse/issues/37360). [#37667](https://github.com/ClickHouse/ClickHouse/pull/37667) ([Dmitry Novik](https://github.com/novikd)). -* Fix stack traces collection on ARM. Closes [#37044](https://github.com/ClickHouse/ClickHouse/issues/37044). Closes [#15638](https://github.com/ClickHouse/ClickHouse/issues/15638). [#37797](https://github.com/ClickHouse/ClickHouse/pull/37797) ([Maksim Kita](https://github.com/kitaisreal)). -* Client will try every IP address returned by DNS resolution until successful connection. [#37273](https://github.com/ClickHouse/ClickHouse/pull/37273) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Allow to use String type instead of Binary in Arrow/Parquet/ORC formats. This PR introduces 3 new settings for it: `output_format_arrow_string_as_string`, `output_format_parquet_string_as_string`, `output_format_orc_string_as_string`. Default value for all settings is `false`. [#37327](https://github.com/ClickHouse/ClickHouse/pull/37327) ([Kruglov Pavel](https://github.com/Avogar)). 
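Following the Arrow/Parquet/ORC entry just above, a brief hedged sketch of how one of the new settings might be used (the output file name is arbitrary):

```sql
-- Minimal sketch: write string columns as Parquet String instead of Binary.
SET output_format_parquet_string_as_string = 1;

SELECT 'hello' AS greeting
INTO OUTFILE 'greeting.parquet'
FORMAT Parquet;
```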
-* Apply setting `input_format_max_rows_to_read_for_schema_inference` for all read rows in total from all files in globs. Previously setting `input_format_max_rows_to_read_for_schema_inference` was applied for each file in glob separately and in case of huge number of nulls we could read first `input_format_max_rows_to_read_for_schema_inference` rows from each file and get nothing. Also increase default value for this setting to 25000. [#37332](https://github.com/ClickHouse/ClickHouse/pull/37332) ([Kruglov Pavel](https://github.com/Avogar)). -* Add separate `CLUSTER` grant (and `access_control_improvements.on_cluster_queries_require_cluster_grant` configuration directive, for backward compatibility, default to `false`). [#35767](https://github.com/ClickHouse/ClickHouse/pull/35767) ([Azat Khuzhin](https://github.com/azat)). -* Added support for schema inference for `hdfsCluster`. [#35812](https://github.com/ClickHouse/ClickHouse/pull/35812) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Implement `least_used` load balancing algorithm for disks inside volume (multi disk configuration). [#36686](https://github.com/ClickHouse/ClickHouse/pull/36686) ([Azat Khuzhin](https://github.com/azat)). -* Modify the HTTP Endpoint to return the full stats under the `X-ClickHouse-Summary` header when `send_progress_in_http_headers=0` (before it would return all zeros). - Modify the HTTP Endpoint to return `X-ClickHouse-Exception-Code` header when progress has been sent before (`send_progress_in_http_headers=1`) - Modify the HTTP Endpoint to return `HTTP_REQUEST_TIMEOUT` (408) instead of `HTTP_INTERNAL_SERVER_ERROR` (500) on `TIMEOUT_EXCEEDED` errors. [#36884](https://github.com/ClickHouse/ClickHouse/pull/36884) ([Raúl Marín](https://github.com/Algunenano)). -* Allow a user to inspect grants from granted roles. [#36941](https://github.com/ClickHouse/ClickHouse/pull/36941) ([nvartolomei](https://github.com/nvartolomei)). -* Do not calculate an integral numerically but use CDF functions instead. This will speed up execution and will increase the precision. This fixes [#36714](https://github.com/ClickHouse/ClickHouse/issues/36714). [#36953](https://github.com/ClickHouse/ClickHouse/pull/36953) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Add default implementation for Nothing in functions. Now most of the functions will return column with type Nothing in case one of it's arguments is Nothing. It also solves problem with functions like arrayMap/arrayFilter and similar when they have empty array as an argument. Previously queries like `select arrayMap(x -> 2 * x, []);` failed because function inside lambda cannot work with type `Nothing`, now such queries return empty array with type `Array(Nothing)`. Also add support for arrays of nullable types in functions like arrayFilter/arrayFill. Previously, queries like `select arrayFilter(x -> x % 2, [1, NULL])` failed, now they work (if the result of lambda is NULL, then this value won't be included in the result). Closes [#37000](https://github.com/ClickHouse/ClickHouse/issues/37000). [#37048](https://github.com/ClickHouse/ClickHouse/pull/37048) ([Kruglov Pavel](https://github.com/Avogar)). -* Now if a shard has local replica we create a local plan and a plan to read from all remote replicas. They have shared initiator which coordinates reading. [#37204](https://github.com/ClickHouse/ClickHouse/pull/37204) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). 
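As a hedged illustration of the `Nothing`/nullable-arrays entry above (the expected results are inferred from the description rather than verified output):

```sql
-- Minimal sketch: an empty-array lambda now yields Array(Nothing) instead of failing,
-- and elements for which the lambda returns NULL are dropped by arrayFilter.
SELECT
    arrayMap(x -> 2 * x, []) AS empty_result,          -- expected: []
    arrayFilter(x -> x % 2, [1, NULL, 3]) AS odd_only;  -- expected: [1, 3]
```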
-* No longer abort server startup if the configuration option "mark_cache_size" is not explicitly set. [#37326](https://github.com/ClickHouse/ClickHouse/pull/37326) ([Robert Schulze](https://github.com/rschu1ze)). -* Allow providing `NULL`/`NOT NULL` right after the type in a column declaration. [#37337](https://github.com/ClickHouse/ClickHouse/pull/37337) ([Igor Nikonov](https://github.com/devcrafter)). -* Optimize getting a read buffer for file segments in the `PARTIALLY_DOWNLOADED` state. [#37338](https://github.com/ClickHouse/ClickHouse/pull/37338) ([xiedeyantu](https://github.com/xiedeyantu)). -* Try to improve short circuit functions processing to fix problems with stress tests. [#37384](https://github.com/ClickHouse/ClickHouse/pull/37384) ([Kruglov Pavel](https://github.com/Avogar)). -* Allow generating multiple columns with distinct UUIDs, e.g. `generateUUIDv4(1)`, `generateUUIDv4(2)`. [#37395](https://github.com/ClickHouse/ClickHouse/issues/37395). [#37415](https://github.com/ClickHouse/ClickHouse/pull/37415) ([Memo](https://github.com/Joeywzr)). -* Fix extremely rare deadlock during part fetch in zero-copy replication. Fixes [#37423](https://github.com/ClickHouse/ClickHouse/issues/37423). [#37424](https://github.com/ClickHouse/ClickHouse/pull/37424) ([metahys](https://github.com/metahys)). -* Don't allow creating a storage with an unknown data format. [#37450](https://github.com/ClickHouse/ClickHouse/pull/37450) ([Kruglov Pavel](https://github.com/Avogar)). -* Set `global_memory_usage_overcommit_max_wait_microseconds` default value to 5 seconds. Add info about `OvercommitTracker` to the OOM exception message. Add `MemoryOvercommitWaitTimeMicroseconds` profile event. [#37460](https://github.com/ClickHouse/ClickHouse/pull/37460) ([Dmitry Novik](https://github.com/novikd)). -* Do not display `-0.0` CPU time in clickhouse-client. It can appear due to rounding errors. This closes [#38003](https://github.com/ClickHouse/ClickHouse/issues/38003). This closes [#38038](https://github.com/ClickHouse/ClickHouse/issues/38038). [#38064](https://github.com/ClickHouse/ClickHouse/pull/38064) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Play UI: Keep controls in place when the page is scrolled horizontally. This makes edits comfortable even if the table is wide and it was scrolled far to the right. The feature was proposed by Maksym Tereshchenko from CaspianDB. [#37470](https://github.com/ClickHouse/ClickHouse/pull/37470) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Modify the query div in play.html to be extendable beyond 20% height. For very long queries it is helpful to extend the textarea element, but previously, since the div had a fixed height, the extended textarea hid the data div underneath. With this fix, extending the textarea element pushes the data div down/up so that the extended textarea won't hide it. It also keeps the query box width at 100% even when the user adjusts the size of the query textarea. [#37488](https://github.com/ClickHouse/ClickHouse/pull/37488) ([guyco87](https://github.com/guyco87)). -* Added `ProfileEvents` for introspection of the type of written (inserted or merged) parts (`Inserted{Wide/Compact/InMemory}Parts`, `MergedInto{Wide/Compact/InMemory}Parts`). Added column `part_type` to `system.part_log`. Resolves [#37495](https://github.com/ClickHouse/ClickHouse/issues/37495). [#37536](https://github.com/ClickHouse/ClickHouse/pull/37536) ([Anton Popov](https://github.com/CurtizJ)). -* clickhouse-keeper improvement: move broken logs to a timestamped folder. 
[#37565](https://github.com/ClickHouse/ClickHouse/pull/37565) ([Antonio Andelic](https://github.com/antonio2368)). -* Do not write expired columns by TTL after subsequent merges (before only first merge/optimize of the part will not write expired by TTL columns, all other will do). [#37570](https://github.com/ClickHouse/ClickHouse/pull/37570) ([Azat Khuzhin](https://github.com/azat)). -* More precise result of the `dumpColumnStructure` miscellaneous function in presence of LowCardinality or Sparse columns. In previous versions, these functions were converting the argument to a full column before returning the result. This is needed to provide an answer in [#6935](https://github.com/ClickHouse/ClickHouse/issues/6935). [#37633](https://github.com/ClickHouse/ClickHouse/pull/37633) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* clickhouse-keeper: store only unique session IDs for watches. [#37641](https://github.com/ClickHouse/ClickHouse/pull/37641) ([Azat Khuzhin](https://github.com/azat)). -* Fix possible "Cannot write to finalized buffer". [#37645](https://github.com/ClickHouse/ClickHouse/pull/37645) ([Azat Khuzhin](https://github.com/azat)). -* Add setting `support_batch_delete` for `DiskS3` to disable multiobject delete calls, which Google Cloud Storage doesn't support. [#37659](https://github.com/ClickHouse/ClickHouse/pull/37659) ([Fred Wulff](https://github.com/frew)). -* Add an option to disable connection pooling in ODBC bridge. [#37705](https://github.com/ClickHouse/ClickHouse/pull/37705) ([Anton Kozlov](https://github.com/tonickkozlov)). -* Functions `dictGetHierarchy`, `dictIsIn`, `dictGetChildren`, `dictGetDescendants` added support nullable `HIERARCHICAL` attribute in dictionaries. Closes [#35521](https://github.com/ClickHouse/ClickHouse/issues/35521). [#37805](https://github.com/ClickHouse/ClickHouse/pull/37805) ([Maksim Kita](https://github.com/kitaisreal)). -* Expose BoringSSL version related info in the `system.build_options` table. [#37850](https://github.com/ClickHouse/ClickHouse/pull/37850) ([Bharat Nallan](https://github.com/bharatnc)). -* Now clickhouse-server removes `delete_tmp` directories on server start. Fixes [#26503](https://github.com/ClickHouse/ClickHouse/issues/26503). [#37906](https://github.com/ClickHouse/ClickHouse/pull/37906) ([alesapin](https://github.com/alesapin)). -* Clean up broken detached parts after timeout. Closes [#25195](https://github.com/ClickHouse/ClickHouse/issues/25195). [#37975](https://github.com/ClickHouse/ClickHouse/pull/37975) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Now in MergeTree table engines family failed-to-move parts will be removed instantly. [#37994](https://github.com/ClickHouse/ClickHouse/pull/37994) ([alesapin](https://github.com/alesapin)). -* Now if setting `always_fetch_merged_part` is enabled for ReplicatedMergeTree merges will try to find parts on other replicas rarely with smaller load for [Zoo]Keeper. [#37995](https://github.com/ClickHouse/ClickHouse/pull/37995) ([alesapin](https://github.com/alesapin)). -* Add implicit grants with grant option too. For example `GRANT CREATE TABLE ON test.* TO A WITH GRANT OPTION` now allows `A` to execute `GRANT CREATE VIEW ON test.* TO B`. [#38017](https://github.com/ClickHouse/ClickHouse/pull/38017) ([Vitaly Baranov](https://github.com/vitlibar)). - -#### Build/Testing/Packaging Improvement - -* Use `clang-14` and LLVM infrastructure version 14 for builds. This closes [#34681](https://github.com/ClickHouse/ClickHouse/issues/34681). 
[#34754](https://github.com/ClickHouse/ClickHouse/pull/34754) ([Alexey Milovidov](https://github.com/alexey-milovidov)). Note: `clang-14` has [a bug](https://github.com/google/sanitizers/issues/1540) in ThreadSanitizer that makes our CI work worse. -* Allow to drop privileges at startup. This simplifies Docker images. Closes [#36293](https://github.com/ClickHouse/ClickHouse/issues/36293). [#36341](https://github.com/ClickHouse/ClickHouse/pull/36341) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Add docs spellcheck to CI. [#37790](https://github.com/ClickHouse/ClickHouse/pull/37790) ([Vladimir C](https://github.com/vdimir)). -* Fix overly aggressive stripping which removed the embedded hash required for checking the consistency of the executable. [#37993](https://github.com/ClickHouse/ClickHouse/pull/37993) ([Robert Schulze](https://github.com/rschu1ze)). - -#### Bug Fix - -* Fix `SELECT ... INTERSECT` and `EXCEPT SELECT` statements with constant string types. [#37738](https://github.com/ClickHouse/ClickHouse/pull/37738) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix `GROUP BY` `AggregateFunction` (i.e. you `GROUP BY` by the column that has `AggregateFunction` type). [#37093](https://github.com/ClickHouse/ClickHouse/pull/37093) ([Azat Khuzhin](https://github.com/azat)). -* (experimental WINDOW VIEW) Fix `addDependency` in WindowView. This bug can be reproduced like [#37237](https://github.com/ClickHouse/ClickHouse/issues/37237). [#37224](https://github.com/ClickHouse/ClickHouse/pull/37224) ([vxider](https://github.com/Vxider)). -* Fix inconsistency in ORDER BY ... WITH FILL feature. Query, containing ORDER BY ... WITH FILL, can generate extra rows when multiple WITH FILL columns are present. [#38074](https://github.com/ClickHouse/ClickHouse/pull/38074) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* This PR moving `addDependency` from constructor to `startup()` to avoid adding dependency to a *dropped* table, fix [#37237](https://github.com/ClickHouse/ClickHouse/issues/37237). [#37243](https://github.com/ClickHouse/ClickHouse/pull/37243) ([vxider](https://github.com/Vxider)). -* Fix inserting defaults for missing values in columnar formats. Previously missing columns were filled with defaults for types, not for columns. [#37253](https://github.com/ClickHouse/ClickHouse/pull/37253) ([Kruglov Pavel](https://github.com/Avogar)). -* (experimental Object type) Fix some cases of insertion nested arrays to columns of type `Object`. [#37305](https://github.com/ClickHouse/ClickHouse/pull/37305) ([Anton Popov](https://github.com/CurtizJ)). -* Fix unexpected errors with a clash of constant strings in aggregate function, prewhere and join. Close [#36891](https://github.com/ClickHouse/ClickHouse/issues/36891). [#37336](https://github.com/ClickHouse/ClickHouse/pull/37336) ([Vladimir C](https://github.com/vdimir)). -* Fix projections with GROUP/ORDER BY in query and optimize_aggregation_in_order (before the result was incorrect since only finish sorting was performed). [#37342](https://github.com/ClickHouse/ClickHouse/pull/37342) ([Azat Khuzhin](https://github.com/azat)). -* Fixed error with symbols in key name in S3. Fixes [#33009](https://github.com/ClickHouse/ClickHouse/issues/33009). [#37344](https://github.com/ClickHouse/ClickHouse/pull/37344) ([Vladimir Chebotarev](https://github.com/excitoon)). -* Throw an exception when GROUPING SETS used with ROLLUP or CUBE. 
[#37367](https://github.com/ClickHouse/ClickHouse/pull/37367) ([Dmitry Novik](https://github.com/novikd)). -* Fix LOGICAL_ERROR in getMaxSourcePartsSizeForMerge during merges (in case non-standard, greater values of `background_pool_size`/`background_merges_mutations_concurrency_ratio` have been specified in `config.xml` (new way) and not in `users.xml` (deprecated way)). [#37413](https://github.com/ClickHouse/ClickHouse/pull/37413) ([Azat Khuzhin](https://github.com/azat)). -* Stop removing UTF-8 BOM in RowBinary format. [#37428](https://github.com/ClickHouse/ClickHouse/pull/37428) ([Paul Loyd](https://github.com/loyd)). -* clickhouse-keeper bugfix: fix force recovery for single node cluster. [#37440](https://github.com/ClickHouse/ClickHouse/pull/37440) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix logical error in normalizeUTF8 functions. Closes [#37298](https://github.com/ClickHouse/ClickHouse/issues/37298). [#37443](https://github.com/ClickHouse/ClickHouse/pull/37443) ([Maksim Kita](https://github.com/kitaisreal)). -* Fix cast of LowCardinality of Nullable in JoinSwitcher, close [#37385](https://github.com/ClickHouse/ClickHouse/issues/37385). [#37453](https://github.com/ClickHouse/ClickHouse/pull/37453) ([Vladimir C](https://github.com/vdimir)). -* Fix named tuples output in ORC/Arrow/Parquet formats. [#37458](https://github.com/ClickHouse/ClickHouse/pull/37458) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix optimization of monotonous functions in the ORDER BY clause in presence of GROUPING SETS. Fixes [#37401](https://github.com/ClickHouse/ClickHouse/issues/37401). [#37493](https://github.com/ClickHouse/ClickHouse/pull/37493) ([Dmitry Novik](https://github.com/novikd)). -* Fix error on joining with a dictionary under some conditions. Close [#37386](https://github.com/ClickHouse/ClickHouse/issues/37386). [#37530](https://github.com/ClickHouse/ClickHouse/pull/37530) ([Vladimir C](https://github.com/vdimir)). -* Prohibit `optimize_aggregation_in_order` with `GROUPING SETS` (fixes `LOGICAL_ERROR`). [#37542](https://github.com/ClickHouse/ClickHouse/pull/37542) ([Azat Khuzhin](https://github.com/azat)). -* Fix wrong dump information of ActionsDAG. [#37587](https://github.com/ClickHouse/ClickHouse/pull/37587) ([zhanglistar](https://github.com/zhanglistar)). -* Fix converting types for UNION queries (may produce LOGICAL_ERROR). [#37593](https://github.com/ClickHouse/ClickHouse/pull/37593) ([Azat Khuzhin](https://github.com/azat)). -* Fix `WITH FILL` modifier with negative intervals in `STEP` clause. Fixes [#37514](https://github.com/ClickHouse/ClickHouse/issues/37514). [#37600](https://github.com/ClickHouse/ClickHouse/pull/37600) ([Anton Popov](https://github.com/CurtizJ)). -* Fix illegal joinGet array usage when `join_use_nulls = 1`. This fixes [#37562](https://github.com/ClickHouse/ClickHouse/issues/37562). [#37650](https://github.com/ClickHouse/ClickHouse/pull/37650) ([Amos Bird](https://github.com/amosbird)). -* Fix columns number mismatch in cross join, close [#37561](https://github.com/ClickHouse/ClickHouse/issues/37561). [#37653](https://github.com/ClickHouse/ClickHouse/pull/37653) ([Vladimir C](https://github.com/vdimir)). -* Fix segmentation fault in `show create table` from a mysql database when it is configured with named collections. Closes [#37683](https://github.com/ClickHouse/ClickHouse/issues/37683). 
[#37690](https://github.com/ClickHouse/ClickHouse/pull/37690) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix RabbitMQ Storage not being able to start up on server restart if the storage was created without a SETTINGS clause. Closes [#37463](https://github.com/ClickHouse/ClickHouse/issues/37463). [#37691](https://github.com/ClickHouse/ClickHouse/pull/37691) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Disable CREATE/DROP of SQL user defined functions in readonly mode. Closes [#37280](https://github.com/ClickHouse/ClickHouse/issues/37280). [#37699](https://github.com/ClickHouse/ClickHouse/pull/37699) ([Maksim Kita](https://github.com/kitaisreal)). -* Fix formatting of Nullable arguments for executable user defined functions. Closes [#35897](https://github.com/ClickHouse/ClickHouse/issues/35897). [#37711](https://github.com/ClickHouse/ClickHouse/pull/37711) ([Maksim Kita](https://github.com/kitaisreal)). -* Fix optimization enabled by setting `optimize_monotonous_functions_in_order_by` in distributed queries. Fixes [#36037](https://github.com/ClickHouse/ClickHouse/issues/36037). [#37724](https://github.com/ClickHouse/ClickHouse/pull/37724) ([Anton Popov](https://github.com/CurtizJ)). -* Fix possible logical error: `Invalid Field get from type UInt64 to type Float64` in `values` table function. Closes [#37602](https://github.com/ClickHouse/ClickHouse/issues/37602). [#37754](https://github.com/ClickHouse/ClickHouse/pull/37754) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix possible segfault in schema inference in case of exception in SchemaReader constructor. Closes [#37680](https://github.com/ClickHouse/ClickHouse/issues/37680). [#37760](https://github.com/ClickHouse/ClickHouse/pull/37760) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix setting cast_ipv4_ipv6_default_on_conversion_error for internal cast function. Closes [#35156](https://github.com/ClickHouse/ClickHouse/issues/35156). [#37761](https://github.com/ClickHouse/ClickHouse/pull/37761) ([Maksim Kita](https://github.com/kitaisreal)). -* Fix `toString` error on `DataTypeDate32`. [#37775](https://github.com/ClickHouse/ClickHouse/pull/37775) ([LiuNeng](https://github.com/liuneng1994)). -* The clickhouse-keeper setting `dead_session_check_period_ms` was transformed into microseconds (multiplied by 1000), which led to dead sessions only being cleaned up after several minutes (instead of 500ms). [#37824](https://github.com/ClickHouse/ClickHouse/pull/37824) ([Michael Lex](https://github.com/mlex)). -* Fix possible "No more packets are available" for distributed queries (in case `async_socket_for_remote`/`use_hedged_requests` is disabled). [#37826](https://github.com/ClickHouse/ClickHouse/pull/37826) ([Azat Khuzhin](https://github.com/azat)). -* (experimental WINDOW VIEW) Do not drop the inner target table when executing `ALTER TABLE … MODIFY QUERY` in WindowView. [#37879](https://github.com/ClickHouse/ClickHouse/pull/37879) ([vxider](https://github.com/Vxider)). -* Fix directory ownership of coordination dir in clickhouse-keeper Docker image. Fixes [#37914](https://github.com/ClickHouse/ClickHouse/issues/37914). [#37915](https://github.com/ClickHouse/ClickHouse/pull/37915) ([James Maidment](https://github.com/jamesmaidment)). -* Dictionaries: fix custom query with an update field and `{condition}`. Closes [#33746](https://github.com/ClickHouse/ClickHouse/issues/33746). [#37947](https://github.com/ClickHouse/ClickHouse/pull/37947) ([Maksim Kita](https://github.com/kitaisreal)). -* Fix possible incorrect result of `SELECT ... 
WITH FILL` in the case when `ORDER BY` should be applied after `WITH FILL` result (e.g. for outer query). Incorrect result was caused by optimization for `ORDER BY` expressions ([#35623](https://github.com/ClickHouse/ClickHouse/issues/35623)). Closes [#37904](https://github.com/ClickHouse/ClickHouse/issues/37904). [#37959](https://github.com/ClickHouse/ClickHouse/pull/37959) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* (experimental WINDOW VIEW) Add missing default columns when pushing to the target table in WindowView, fix [#37815](https://github.com/ClickHouse/ClickHouse/issues/37815). [#37965](https://github.com/ClickHouse/ClickHouse/pull/37965) ([vxider](https://github.com/Vxider)). -* Fixed too large stack frame that would cause compilation to fail. [#37996](https://github.com/ClickHouse/ClickHouse/pull/37996) ([Han Shukai](https://github.com/KinderRiven)). -* When `enable_filesystem_query_cache_limit` is enabled, throw an error if the reserved cache size exceeds the remaining cache size. [#38004](https://github.com/ClickHouse/ClickHouse/pull/38004) ([xiedeyantu](https://github.com/xiedeyantu)). -* Fix converting types for UNION queries (may produce LOGICAL_ERROR). [#34775](https://github.com/ClickHouse/ClickHouse/pull/34775) ([Azat Khuzhin](https://github.com/azat)). -* Fix the case when a TTL merge may not be scheduled again if BackgroundExecutor is busy: `merges_with_ttl_counter` is increased in `selectPartsToMerge()`, but if the merge task is ignored because BackgroundExecutor is busy, `merges_with_ttl_counter` is never decreased. [#36387](https://github.com/ClickHouse/ClickHouse/pull/36387) ([lthaooo](https://github.com/lthaooo)). -* Fix overridden setting value of `normalize_function_names`. [#36937](https://github.com/ClickHouse/ClickHouse/pull/36937) ([李扬](https://github.com/taiyang-li)). -* Fix for exponential time decaying window functions. Now respecting boundaries of the window. [#36944](https://github.com/ClickHouse/ClickHouse/pull/36944) ([Vladimir Chebotarev](https://github.com/excitoon)). -* Fix possible heap-use-after-free error when reading system.projection_parts and system.projection_parts_columns. This fixes [#37184](https://github.com/ClickHouse/ClickHouse/issues/37184). [#37185](https://github.com/ClickHouse/ClickHouse/pull/37185) ([Amos Bird](https://github.com/amosbird)). -* Fixed `DateTime64` fractional seconds behavior prior to Unix epoch. [#37697](https://github.com/ClickHouse/ClickHouse/pull/37697) ([Andrey Zvonov](https://github.com/zvonand)). [#37039](https://github.com/ClickHouse/ClickHouse/pull/37039) ([李扬](https://github.com/taiyang-li)). - -### ClickHouse release 22.5, 2022-05-19 - -#### Upgrade Notes - -* Now, background merges, mutations, and `OPTIMIZE` will not increment `SelectedRows` and `SelectedBytes` metrics. They (still) will increment `MergedRows` and `MergedUncompressedBytes` as it was before. This only affects the metric values and makes them better. This change does not introduce any incompatibility, but you may wonder about the changes to the metrics, so we put it in this category. [#37040](https://github.com/ClickHouse/ClickHouse/pull/37040) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Updated the BoringSSL module to the official FIPS compliant version. This makes ClickHouse FIPS compliant in this area. [#35914](https://github.com/ClickHouse/ClickHouse/pull/35914) ([Meena-Renganathan](https://github.com/Meena-Renganathan)). 
The ciphers `aes-192-cfb128` and `aes-256-cfb128` were removed, because they are not included in the FIPS certified version of BoringSSL. -* `max_memory_usage` setting is removed from the default user profile in `users.xml`. This enables flexible memory limits for queries instead of the old rigid limit of 10 GB. -* Disable `log_query_threads` setting by default. It controls the logging of statistics about every thread participating in query execution. After supporting asynchronous reads, the total number of distinct thread ids became too large, and logging into the `query_thread_log` has become too heavy. [#37077](https://github.com/ClickHouse/ClickHouse/pull/37077) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Remove function `groupArraySorted` which has a bug. [#36822](https://github.com/ClickHouse/ClickHouse/pull/36822) ([Alexey Milovidov](https://github.com/alexey-milovidov)). - -#### New Feature - -* Enable memory overcommit by default. [#35921](https://github.com/ClickHouse/ClickHouse/pull/35921) ([Dmitry Novik](https://github.com/novikd)). -* Add support of GROUPING SETS in GROUP BY clause. This implementation supports a parallel processing of grouping sets. [#33631](https://github.com/ClickHouse/ClickHouse/pull/33631) ([Dmitry Novik](https://github.com/novikd)). -* Added `system.certificates` table. [#37142](https://github.com/ClickHouse/ClickHouse/pull/37142) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Adds `h3Line`, `h3Distance` and `h3HexRing` functions. [#37030](https://github.com/ClickHouse/ClickHouse/pull/37030) ([Bharat Nallan](https://github.com/bharatnc)). -* New single binary based diagnostics tool (clickhouse-diagnostics). [#36705](https://github.com/ClickHouse/ClickHouse/pull/36705) ([Dale McDiarmid](https://github.com/gingerwizard)). -* Add output format `Prometheus` [#36051](https://github.com/ClickHouse/ClickHouse/issues/36051). [#36206](https://github.com/ClickHouse/ClickHouse/pull/36206) ([Vladimir C](https://github.com/vdimir)). -* Add `MySQLDump` input format. It reads all data from INSERT queries belonging to one table in dump. If there are more than one table, by default it reads data from the first one. [#36667](https://github.com/ClickHouse/ClickHouse/pull/36667) ([Kruglov Pavel](https://github.com/Avogar)). -* Show the `total_rows` and `total_bytes` fields in `system.tables` for temporary tables. [#36401](https://github.com/ClickHouse/ClickHouse/issues/36401). [#36439](https://github.com/ClickHouse/ClickHouse/pull/36439) ([xiedeyantu](https://github.com/xiedeyantu)). -* Allow to override `parts_to_delay_insert` and `parts_to_throw_insert` with query-level settings. If they are defined, they will override table-level settings. [#36371](https://github.com/ClickHouse/ClickHouse/pull/36371) ([Memo](https://github.com/Joeywzr)). - -#### Experimental Feature - -* Implemented L1, L2, Linf, Cosine distance functions for arrays and L1, L2, Linf norm functions for arrays. - [#37033](https://github.com/ClickHouse/ClickHouse/pull/37033) ([qieqieplus](https://github.com/qieqieplus)). Caveat: the functions will be renamed. -* Improve the `WATCH` query in WindowView: 1. Reduce the latency of providing query results by calling the `fire_condition` signal. 2. Makes the cancel query operation(ctrl-c) faster, by checking `isCancelled()` more frequently. [#37226](https://github.com/ClickHouse/ClickHouse/pull/37226) ([vxider](https://github.com/Vxider)). -* Introspection for remove filesystem cache. 
[#36802](https://github.com/ClickHouse/ClickHouse/pull/36802) ([Han Shukai](https://github.com/KinderRiven)). -* Added new hash function `wyHash64` for SQL. [#36467](https://github.com/ClickHouse/ClickHouse/pull/36467) ([olevino](https://github.com/olevino)). -* Improvement for replicated databases: Added `SYSTEM SYNC DATABASE REPLICA` query which allows to sync tables metadata inside Replicated database, because currently synchronisation is asynchronous. [#35944](https://github.com/ClickHouse/ClickHouse/pull/35944) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Improvement for remote filesystem cache: Better read from cache. [#37054](https://github.com/ClickHouse/ClickHouse/pull/37054) ([Kseniia Sumarokova](https://github.com/kssenii)). Improve `SYSTEM DROP FILESYSTEM CACHE` query: `` option and `FORCE` option. [#36639](https://github.com/ClickHouse/ClickHouse/pull/36639) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Improvement for semistructured data: Allow to cast columns of type `Object(...)` to `Object(Nullable(...))`. [#36564](https://github.com/ClickHouse/ClickHouse/pull/36564) ([awakeljw](https://github.com/awakeljw)). -* Improvement for parallel replicas: We create a local interpreter if we want to execute query on localhost replica. But for when executing query on multiple replicas we rely on the fact that a connection exists so replicas can talk to coordinator. It is now improved and localhost replica can talk to coordinator directly in the same process. [#36281](https://github.com/ClickHouse/ClickHouse/pull/36281) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). - -#### Performance Improvement - -* Improve performance of `avg`, `sum` aggregate functions if used without GROUP BY expression. [#37257](https://github.com/ClickHouse/ClickHouse/pull/37257) ([Maksim Kita](https://github.com/kitaisreal)). -* Improve performance of unary arithmetic functions (`bitCount`, `bitNot`, `abs`, `intExp2`, `intExp10`, `negate`, `roundAge`, `roundDuration`, `roundToExp2`, `sign`) using dynamic dispatch. [#37289](https://github.com/ClickHouse/ClickHouse/pull/37289) ([Maksim Kita](https://github.com/kitaisreal)). -* Improve performance of ORDER BY, MergeJoin, insertion into MergeTree using JIT compilation of sort columns comparator. [#34469](https://github.com/ClickHouse/ClickHouse/pull/34469) ([Maksim Kita](https://github.com/kitaisreal)). -* Change structure of `system.asynchronous_metric_log`. It will take about 10 times less space. This closes [#36357](https://github.com/ClickHouse/ClickHouse/issues/36357). The field `event_time_microseconds` was removed, because it is useless. [#36360](https://github.com/ClickHouse/ClickHouse/pull/36360) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Load marks for only necessary columns when reading wide parts. [#36879](https://github.com/ClickHouse/ClickHouse/pull/36879) ([Anton Kozlov](https://github.com/tonickkozlov)). -* Improves performance of file descriptor cache by narrowing mutex scopes. [#36682](https://github.com/ClickHouse/ClickHouse/pull/36682) ([Anton Kozlov](https://github.com/tonickkozlov)). -* Improve performance of reading from storage `File` and table functions `file` in case when path has globs and matched directory contains large number of files. [#36647](https://github.com/ClickHouse/ClickHouse/pull/36647) ([Anton Popov](https://github.com/CurtizJ)). -* Apply parallel parsing for input format `HiveText`, which can speed up HiveText parsing by 2x when reading local file. 
[#36650](https://github.com/ClickHouse/ClickHouse/pull/36650) ([李扬](https://github.com/taiyang-li)). -* The default `HashJoin` is not thread safe for inserting right table's rows and run it in a single thread. When the right table is large, the join process is too slow with low cpu utilization. [#36415](https://github.com/ClickHouse/ClickHouse/pull/36415) ([lgbo](https://github.com/lgbo-ustc)). -* Allow to rewrite `select countDistinct(a) from t` to `select count(1) from (select a from t groupBy a)`. [#35993](https://github.com/ClickHouse/ClickHouse/pull/35993) ([zhanglistar](https://github.com/zhanglistar)). -* Transform OR LIKE chain to multiMatchAny. Will enable once we have more confidence it works. [#34932](https://github.com/ClickHouse/ClickHouse/pull/34932) ([Daniel Kutenin](https://github.com/danlark1)). -* Improve performance of some functions with inlining. [#34544](https://github.com/ClickHouse/ClickHouse/pull/34544) ([Daniel Kutenin](https://github.com/danlark1)). -* Add a branch to avoid unnecessary memcpy in readBig. It improves performance somewhat. [#36095](https://github.com/ClickHouse/ClickHouse/pull/36095) ([jasperzhu](https://github.com/jinjunzh)). -* Implement partial GROUP BY key for optimize_aggregation_in_order. [#35111](https://github.com/ClickHouse/ClickHouse/pull/35111) ([Azat Khuzhin](https://github.com/azat)). - -#### Improvement - -* Show names of erroneous files in case of parsing errors while executing table functions `file`, `s3` and `url`. [#36314](https://github.com/ClickHouse/ClickHouse/pull/36314) ([Anton Popov](https://github.com/CurtizJ)). -* Allowed to increase the number of threads for executing background operations (merges, mutations, moves and fetches) at runtime if they are specified at top level config. [#36425](https://github.com/ClickHouse/ClickHouse/pull/36425) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Now date time conversion functions that generates time before 1970-01-01 00:00:00 with partial hours/minutes timezones will be saturated to zero instead of overflow. This is the continuation of https://github.com/ClickHouse/ClickHouse/pull/29953 which addresses https://github.com/ClickHouse/ClickHouse/pull/29953#discussion_r800550280 . Mark as improvement because it's implementation defined behavior (and very rare case) and we are allowed to break it. [#36656](https://github.com/ClickHouse/ClickHouse/pull/36656) ([Amos Bird](https://github.com/amosbird)). -* Add a warning if someone running clickhouse-server with log level "test". The log level "test" was added recently and cannot be used in production due to inevitable, unavoidable, fatal and life-threatening performance degradation. [#36824](https://github.com/ClickHouse/ClickHouse/pull/36824) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Parse collations in CREATE TABLE, throw exception or ignore. closes [#35892](https://github.com/ClickHouse/ClickHouse/issues/35892). [#36271](https://github.com/ClickHouse/ClickHouse/pull/36271) ([yuuch](https://github.com/yuuch)). -* Option `compatibility_ignore_auto_increment_in_create_table` allows ignoring `AUTO_INCREMENT` keyword in a column declaration to simplify migration from MySQL. [#37178](https://github.com/ClickHouse/ClickHouse/pull/37178) ([Igor Nikonov](https://github.com/devcrafter)). -* Add aliases `JSONLines` and `NDJSON` for `JSONEachRow`. Closes [#36303](https://github.com/ClickHouse/ClickHouse/issues/36303). 
[#36327](https://github.com/ClickHouse/ClickHouse/pull/36327) ([flynn](https://github.com/ucasfl)). -* Limit the max partitions could be queried for each hive table. Avoid resource overruns. [#37281](https://github.com/ClickHouse/ClickHouse/pull/37281) ([lgbo](https://github.com/lgbo-ustc)). -* Added implicit cast for `h3kRing` function second argument to improve usability. Closes [#35432](https://github.com/ClickHouse/ClickHouse/issues/35432). [#37189](https://github.com/ClickHouse/ClickHouse/pull/37189) ([Maksim Kita](https://github.com/kitaisreal)). -* Fix progress indication for `INSERT SELECT` in `clickhouse-local` for any query and for file progress in client, more correct file progress. [#37075](https://github.com/ClickHouse/ClickHouse/pull/37075) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix bug which can lead to forgotten outdated parts in MergeTree table engines family in case of filesystem failures during parts removal. Before fix they will be removed only after first server restart. [#37014](https://github.com/ClickHouse/ClickHouse/pull/37014) ([alesapin](https://github.com/alesapin)). -* Implemented a new mode of handling row policies which can be enabled in the main configuration which enables users without permissive row policies to read rows. [#36997](https://github.com/ClickHouse/ClickHouse/pull/36997) ([Vitaly Baranov](https://github.com/vitlibar)). -* Play UI: Nullable numbers will be aligned to the right in table cells. This closes [#36982](https://github.com/ClickHouse/ClickHouse/issues/36982). [#36988](https://github.com/ClickHouse/ClickHouse/pull/36988) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Play UI: If there is one row in result and more than a few columns, display the result vertically. Continuation of [#36811](https://github.com/ClickHouse/ClickHouse/issues/36811). [#36842](https://github.com/ClickHouse/ClickHouse/pull/36842) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Cleanup CSS in Play UI. The pixels are more evenly placed. Better usability for long content in table cells. [#36569](https://github.com/ClickHouse/ClickHouse/pull/36569) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Finalize write buffers in case of exception to avoid doing it in destructors. Hope it fixes: [#36907](https://github.com/ClickHouse/ClickHouse/issues/36907). [#36979](https://github.com/ClickHouse/ClickHouse/pull/36979) ([Kruglov Pavel](https://github.com/Avogar)). -* After [#36425](https://github.com/ClickHouse/ClickHouse/issues/36425) settings like `background_fetches_pool_size` became obsolete and can appear in top level config, but clickhouse throws and exception like `Error updating configuration from '/etc/clickhouse-server/config.xml' config.: Code: 137. DB::Exception: A setting 'background_fetches_pool_size' appeared at top level in config /etc/clickhouse-server/config.xml.` This is fixed. [#36917](https://github.com/ClickHouse/ClickHouse/pull/36917) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Add extra diagnostic info (if applicable) when sending exception to other server. [#36872](https://github.com/ClickHouse/ClickHouse/pull/36872) ([tavplubix](https://github.com/tavplubix)). -* Allow to execute hash functions with arguments of type `Array(Tuple(..))`. [#36812](https://github.com/ClickHouse/ClickHouse/pull/36812) ([Anton Popov](https://github.com/CurtizJ)). -* Added `user_defined_path` config setting. 
[#36753](https://github.com/ClickHouse/ClickHouse/pull/36753) ([Maksim Kita](https://github.com/kitaisreal)). -* Allow cluster macro in `s3Cluster` table function. [#36726](https://github.com/ClickHouse/ClickHouse/pull/36726) ([Vadim Volodin](https://github.com/PolyProgrammist)). -* Properly cancel INSERT queries in `clickhouse-client`/`clickhouse-local`. [#36710](https://github.com/ClickHouse/ClickHouse/pull/36710) ([Azat Khuzhin](https://github.com/azat)). -* Allow to cancel a query while still keeping a decent query id in `MySQLHandler`. [#36699](https://github.com/ClickHouse/ClickHouse/pull/36699) ([Amos Bird](https://github.com/amosbird)). -* Add `is_all_data_sent` column into `system.processes`, and improve internal testing hardening check based on it. [#36649](https://github.com/ClickHouse/ClickHouse/pull/36649) ([Azat Khuzhin](https://github.com/azat)). -* The metrics about time spent reading from s3 now calculated correctly. Close [#35483](https://github.com/ClickHouse/ClickHouse/issues/35483). [#36572](https://github.com/ClickHouse/ClickHouse/pull/36572) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Allow file descriptors in table function file if it is run in clickhouse-local. [#36562](https://github.com/ClickHouse/ClickHouse/pull/36562) ([wuxiaobai24](https://github.com/wuxiaobai24)). -* Allow names of tuple elements that start from digits. [#36544](https://github.com/ClickHouse/ClickHouse/pull/36544) ([Anton Popov](https://github.com/CurtizJ)). -* Now clickhouse-benchmark can read authentication info from environment variables. [#36497](https://github.com/ClickHouse/ClickHouse/pull/36497) ([Anton Kozlov](https://github.com/tonickkozlov)). -* `clickhouse-keeper` improvement: add support for force recovery which allows you to reconfigure cluster without quorum. [#36258](https://github.com/ClickHouse/ClickHouse/pull/36258) ([Antonio Andelic](https://github.com/antonio2368)). -* Improve schema inference for JSON objects. [#36207](https://github.com/ClickHouse/ClickHouse/pull/36207) ([Kruglov Pavel](https://github.com/Avogar)). -* Refactor code around schema inference with globs. Try next file from glob only if it makes sense (previously we tried next file in case of any error). Also it fixes [#36317](https://github.com/ClickHouse/ClickHouse/issues/36317). [#36205](https://github.com/ClickHouse/ClickHouse/pull/36205) ([Kruglov Pavel](https://github.com/Avogar)). -* Add a separate `CLUSTER` grant (and `access_control_improvements.on_cluster_queries_require_cluster_grant` configuration directive, for backward compatibility, default to `false`). [#35767](https://github.com/ClickHouse/ClickHouse/pull/35767) ([Azat Khuzhin](https://github.com/azat)). -* If the required amount of memory is available before the selected query stopped, all waiting queries continue execution. Now we don't stop any query if memory is freed before the moment when the selected query knows about the cancellation. [#35637](https://github.com/ClickHouse/ClickHouse/pull/35637) ([Dmitry Novik](https://github.com/novikd)). -* Nullables detection in protobuf. In proto3, default values are not sent on the wire. This makes it non-trivial to distinguish between null and default values for Nullable columns. A standard way to deal with this problem is to use Google wrappers to nest the target value within an inner message (see https://github.com/protocolbuffers/protobuf/blob/master/src/google/protobuf/wrappers.proto). 
In this case, a missing field is interpreted as null value, a field with missing value if interpreted as default value, and a field with regular value is interpreted as regular value. However, ClickHouse interprets Google wrappers as nested columns. We propose to introduce special behaviour to detect Google wrappers and interpret them like in the description above. For example, to serialize values for a Nullable column `test`, we would use `google.protobuf.StringValue test` in our .proto schema. Note that these types are so called "well-known types" in Protobuf, implemented in the library itself. [#35149](https://github.com/ClickHouse/ClickHouse/pull/35149) ([Jakub Kuklis](https://github.com/jkuklis)). -* Added support for specifying `content_type` in predefined and static HTTP handler config. [#34916](https://github.com/ClickHouse/ClickHouse/pull/34916) ([Roman Nikonov](https://github.com/nic11)). -* Warn properly if use clickhouse-client --file without preceeding --external. Close [#34747](https://github.com/ClickHouse/ClickHouse/issues/34747). [#34765](https://github.com/ClickHouse/ClickHouse/pull/34765) ([李扬](https://github.com/taiyang-li)). -* Improve MySQL database engine to compatible with binary(0) dataType. [#37232](https://github.com/ClickHouse/ClickHouse/pull/37232) ([zzsmdfj](https://github.com/zzsmdfj)). -* Improve JSON report of clickhouse-benchmark. [#36473](https://github.com/ClickHouse/ClickHouse/pull/36473) ([Tian Xinhui](https://github.com/xinhuitian)). -* Server might refuse to start if it cannot resolve hostname of external ClickHouse dictionary. It's fixed. Fixes [#36451](https://github.com/ClickHouse/ClickHouse/issues/36451). [#36463](https://github.com/ClickHouse/ClickHouse/pull/36463) ([tavplubix](https://github.com/tavplubix)). - -#### Build/Testing/Packaging Improvement - -* Now `clickhouse-keeper` for the `x86_64` architecture is statically linked with [musl](https://musl.libc.org/) and doesn't depend on any system libraries. [#31833](https://github.com/ClickHouse/ClickHouse/pull/31833) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* ClickHouse builds for `PowerPC64LE` architecture are now available in universal installation script `curl https://clickhouse.com/ | sh` and by direct link `https://builds.clickhouse.com/master/powerpc64le/clickhouse`. [#37095](https://github.com/ClickHouse/ClickHouse/pull/37095) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Limit PowerPC code generation to Power8 for better compatibility. This closes [#36025](https://github.com/ClickHouse/ClickHouse/issues/36025). [#36529](https://github.com/ClickHouse/ClickHouse/pull/36529) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Simplify performance test. This will give a chance for us to use it. [#36769](https://github.com/ClickHouse/ClickHouse/pull/36769) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fail performance comparison on errors in the report. [#34797](https://github.com/ClickHouse/ClickHouse/pull/34797) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Add ZSTD support for Arrow. This fixes [#35283](https://github.com/ClickHouse/ClickHouse/issues/35283). [#35486](https://github.com/ClickHouse/ClickHouse/pull/35486) ([Sean Lafferty](https://github.com/seanlaff)). - -#### Bug Fix - -* Extracts Version ID if present from the URI and adds a request to the AWS HTTP URI. Closes [#31221](https://github.com/ClickHouse/ClickHouse/issues/31221). - [x] Extract `Version ID` from URI if present and reassemble without it. 
- [x] Configure `AWS HTTP URI` object with request. - [x] Unit Tests: [`gtest_s3_uri`](https://github.com/ClickHouse/ClickHouse/blob/2340a6c6849ebc05a8efbf97ba8de3ff9dc0eff4/src/IO/tests/gtest_s3_uri.cpp) - [x] Drop instrumentation commit. [#34571](https://github.com/ClickHouse/ClickHouse/pull/34571) ([Saad Ur Rahman](https://github.com/surahman)). -* Fix the `system.opentelemetry_span_log` `attribute.values` alias so that it points to values instead of keys. [#37275](https://github.com/ClickHouse/ClickHouse/pull/37275) ([Aleksandr Razumov](https://github.com/ernado)). -* Fix Nullable(String) to Nullable(Bool/IPv4/IPv6) conversion. Closes [#37221](https://github.com/ClickHouse/ClickHouse/issues/37221). [#37270](https://github.com/ClickHouse/ClickHouse/pull/37270) ([Kruglov Pavel](https://github.com/Avogar)). -* Experimental feature: Fix execution of mutations in tables that contain columns of type `Object`. Using subcolumns of type `Object` in the `WHERE` expression of `UPDATE` or `DELETE` queries is not allowed yet, nor is manipulating (`DROP`, `MODIFY`) separate subcolumns. Fixes [#37205](https://github.com/ClickHouse/ClickHouse/issues/37205). [#37266](https://github.com/ClickHouse/ClickHouse/pull/37266) ([Anton Popov](https://github.com/CurtizJ)). -* Kafka does not need `group.id` at the producer stage. In the console log you can find a warning that describes this issue: ``` 2022.05.15 17:59:13.270227 [ 137 ] {} StorageKafka (topic-name): [rdk:CONFWARN] [thrd:app]: Configuration property group.id is a consumer property and will be ignored by this producer instance ```. [#37228](https://github.com/ClickHouse/ClickHouse/pull/37228) ([Mark Andreev](https://github.com/mrk-andreev)). -* Experimental feature (WindowView): Update `max_fired_watermark` after blocks have actually fired, to avoid deleting data that hasn't been fired yet. [#37225](https://github.com/ClickHouse/ClickHouse/pull/37225) ([vxider](https://github.com/Vxider)). -* Fix "Cannot create column of type Set" for distributed queries with LIMIT BY. [#37193](https://github.com/ClickHouse/ClickHouse/pull/37193) ([Azat Khuzhin](https://github.com/azat)). -* Experimental feature: Now the WindowView `WATCH EVENTS` query will not be terminated due to the nonempty Chunk created in `WindowViewSource.h:58`. [#37182](https://github.com/ClickHouse/ClickHouse/pull/37182) ([vxider](https://github.com/Vxider)). -* Enable `enable_global_with_statement` for subqueries, close [#37141](https://github.com/ClickHouse/ClickHouse/issues/37141). [#37166](https://github.com/ClickHouse/ClickHouse/pull/37166) ([Vladimir C](https://github.com/vdimir)). -* Fix implicit cast for optimize_skip_unused_shards_rewrite_in. [#37153](https://github.com/ClickHouse/ClickHouse/pull/37153) ([Azat Khuzhin](https://github.com/azat)). -* The ILIKE function on FixedString columns could have returned wrong results (i.e. matched less than it should). [#37117](https://github.com/ClickHouse/ClickHouse/pull/37117) ([Robert Schulze](https://github.com/rschu1ze)). -* Fix `GROUP BY` `AggregateFunction` (i.e. you `GROUP BY` by the column that has `AggregateFunction` type). [#37093](https://github.com/ClickHouse/ClickHouse/pull/37093) ([Azat Khuzhin](https://github.com/azat)). -* Experimental feature: Fix optimize_aggregation_in_order with prefix GROUP BY and *Array aggregate functions. [#37050](https://github.com/ClickHouse/ClickHouse/pull/37050) ([Azat Khuzhin](https://github.com/azat)). -* Fixed performance degradation of some INSERT SELECT queries with implicit aggregation. 
Fixes [#36792](https://github.com/ClickHouse/ClickHouse/issues/36792). [#37047](https://github.com/ClickHouse/ClickHouse/pull/37047) ([tavplubix](https://github.com/tavplubix)). -* Experimental feature: Fix in-order `GROUP BY` (`optimize_aggregation_in_order=1`) with `*Array` (`groupArrayArray`/...) aggregate functions. [#37046](https://github.com/ClickHouse/ClickHouse/pull/37046) ([Azat Khuzhin](https://github.com/azat)). -* Fix LowCardinality->ArrowDictionary invalid output when type of indexes is not UInt8. Closes [#36832](https://github.com/ClickHouse/ClickHouse/issues/36832). [#37043](https://github.com/ClickHouse/ClickHouse/pull/37043) ([Kruglov Pavel](https://github.com/Avogar)). -* Fixed problem with infs in `quantileTDigest`. Fixes [#32107](https://github.com/ClickHouse/ClickHouse/issues/32107). [#37021](https://github.com/ClickHouse/ClickHouse/pull/37021) ([Vladimir Chebotarev](https://github.com/excitoon)). -* Fix sending external tables data in HedgedConnections with max_parallel_replicas != 1. [#36981](https://github.com/ClickHouse/ClickHouse/pull/36981) ([Kruglov Pavel](https://github.com/Avogar)). -* Fixed logical error on `TRUNCATE` query in `Replicated` database. Fixes [#33747](https://github.com/ClickHouse/ClickHouse/issues/33747). [#36976](https://github.com/ClickHouse/ClickHouse/pull/36976) ([tavplubix](https://github.com/tavplubix)). -* Experimental feature: Fix stuck when dropping source table in WindowView. Closes [#35678](https://github.com/ClickHouse/ClickHouse/issues/35678). [#36967](https://github.com/ClickHouse/ClickHouse/pull/36967) ([vxider](https://github.com/Vxider)). -* Experimental feature (rocksdb cache): Fix issue: [#36671](https://github.com/ClickHouse/ClickHouse/issues/36671). [#36929](https://github.com/ClickHouse/ClickHouse/pull/36929) ([李扬](https://github.com/taiyang-li)). -* Experimental feature: Fix bugs when using multiple columns in WindowView by adding converting actions to make it possible to call`writeIntoWindowView` with a slightly different schema. [#36928](https://github.com/ClickHouse/ClickHouse/pull/36928) ([vxider](https://github.com/Vxider)). -* Fix bug in clickhouse-keeper which can lead to corrupted compressed log files in case of small load and restarts. [#36910](https://github.com/ClickHouse/ClickHouse/pull/36910) ([alesapin](https://github.com/alesapin)). -* Fix incorrect query result when doing constant aggregation. This fixes [#36728](https://github.com/ClickHouse/ClickHouse/issues/36728) . [#36888](https://github.com/ClickHouse/ClickHouse/pull/36888) ([Amos Bird](https://github.com/amosbird)). -* Experimental feature: Fix `current_size` count in cache. [#36887](https://github.com/ClickHouse/ClickHouse/pull/36887) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Experimental feature: Fix fire in window view with hop window [#34044](https://github.com/ClickHouse/ClickHouse/issues/34044). [#36861](https://github.com/ClickHouse/ClickHouse/pull/36861) ([vxider](https://github.com/Vxider)). -* Experimental feature: Fix incorrect cast in cached buffer from remote fs. [#36809](https://github.com/ClickHouse/ClickHouse/pull/36809) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix creation of tables with `flatten_nested = 0`. Previously unflattened `Nested` columns could be flattened after server restart. [#36803](https://github.com/ClickHouse/ClickHouse/pull/36803) ([Anton Popov](https://github.com/CurtizJ)). -* Fix some issues with async reads from remote filesystem which happened when reading low cardinality. 
[#36763](https://github.com/ClickHouse/ClickHouse/pull/36763) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Experimental feature: Fix insertion to columns of type `Object` from multiple files, e.g. via table function `file` with globs. [#36762](https://github.com/ClickHouse/ClickHouse/pull/36762) ([Anton Popov](https://github.com/CurtizJ)). -* Fix timeouts in Hedged requests. Connection hang right after sending remote query could lead to eternal waiting. [#36749](https://github.com/ClickHouse/ClickHouse/pull/36749) ([Kruglov Pavel](https://github.com/Avogar)). -* Experimental feature: Fix a bug of `groupBitmapAndState`/`groupBitmapOrState`/`groupBitmapXorState` on distributed table. [#36739](https://github.com/ClickHouse/ClickHouse/pull/36739) ([Zhang Yifan](https://github.com/zhangyifan27)). -* Experimental feature: During the [test](https://s3.amazonaws.com/clickhouse-test-reports/36376/1cb1c7275cb53769ab826772db9b71361bb3e413/stress_test__thread__actions_/clickhouse-server.clean.log) in [PR](https://github.com/ClickHouse/ClickHouse/pull/36376), I found that the one cache class was initialized twice, it throws a exception. Although the cause of this problem is not clear, there should be code logic of repeatedly loading disk in ClickHouse, so we need to make special judgment for this situation. [#36737](https://github.com/ClickHouse/ClickHouse/pull/36737) ([Han Shukai](https://github.com/KinderRiven)). -* Fix vertical merges in wide parts. Previously an exception `There is no column` can be thrown during merge. [#36707](https://github.com/ClickHouse/ClickHouse/pull/36707) ([Anton Popov](https://github.com/CurtizJ)). -* Fix server reload on port change (do not wait for current connections from query context). [#36700](https://github.com/ClickHouse/ClickHouse/pull/36700) ([Azat Khuzhin](https://github.com/azat)). -* Experimental feature: In the previous [PR](https://github.com/ClickHouse/ClickHouse/pull/36376), I found that testing (stateless tests, flaky check (address, actions)) is timeout. Moreover, testing locally can also trigger unstable system deadlocks. This problem still exists when using the latest source code of master. [#36697](https://github.com/ClickHouse/ClickHouse/pull/36697) ([Han Shukai](https://github.com/KinderRiven)). -* Experimental feature: Fix server restart if cache configuration changed. [#36685](https://github.com/ClickHouse/ClickHouse/pull/36685) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix possible heap-use-after-free in schema inference. Closes [#36661](https://github.com/ClickHouse/ClickHouse/issues/36661). [#36679](https://github.com/ClickHouse/ClickHouse/pull/36679) ([Kruglov Pavel](https://github.com/Avogar)). -* Fixed parsing of query settings in `CREATE` query when engine is not specified. Fixes https://github.com/ClickHouse/ClickHouse/pull/34187#issuecomment-1103812419. [#36642](https://github.com/ClickHouse/ClickHouse/pull/36642) ([tavplubix](https://github.com/tavplubix)). -* Experimental feature: Fix merges of wide parts with type `Object`. [#36637](https://github.com/ClickHouse/ClickHouse/pull/36637) ([Anton Popov](https://github.com/CurtizJ)). -* Fix format crash when default expression follow EPHEMERAL not literal. Closes [#36618](https://github.com/ClickHouse/ClickHouse/issues/36618). [#36633](https://github.com/ClickHouse/ClickHouse/pull/36633) ([flynn](https://github.com/ucasfl)). -* Fix `Missing column` exception which could happen while using `INTERPOLATE` with `ENGINE = MergeTree` table. 
[#36549](https://github.com/ClickHouse/ClickHouse/pull/36549) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Fix potential error with literals in `WHERE` for join queries. Close [#36279](https://github.com/ClickHouse/ClickHouse/issues/36279). [#36542](https://github.com/ClickHouse/ClickHouse/pull/36542) ([Vladimir C](https://github.com/vdimir)). -* Fix offset update ReadBufferFromEncryptedFile, which could cause undefined behaviour. [#36493](https://github.com/ClickHouse/ClickHouse/pull/36493) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix hostname sanity checks for Keeper cluster configuration. Add `keeper_server.host_checks_enabled` config to enable/disable those checks. [#36492](https://github.com/ClickHouse/ClickHouse/pull/36492) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix usage of executable user defined functions in GROUP BY. Before executable user defined functions cannot be used as expressions in GROUP BY. Closes [#36448](https://github.com/ClickHouse/ClickHouse/issues/36448). [#36486](https://github.com/ClickHouse/ClickHouse/pull/36486) ([Maksim Kita](https://github.com/kitaisreal)). -* Fix possible exception with unknown packet from server in client. [#36481](https://github.com/ClickHouse/ClickHouse/pull/36481) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Experimental feature (please never use `system.session_log`, it is going to be removed): Add missing enum values in system.session_log table. Closes [#36474](https://github.com/ClickHouse/ClickHouse/issues/36474). [#36480](https://github.com/ClickHouse/ClickHouse/pull/36480) ([Memo](https://github.com/Joeywzr)). -* Fix bug in s3Cluster schema inference that let to the fact that not all data was read in the select from s3Cluster. The bug appeared in https://github.com/ClickHouse/ClickHouse/pull/35544. [#36434](https://github.com/ClickHouse/ClickHouse/pull/36434) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix nullptr dereference in JOIN and COLUMNS matcher. This fixes [#36416](https://github.com/ClickHouse/ClickHouse/issues/36416). This is for https://github.com/ClickHouse/ClickHouse/pull/36417. [#36430](https://github.com/ClickHouse/ClickHouse/pull/36430) ([Amos Bird](https://github.com/amosbird)). -* Fix dictionary reload for `ClickHouseDictionarySource` if it contains scalar subqueries. [#36390](https://github.com/ClickHouse/ClickHouse/pull/36390) ([lthaooo](https://github.com/lthaooo)). -* Fix assertion in JOIN, close [#36199](https://github.com/ClickHouse/ClickHouse/issues/36199). [#36201](https://github.com/ClickHouse/ClickHouse/pull/36201) ([Vladimir C](https://github.com/vdimir)). -* Queries with aliases inside special operators returned parsing error (was broken in 22.1). Example: `SELECT substring('test' AS t, 1, 1)`. [#36167](https://github.com/ClickHouse/ClickHouse/pull/36167) ([Maksim Kita](https://github.com/kitaisreal)). -* Experimental feature: Fix insertion of complex JSONs with nested arrays to columns of type `Object`. [#36077](https://github.com/ClickHouse/ClickHouse/pull/36077) ([Anton Popov](https://github.com/CurtizJ)). -* Fix ALTER DROP COLUMN of nested column with compact parts (i.e. `ALTER TABLE x DROP COLUMN n`, when there is column `n.d`). [#35797](https://github.com/ClickHouse/ClickHouse/pull/35797) ([Azat Khuzhin](https://github.com/azat)). -* Fix substring function range error length when `offset` and `length` is negative constant and `s` is not constant. 
[#33861](https://github.com/ClickHouse/ClickHouse/pull/33861) ([RogerYK](https://github.com/RogerYK)). - -### ClickHouse release 22.4, 2022-04-19 - -#### Backward Incompatible Change - -* Do not allow SETTINGS after FORMAT for INSERT queries (there is compatibility setting `allow_settings_after_format_in_insert` to accept such queries, but it is turned OFF by default). [#35883](https://github.com/ClickHouse/ClickHouse/pull/35883) ([Azat Khuzhin](https://github.com/azat)). -* Function `yandexConsistentHash` (consistent hashing algorithm by Konstantin "kostik" Oblakov) is renamed to `kostikConsistentHash`. The old name is left as an alias for compatibility. Although this change is backward compatible, we may remove the alias in subsequent releases, that's why it's recommended to update the usages of this function in your apps. [#35553](https://github.com/ClickHouse/ClickHouse/pull/35553) ([Alexey Milovidov](https://github.com/alexey-milovidov)). - -#### New Feature - -* Added INTERPOLATE extension to the ORDER BY ... WITH FILL. Closes [#34903](https://github.com/ClickHouse/ClickHouse/issues/34903). [#35349](https://github.com/ClickHouse/ClickHouse/pull/35349) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Profiling on Processors level (under `log_processors_profiles` setting, ClickHouse will write time that processor spent during execution/waiting for data to `system.processors_profile_log` table). [#34355](https://github.com/ClickHouse/ClickHouse/pull/34355) ([Azat Khuzhin](https://github.com/azat)). -* Added functions makeDate(year, month, day), makeDate32(year, month, day). [#35628](https://github.com/ClickHouse/ClickHouse/pull/35628) ([Alexander Gololobov](https://github.com/davenger)). Implementation of makeDateTime() and makeDateTIme64(). [#35934](https://github.com/ClickHouse/ClickHouse/pull/35934) ([Alexander Gololobov](https://github.com/davenger)). -* Support new type of quota `WRITTEN BYTES` to limit amount of written bytes during insert queries. [#35736](https://github.com/ClickHouse/ClickHouse/pull/35736) ([Anton Popov](https://github.com/CurtizJ)). -* Added function `flattenTuple`. It receives nested named `Tuple` as an argument and returns a flatten `Tuple` which elements are the paths from the original `Tuple`. E.g.: `Tuple(a Int, Tuple(b Int, c Int)) -> Tuple(a Int, b Int, c Int)`. `flattenTuple` can be used to select all paths from type `Object` as separate columns. [#35690](https://github.com/ClickHouse/ClickHouse/pull/35690) ([Anton Popov](https://github.com/CurtizJ)). -* Added functions `arrayFirstOrNull`, `arrayLastOrNull`. Closes [#35238](https://github.com/ClickHouse/ClickHouse/issues/35238). [#35414](https://github.com/ClickHouse/ClickHouse/pull/35414) ([Maksim Kita](https://github.com/kitaisreal)). -* Added functions `minSampleSizeContinous` and `minSampleSizeConversion`. Author [achimbab](https://github.com/achimbab). [#35360](https://github.com/ClickHouse/ClickHouse/pull/35360) ([Maksim Kita](https://github.com/kitaisreal)). -* New functions minSampleSizeContinous and minSampleSizeConversion. [#34354](https://github.com/ClickHouse/ClickHouse/pull/34354) ([achimbab](https://github.com/achimbab)). -* Introduce format `ProtobufList` (all records as repeated messages in out Protobuf). Closes [#16436](https://github.com/ClickHouse/ClickHouse/issues/16436). [#35152](https://github.com/ClickHouse/ClickHouse/pull/35152) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
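The `INTERPOLATE` extension to `ORDER BY ... WITH FILL` listed above is easiest to see in a query. A minimal sketch, built only on `numbers()` so it has no external dependencies; the column names `n` and `inter` are illustrative:

```sql
-- Rows missing from the ORDER BY sequence are generated by WITH FILL;
-- INTERPOLATE tells ClickHouse how to compute the non-key column for those rows.
SELECT n, inter
FROM
(
    SELECT number AS n, number * 10 AS inter
    FROM numbers(10)
    WHERE number % 3 = 1
)
ORDER BY n WITH FILL FROM 0 TO 10 STEP 1
INTERPOLATE (inter AS inter + 10);
```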
-* Add `h3PointDistM`, `h3PointDistKm`, `h3PointDistRads`, `h3GetRes0Indexes`, `h3GetPentagonIndexes` functions. [#34568](https://github.com/ClickHouse/ClickHouse/pull/34568) ([Bharat Nallan](https://github.com/bharatnc)). -* Add `toLastDayOfMonth` function which rounds up a date or date with time to the last day of the month. [#33501](https://github.com/ClickHouse/ClickHouse/issues/33501). [#34394](https://github.com/ClickHouse/ClickHouse/pull/34394) ([Habibullah Oladepo](https://github.com/holadepo)). -* Added load balancing setting for \[Zoo\]Keeper client. Closes [#29617](https://github.com/ClickHouse/ClickHouse/issues/29617). [#30325](https://github.com/ClickHouse/ClickHouse/pull/30325) ([小路](https://github.com/nicelulu)). -* Add a new kind of row policies named `simple`. Before this PR we had two kinds or row policies: `permissive` and `restrictive`. A `simple` row policy adds a new filter on a table without any side-effects like it was for permissive and restrictive policies. [#35345](https://github.com/ClickHouse/ClickHouse/pull/35345) ([Vitaly Baranov](https://github.com/vitlibar)). -* Added an ability to specify cluster secret in replicated database. [#35333](https://github.com/ClickHouse/ClickHouse/pull/35333) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Added sanity checks on server startup (available memory and disk space, max thread count, etc). [#34566](https://github.com/ClickHouse/ClickHouse/pull/34566) ([Sergei Trifonov](https://github.com/serxa)). -* INTERVAL improvement - can be used with `[MILLI|MICRO|NANO]SECOND`. Added `toStartOf[Milli|Micro|Nano]second()` functions. Added `[add|subtract][Milli|Micro|Nano]seconds()`. [#34353](https://github.com/ClickHouse/ClickHouse/pull/34353) ([Andrey Zvonov](https://github.com/zvonand)). - -#### Experimental Feature - -* Added support for transactions for simple `MergeTree` tables. This feature is highly experimental and not recommended for production. Part of [#22086](https://github.com/ClickHouse/ClickHouse/issues/22086). [#24258](https://github.com/ClickHouse/ClickHouse/pull/24258) ([tavplubix](https://github.com/tavplubix)). -* Support schema inference for type `Object` in format `JSONEachRow`. Allow to convert columns of type `Map` to columns of type `Object`. [#35629](https://github.com/ClickHouse/ClickHouse/pull/35629) ([Anton Popov](https://github.com/CurtizJ)). -* Allow to write remote FS cache on all write operations. Add `system.remote_filesystem_cache` table. Add `drop remote filesystem cache` query. Add introspection for s3 metadata with `system.remote_data_paths` table. Closes [#34021](https://github.com/ClickHouse/ClickHouse/issues/34021). Add cache option for merges by adding mode `read_from_filesystem_cache_if_exists_otherwise_bypass_cache` (turned on by default for merges and can also be turned on by query setting with the same name). Rename cache related settings (`remote_fs_enable_cache -> enable_filesystem_cache`, etc). [#35475](https://github.com/ClickHouse/ClickHouse/pull/35475) ([Kseniia Sumarokova](https://github.com/kssenii)). -* An option to store parts metadata in RocksDB. Speed up parts loading process of MergeTree to accelerate starting up of clickhouse-server. With this improvement, clickhouse-server was able to decrease starting up time from 75 minutes to 20 seconds, with 700k mergetree parts. [#32928](https://github.com/ClickHouse/ClickHouse/pull/32928) ([李扬](https://github.com/taiyang-li)). - -#### Performance Improvement - -* A new query plan optimization. 
Evaluate functions after `ORDER BY` when possible. As an example, for a query `SELECT sipHash64(number) FROM numbers(1e8) ORDER BY number LIMIT 5`, function `sipHash64` would be evaluated after `ORDER BY` and `LIMIT`, which gives ~20x speed up. [#35623](https://github.com/ClickHouse/ClickHouse/pull/35623) ([Nikita Taranov](https://github.com/nickitat)). -* Sizes of hash tables used during aggregation now collected and used in later queries to avoid hash tables resizes. [#33439](https://github.com/ClickHouse/ClickHouse/pull/33439) ([Nikita Taranov](https://github.com/nickitat)). -* Improvement for hasAll function using SIMD instructions (SSE and AVX2). [#27653](https://github.com/ClickHouse/ClickHouse/pull/27653) ([youennL-cs](https://github.com/youennL-cs)). [#35723](https://github.com/ClickHouse/ClickHouse/pull/35723) ([Maksim Kita](https://github.com/kitaisreal)). -* Multiple changes to improve ASOF JOIN performance (1.2 - 1.6x as fast). It also adds support to use big integers. [#34733](https://github.com/ClickHouse/ClickHouse/pull/34733) ([Raúl Marín](https://github.com/Algunenano)). -* Improve performance of ASOF JOIN if key is native integer. [#35525](https://github.com/ClickHouse/ClickHouse/pull/35525) ([Maksim Kita](https://github.com/kitaisreal)). -* Parallelization of multipart upload into S3 storage. [#35343](https://github.com/ClickHouse/ClickHouse/pull/35343) ([Sergei Trifonov](https://github.com/serxa)). -* URL storage engine now downloads multiple chunks in parallel if the endpoint supports HTTP Range. Two additional settings were added, `max_download_threads` and `max_download_buffer_size`, which control maximum number of threads a single query can use to download the file and the maximum number of bytes each thread can process. [#35150](https://github.com/ClickHouse/ClickHouse/pull/35150) ([Antonio Andelic](https://github.com/antonio2368)). -* Use multiple threads to download objects from S3. Downloading is controllable using `max_download_threads` and `max_download_buffer_size` settings. [#35571](https://github.com/ClickHouse/ClickHouse/pull/35571) ([Antonio Andelic](https://github.com/antonio2368)). -* Narrow mutex scope when interacting with HDFS. Related to [#35292](https://github.com/ClickHouse/ClickHouse/issues/35292). [#35646](https://github.com/ClickHouse/ClickHouse/pull/35646) ([shuchaome](https://github.com/shuchaome)). -* Require mutations for per-table TTL only when it had been changed. [#35953](https://github.com/ClickHouse/ClickHouse/pull/35953) ([Azat Khuzhin](https://github.com/azat)). - -#### Improvement - -* Multiple improvements for schema inference. Use some tweaks and heuristics to determine numbers, strings, arrays, tuples and maps in CSV, TSV and TSVRaw data formats. Add setting `input_format_csv_use_best_effort_in_schema_inference` for CSV format that enables/disables using these heuristics, if it's disabled, we treat everything as string. Add similar setting `input_format_tsv_use_best_effort_in_schema_inference` for TSV/TSVRaw format. These settings are enabled by default. - Add Maps support for schema inference in Values format. - Fix possible segfault in schema inference in Values format. - Allow to skip columns with unsupported types in Arrow/ORC/Parquet formats. Add corresponding settings for it: `input_format_{parquet|orc|arrow}_skip_columns_with_unsupported_types_in_schema_inference`. These settings are disabled by default. - Allow to convert a column with type Null to a Nullable column with all NULL values in Arrow/Parquet formats. 
- Allow to specify column names in schema inference via setting `column_names_for_schema_inference` for formats that don't contain column names (like CSV, TSV, JSONCompactEachRow, etc) - Fix schema inference in ORC/Arrow/Parquet formats in terms of working with Nullable columns. Previously all inferred types were not Nullable and it blocked reading Nullable columns from data, now it's fixed and all inferred types are always Nullable (because we cannot understand that column is Nullable or not by reading the schema). - Fix schema inference in Template format with CSV escaping rules. [#35582](https://github.com/ClickHouse/ClickHouse/pull/35582) ([Kruglov Pavel](https://github.com/Avogar)). -* Add parallel parsing and schema inference for format `JSONAsObject`. [#35592](https://github.com/ClickHouse/ClickHouse/pull/35592) ([Anton Popov](https://github.com/CurtizJ)). -* Added a support for automatic schema inference to `s3Cluster` table function. Synced the signatures of `s3 ` and `s3Cluster`. [#35544](https://github.com/ClickHouse/ClickHouse/pull/35544) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Added support for schema inference for `hdfsCluster`. [#35602](https://github.com/ClickHouse/ClickHouse/pull/35602) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Add new setting `input_format_json_read_bools_as_numbers` that allows to infer and parse bools as numbers in JSON input formats. It's enabled by default. Suggested by @alexey-milovidov. [#35735](https://github.com/ClickHouse/ClickHouse/pull/35735) ([Kruglov Pavel](https://github.com/Avogar)). -* Improve columns ordering in schema inference for formats TSKV and JSONEachRow, closes [#35640](https://github.com/ClickHouse/ClickHouse/issues/35640). Don't stop schema inference when reading empty row in schema inference for formats TSKV and JSONEachRow. [#35724](https://github.com/ClickHouse/ClickHouse/pull/35724) ([Kruglov Pavel](https://github.com/Avogar)). -* Add settings `input_format_orc_case_insensitive_column_matching`, `input_format_arrow_case_insensitive_column_matching`, and `input_format_parquet_case_insensitive_column_matching` which allows ClickHouse to use case insensitive matching of columns while reading data from ORC, Arrow or Parquet files. [#35459](https://github.com/ClickHouse/ClickHouse/pull/35459) ([Antonio Andelic](https://github.com/antonio2368)). -* Added `is_secure` column to `system.query_log` which denotes if the client is using a secure connection over TCP or HTTP. [#35705](https://github.com/ClickHouse/ClickHouse/pull/35705) ([Antonio Andelic](https://github.com/antonio2368)). -* Now `kafka_num_consumers` can be bigger than amount of physical cores in case of low resource machine (less than 16 cores). [#35926](https://github.com/ClickHouse/ClickHouse/pull/35926) ([alesapin](https://github.com/alesapin)). -* Add some basic metrics to monitor engine=Kafka tables. [#35916](https://github.com/ClickHouse/ClickHouse/pull/35916) ([filimonov](https://github.com/filimonov)). -* Now it's not allowed to `ALTER TABLE ... RESET SETTING` for non-existing settings for MergeTree engines family. Fixes [#35816](https://github.com/ClickHouse/ClickHouse/issues/35816). [#35884](https://github.com/ClickHouse/ClickHouse/pull/35884) ([alesapin](https://github.com/alesapin)). -* Now some `ALTER MODIFY COLUMN` queries for `Arrays` and `Nullable` types can be done at metadata level without mutations. For example, alter from `Array(Enum8('Option1'=1))` to `Array(Enum8('Option1'=1, 'Option2'=2))`. 
[#35882](https://github.com/ClickHouse/ClickHouse/pull/35882) ([alesapin](https://github.com/alesapin)). -* Added an animation to the hourglass icon to indicate to the user that a query is running. [#35860](https://github.com/ClickHouse/ClickHouse/pull/35860) ([peledni](https://github.com/peledni)). -* support ALTER TABLE t DETACH PARTITION (ALL). [#35794](https://github.com/ClickHouse/ClickHouse/pull/35794) ([awakeljw](https://github.com/awakeljw)). -* Improve projection analysis to optimize trivial queries such as `count()`. [#35788](https://github.com/ClickHouse/ClickHouse/pull/35788) ([Amos Bird](https://github.com/amosbird)). -* Support schema inference for insert select with using `input` table function. Get schema from insertion table instead of inferring it from the data in case of insert select from table functions that support schema inference. Closes [#35639](https://github.com/ClickHouse/ClickHouse/issues/35639). [#35760](https://github.com/ClickHouse/ClickHouse/pull/35760) ([Kruglov Pavel](https://github.com/Avogar)). -* Respect `remote_url_allow_hosts` for Hive tables. [#35743](https://github.com/ClickHouse/ClickHouse/pull/35743) ([李扬](https://github.com/taiyang-li)). -* Implement `send_logs_level` for clickhouse-local. Closes [#35653](https://github.com/ClickHouse/ClickHouse/issues/35653). [#35716](https://github.com/ClickHouse/ClickHouse/pull/35716) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Closes [#35641](https://github.com/ClickHouse/ClickHouse/issues/35641) Allow `EPHEMERAL` columns without explicit default expression. [#35706](https://github.com/ClickHouse/ClickHouse/pull/35706) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Add profile event counter `AsyncInsertBytes` about size of async INSERTs. [#35644](https://github.com/ClickHouse/ClickHouse/pull/35644) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Improve the pipeline description for JOIN. [#35612](https://github.com/ClickHouse/ClickHouse/pull/35612) ([何李夫](https://github.com/helifu)). -* Deduce absolute hdfs config path. [#35572](https://github.com/ClickHouse/ClickHouse/pull/35572) ([李扬](https://github.com/taiyang-li)). -* Improve pasting performance and compatibility of clickhouse-client. This helps [#35501](https://github.com/ClickHouse/ClickHouse/issues/35501). [#35541](https://github.com/ClickHouse/ClickHouse/pull/35541) ([Amos Bird](https://github.com/amosbird)). -* It was possible to get stack overflow in distributed queries if one of the settings `async_socket_for_remote` and `use_hedged_requests` is enabled while parsing very deeply nested data type (at least in debug build). Closes [#35509](https://github.com/ClickHouse/ClickHouse/issues/35509). [#35524](https://github.com/ClickHouse/ClickHouse/pull/35524) ([Kruglov Pavel](https://github.com/Avogar)). -* Add sizes of subcolumns to `system.parts_columns` table. [#35488](https://github.com/ClickHouse/ClickHouse/pull/35488) ([Anton Popov](https://github.com/CurtizJ)). -* Add explicit table info to the scan node of query plan and pipeline. [#35460](https://github.com/ClickHouse/ClickHouse/pull/35460) ([何李夫](https://github.com/helifu)). -* Allow server to bind to low-numbered ports (e.g. 443). ClickHouse installation script will set `cap_net_bind_service` to the binary file. [#35451](https://github.com/ClickHouse/ClickHouse/pull/35451) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix INSERT INTO table FROM INFILE: it did not display the progress bar. 
[#35429](https://github.com/ClickHouse/ClickHouse/pull/35429) ([xiedeyantu](https://github.com/xiedeyantu)). -* Add arguments `--user`, `--password`, `--host`, `--port` for `clickhouse-diagnostics` tool. [#35422](https://github.com/ClickHouse/ClickHouse/pull/35422) ([李扬](https://github.com/taiyang-li)). -* Support uuid for Postgres engines. Closes [#35384](https://github.com/ClickHouse/ClickHouse/issues/35384). [#35403](https://github.com/ClickHouse/ClickHouse/pull/35403) ([Kseniia Sumarokova](https://github.com/kssenii)). -* For table function `s3cluster` or `HDFSCluster` or `hive`, we can't get right `AccessType` by `StorageFactory::instance().getSourceAccessType(getStorageTypeName())`. This pr fix it. [#35365](https://github.com/ClickHouse/ClickHouse/pull/35365) ([李扬](https://github.com/taiyang-li)). -* Remove `--testmode` option for clickhouse-client, enable it unconditionally. [#35354](https://github.com/ClickHouse/ClickHouse/pull/35354) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Don't allow `wchc` operation (four letter command) for clickhouse-keeper. [#35320](https://github.com/ClickHouse/ClickHouse/pull/35320) ([zhangyuli1](https://github.com/zhangyuli1)). -* Add function `getTypeSerializationStreams`. For a specified type (which is detected from column), it returns an array with all the serialization substream paths. This function is useful mainly for developers. [#35290](https://github.com/ClickHouse/ClickHouse/pull/35290) ([李扬](https://github.com/taiyang-li)). -* If `port` is not specified in cluster configuration, default server port will be used. This closes [#34769](https://github.com/ClickHouse/ClickHouse/issues/34769). [#34772](https://github.com/ClickHouse/ClickHouse/pull/34772) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Use `minmax` index for orc/parquet file in Hive Engine. Related PR: https://github.com/ClickHouse/arrow/pull/10. [#34631](https://github.com/ClickHouse/ClickHouse/pull/34631) ([李扬](https://github.com/taiyang-li)). -* System log tables now allow to specify COMMENT in ENGINE declaration. Closes [#33768](https://github.com/ClickHouse/ClickHouse/issues/33768). [#34536](https://github.com/ClickHouse/ClickHouse/pull/34536) ([Maksim Kita](https://github.com/kitaisreal)). -* Proper support of setting `max_rows_to_read` in case of reading in order of sorting key and specified limit. Previously the exception `Limit for rows or bytes to read exceeded` could be thrown even if query actually requires to read less amount of rows. [#33230](https://github.com/ClickHouse/ClickHouse/pull/33230) ([Anton Popov](https://github.com/CurtizJ)). -* Respect only quota & period from cgroups, ignore shares (which are not really limit the number of the cores which can be used). [#35815](https://github.com/ClickHouse/ClickHouse/pull/35815) ([filimonov](https://github.com/filimonov)). - -#### Build/Testing/Packaging Improvement - -* Add next batch of randomization settings in functional tests. [#35047](https://github.com/ClickHouse/ClickHouse/pull/35047) ([Kruglov Pavel](https://github.com/Avogar)). -* Add backward compatibility check in stress test. Closes [#25088](https://github.com/ClickHouse/ClickHouse/issues/25088). [#27928](https://github.com/ClickHouse/ClickHouse/pull/27928) ([Kruglov Pavel](https://github.com/Avogar)). 
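For the `max_rows_to_read` entry above, a sketch of the kind of query that previously could fail with `Limit for rows or bytes to read exceeded` even though only a few rows were needed. The `metrics` table, its sorting key `ts`, and the particular setting values are hypothetical:

```sql
-- Reading in sorting-key order with a small LIMIT should only account for the
-- rows actually read, not the whole table, against max_rows_to_read.
SELECT ts, value
FROM metrics
ORDER BY ts
LIMIT 10
SETTINGS max_rows_to_read = 100000, optimize_read_in_order = 1;
```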
-* Migrate package building to `nfpm` - Deprecate `release` script in favor of `packages/build` - Build everything in clickhouse/binary-builder image (cleanup: clickhouse/deb-builder) - Add symbol stripping to cmake (todo: use $prefix/lib/$bin_dir/clickhouse/$binary.debug) - Fix issue with DWARF symbols - Add Alpine APK packages - Rename `alien` to `additional_pkgs`. [#33664](https://github.com/ClickHouse/ClickHouse/pull/33664) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Add a night scan and upload for Coverity. [#34895](https://github.com/ClickHouse/ClickHouse/pull/34895) ([Boris Kuschel](https://github.com/bkuschel)). -* A dedicated small package for `clickhouse-keeper`. [#35308](https://github.com/ClickHouse/ClickHouse/pull/35308) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Running with podman was failing: it complains about specifying the same volume twice. [#35978](https://github.com/ClickHouse/ClickHouse/pull/35978) ([Roman Nikonov](https://github.com/nic11)). -* Minor improvement in contrib/krb5 build configuration. [#35832](https://github.com/ClickHouse/ClickHouse/pull/35832) ([Anton Kozlov](https://github.com/tonickkozlov)). -* Add a label to recognize a building task for every image. [#35583](https://github.com/ClickHouse/ClickHouse/pull/35583) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Apply `black` formatter to python code and add a per-commit check. [#35466](https://github.com/ClickHouse/ClickHouse/pull/35466) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Redo alpine image to use clean Dockerfile. Create a script in tests/ci to build both ubuntu and alpine images. Add clickhouse-keeper image (cc @nikitamikhaylov). Add build check to PullRequestCI. Add a job to a ReleaseCI. Add a job to MasterCI to build and push `clickhouse/clickhouse-server:head` and `clickhouse/clickhouse-keeper:head` images for each merged PR. [#35211](https://github.com/ClickHouse/ClickHouse/pull/35211) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Fix stress-test report in CI, now we upload the runlog with information about started stress tests only once. [#35093](https://github.com/ClickHouse/ClickHouse/pull/35093) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Switch to libcxx / libcxxabi from LLVM 14. [#34906](https://github.com/ClickHouse/ClickHouse/pull/34906) ([Raúl Marín](https://github.com/Algunenano)). -* Update unixodbc to mitigate CVE-2018-7485. Note: this CVE is not relevant for ClickHouse as it implements its own isolation layer for ODBC. [#35943](https://github.com/ClickHouse/ClickHouse/pull/35943) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). - -#### Bug Fix - -* Added settings `input_format_ipv4_default_on_conversion_error`, `input_format_ipv6_default_on_conversion_error` to allow insert of invalid ip address values as default into tables. Closes [#35726](https://github.com/ClickHouse/ClickHouse/issues/35726). [#35733](https://github.com/ClickHouse/ClickHouse/pull/35733) ([Maksim Kita](https://github.com/kitaisreal)). -* Avoid erasing columns from a block if it doesn't exist while reading data from Hive. [#35393](https://github.com/ClickHouse/ClickHouse/pull/35393) ([lgbo](https://github.com/lgbo-ustc)). -* Add type checking when creating materialized view. Close: [#23684](https://github.com/ClickHouse/ClickHouse/issues/23684). [#24896](https://github.com/ClickHouse/ClickHouse/pull/24896) ([hexiaoting](https://github.com/hexiaoting)). -* Fix formatting of INSERT INFILE queries (missing quotes). 
[#35886](https://github.com/ClickHouse/ClickHouse/pull/35886) ([Azat Khuzhin](https://github.com/azat)). -* Disable `session_log` because memory safety issue has been found by fuzzing. See [#35714](https://github.com/ClickHouse/ClickHouse/issues/35714). [#35873](https://github.com/ClickHouse/ClickHouse/pull/35873) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Avoid processing per-column TTL multiple times. [#35820](https://github.com/ClickHouse/ClickHouse/pull/35820) ([Azat Khuzhin](https://github.com/azat)). -* Fix inserts to columns of type `Object` in case when there is data related to several partitions in insert query. [#35806](https://github.com/ClickHouse/ClickHouse/pull/35806) ([Anton Popov](https://github.com/CurtizJ)). -* Fix bug in indexes of not presented columns in -WithNames formats that led to error `INCORRECT_NUMBER_OF_COLUMNS ` when the number of columns is more than 256. Closes [#35793](https://github.com/ClickHouse/ClickHouse/issues/35793). [#35803](https://github.com/ClickHouse/ClickHouse/pull/35803) ([Kruglov Pavel](https://github.com/Avogar)). -* Fixes [#35751](https://github.com/ClickHouse/ClickHouse/issues/35751). [#35799](https://github.com/ClickHouse/ClickHouse/pull/35799) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix for reading from HDFS in Snappy format. [#35771](https://github.com/ClickHouse/ClickHouse/pull/35771) ([shuchaome](https://github.com/shuchaome)). -* Fix bug in conversion from custom types to string that could lead to segfault or unexpected error messages. Closes [#35752](https://github.com/ClickHouse/ClickHouse/issues/35752). [#35755](https://github.com/ClickHouse/ClickHouse/pull/35755) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix any/all (subquery) implementation. Closes [#35489](https://github.com/ClickHouse/ClickHouse/issues/35489). [#35727](https://github.com/ClickHouse/ClickHouse/pull/35727) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix dropping non-empty database in clickhouse-local. Closes [#35692](https://github.com/ClickHouse/ClickHouse/issues/35692). [#35711](https://github.com/ClickHouse/ClickHouse/pull/35711) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix bug in creating materialized view with subquery after server restart. Materialized view was not getting updated after inserts into underlying table after server restart. Closes [#35511](https://github.com/ClickHouse/ClickHouse/issues/35511). [#35691](https://github.com/ClickHouse/ClickHouse/pull/35691) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix possible `Can't adjust last granule` exception while reading subcolumns of experimental type `Object`. [#35687](https://github.com/ClickHouse/ClickHouse/pull/35687) ([Anton Popov](https://github.com/CurtizJ)). -* Enable build with JIT compilation by default. [#35683](https://github.com/ClickHouse/ClickHouse/pull/35683) ([Maksim Kita](https://github.com/kitaisreal)). -* Fix possible loss of subcolumns in experimental type `Object`. [#35682](https://github.com/ClickHouse/ClickHouse/pull/35682) ([Anton Popov](https://github.com/CurtizJ)). -* Fix check ASOF JOIN key nullability, close [#35565](https://github.com/ClickHouse/ClickHouse/issues/35565). [#35674](https://github.com/ClickHouse/ClickHouse/pull/35674) ([Vladimir C](https://github.com/vdimir)). -* Fix part checking logic for parts with projections. Error happened when projection and main part had different types. This is similar to https://github.com/ClickHouse/ClickHouse/pull/33774 . 
The bug is addressed by @caoyang10. [#35667](https://github.com/ClickHouse/ClickHouse/pull/35667) ([Amos Bird](https://github.com/amosbird)). -* Fix server crash when large number of arguments are passed into `format` function. Please refer to the test file and see how to reproduce the crash. [#35651](https://github.com/ClickHouse/ClickHouse/pull/35651) ([Amos Bird](https://github.com/amosbird)). -* Fix usage of quotas with asynchronous inserts. [#35645](https://github.com/ClickHouse/ClickHouse/pull/35645) ([Anton Popov](https://github.com/CurtizJ)). -* Fix positional arguments with aliases. Closes [#35600](https://github.com/ClickHouse/ClickHouse/issues/35600). [#35620](https://github.com/ClickHouse/ClickHouse/pull/35620) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Check `remote_url_allow_hosts` before schema inference in URL engine Closes [#35064](https://github.com/ClickHouse/ClickHouse/issues/35064). [#35619](https://github.com/ClickHouse/ClickHouse/pull/35619) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix `HashJoin` when columns with `LowCardinality` type are used. This closes [#35548](https://github.com/ClickHouse/ClickHouse/issues/35548). [#35616](https://github.com/ClickHouse/ClickHouse/pull/35616) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix possible segfault in MaterializedPostgreSQL which happened if exception occurred when data, collected in memory, was synced into underlying tables. Closes [#35611](https://github.com/ClickHouse/ClickHouse/issues/35611). [#35614](https://github.com/ClickHouse/ClickHouse/pull/35614) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Setting `database_atomic_wait_for_drop_and_detach_synchronously` worked incorrectly for `ATTACH TABLE` query when previously detached table is still in use, It's fixed. [#35594](https://github.com/ClickHouse/ClickHouse/pull/35594) ([tavplubix](https://github.com/tavplubix)). -* Fix HTTP headers with named collections, add compression_method. Closes [#35273](https://github.com/ClickHouse/ClickHouse/issues/35273). Closes [#35269](https://github.com/ClickHouse/ClickHouse/issues/35269). [#35593](https://github.com/ClickHouse/ClickHouse/pull/35593) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix s3 engine getting virtual columns. Closes [#35411](https://github.com/ClickHouse/ClickHouse/issues/35411). [#35586](https://github.com/ClickHouse/ClickHouse/pull/35586) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fixed return type deduction for `caseWithExpression`. The type of the ELSE branch is now correctly taken into account. [#35576](https://github.com/ClickHouse/ClickHouse/pull/35576) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix parsing of IPv6 addresses longer than 39 characters. Closes [#34022](https://github.com/ClickHouse/ClickHouse/issues/34022). [#35539](https://github.com/ClickHouse/ClickHouse/pull/35539) ([Maksim Kita](https://github.com/kitaisreal)). -* Fix cast into IPv4, IPv6 address in IN section. Fixes [#35528](https://github.com/ClickHouse/ClickHouse/issues/35528). [#35534](https://github.com/ClickHouse/ClickHouse/pull/35534) ([Maksim Kita](https://github.com/kitaisreal)). -* Fix crash during short circuit function evaluation when one of arguments is nullable constant. Closes [#35497](https://github.com/ClickHouse/ClickHouse/issues/35497). Closes [#35496](https://github.com/ClickHouse/ClickHouse/issues/35496). [#35502](https://github.com/ClickHouse/ClickHouse/pull/35502) ([Maksim Kita](https://github.com/kitaisreal)). 
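The `caseWithExpression` fix above concerns the `CASE <expr> WHEN ...` form. A small illustration; the exact deduced result type may vary by version, the point is that the ELSE branch participates in it:

```sql
-- The ELSE branch's type (Int64 here) is now taken into account when
-- deducing the result type of CASE <expr> WHEN ... END.
SELECT
    CASE number WHEN 0 THEN toInt32(1) ELSE toInt64(100) END AS v,
    toTypeName(v)
FROM numbers(3);
```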
-* Fix crash for function `throwIf` with constant arguments. [#35500](https://github.com/ClickHouse/ClickHouse/pull/35500) ([Maksim Kita](https://github.com/kitaisreal)). -* Fix bug in Keeper which can lead to unstable client connections. Introduced in [#35031](https://github.com/ClickHouse/ClickHouse/issues/35031). [#35498](https://github.com/ClickHouse/ClickHouse/pull/35498) ([alesapin](https://github.com/alesapin)). -* Fix bug in function `if` when resulting column type differs with resulting data type that led to logical errors like `Logical error: 'Bad cast from type DB::ColumnVector to DB::ColumnVector'.`. Closes [#35367](https://github.com/ClickHouse/ClickHouse/issues/35367). [#35476](https://github.com/ClickHouse/ClickHouse/pull/35476) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix excessive logging when using S3 as backend for MergeTree or as separate table engine/function. Fixes [#30559](https://github.com/ClickHouse/ClickHouse/issues/30559). [#35434](https://github.com/ClickHouse/ClickHouse/pull/35434) ([alesapin](https://github.com/alesapin)). -* Now merges executed with zero copy replication (experimental) will not spam logs with message `Found parts with the same min block and with the same max block as the missing part _ on replica _. Hoping that it will eventually appear as a result of a merge.`. [#35430](https://github.com/ClickHouse/ClickHouse/pull/35430) ([alesapin](https://github.com/alesapin)). -* Skip possible exception if empty chunks appear in GroupingAggregatedTransform. [#35417](https://github.com/ClickHouse/ClickHouse/pull/35417) ([Nikita Taranov](https://github.com/nickitat)). -* Fix working with columns that are not needed in query in Arrow/Parquet/ORC formats, it prevents possible errors like `Unsupported type of an input column ` when file contains column with unsupported type and we don't use it in query. [#35406](https://github.com/ClickHouse/ClickHouse/pull/35406) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix for local cache for remote filesystem (experimental feature) for high concurrency on corner cases. [#35381](https://github.com/ClickHouse/ClickHouse/pull/35381) ([Kseniia Sumarokova](https://github.com/kssenii)). Fix possible deadlock in cache. [#35378](https://github.com/ClickHouse/ClickHouse/pull/35378) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix partition pruning in case of comparison with constant in `WHERE`. If column and constant had different types, overflow was possible. Query could return an incorrect empty result. This fixes [#35304](https://github.com/ClickHouse/ClickHouse/issues/35304). [#35334](https://github.com/ClickHouse/ClickHouse/pull/35334) ([Amos Bird](https://github.com/amosbird)). -* Fix schema inference for TSKV format while using small max_read_buffer_size. [#35332](https://github.com/ClickHouse/ClickHouse/pull/35332) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix mutations in tables with enabled sparse columns. [#35284](https://github.com/ClickHouse/ClickHouse/pull/35284) ([Anton Popov](https://github.com/CurtizJ)). -* Do not delay final part writing by default (fixes possible `Memory limit exceeded` during `INSERT` by adding `max_insert_delayed_streams_for_parallel_write` with default to 1000 for writes to s3 and disabled as before otherwise). [#34780](https://github.com/ClickHouse/ClickHouse/pull/34780) ([Azat Khuzhin](https://github.com/azat)). 
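For the `throwIf` fix listed above, a minimal example with constant arguments, which is the case that previously could crash the server; the condition here is a constant false, so the query simply returns:

```sql
-- throwIf(condition, message) raises the message as an error when the
-- condition is true; with a constant 0 it just returns 0.
SELECT throwIf(0, 'this should never fire');
```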
- -### ClickHouse release v22.3-lts, 2022-03-17 - -#### Backward Incompatible Change - -* Make `arrayCompact` function behave as other higher-order functions: perform compaction not of lambda function results but on the original array. If you're using nontrivial lambda functions in arrayCompact you may restore old behaviour by wrapping `arrayCompact` arguments into `arrayMap`. Closes [#34010](https://github.com/ClickHouse/ClickHouse/issues/34010) [#18535](https://github.com/ClickHouse/ClickHouse/issues/18535) [#14778](https://github.com/ClickHouse/ClickHouse/issues/14778). [#34795](https://github.com/ClickHouse/ClickHouse/pull/34795) ([Alexandre Snarskii](https://github.com/snar)). -* Change implementation specific behavior on overflow of function `toDatetime`. It will be saturated to the nearest min/max supported instant of datetime instead of wraparound. This change is highlighted as "backward incompatible" because someone may unintentionally rely on the old behavior. [#32898](https://github.com/ClickHouse/ClickHouse/pull/32898) ([HaiBo Li](https://github.com/marising)). -* Make function `cast(value, 'IPv4')`, `cast(value, 'IPv6')` behave same as `toIPv4`, `toIPv6` functions. Changed behavior of incorrect IP address passed into functions `toIPv4`,` toIPv6`, now if invalid IP address passes into this functions exception will be raised, before this function return default value. Added functions `IPv4StringToNumOrDefault`, `IPv4StringToNumOrNull`, `IPv6StringToNumOrDefault`, `IPv6StringOrNull` `toIPv4OrDefault`, `toIPv4OrNull`, `toIPv6OrDefault`, `toIPv6OrNull`. Functions `IPv4StringToNumOrDefault `, `toIPv4OrDefault `, `toIPv6OrDefault ` should be used if previous logic relied on `IPv4StringToNum`, `toIPv4`, `toIPv6` returning default value for invalid address. Added setting `cast_ipv4_ipv6_default_on_conversion_error`, if this setting enabled, then IP address conversion functions will behave as before. Closes [#22825](https://github.com/ClickHouse/ClickHouse/issues/22825). Closes [#5799](https://github.com/ClickHouse/ClickHouse/issues/5799). Closes [#35156](https://github.com/ClickHouse/ClickHouse/issues/35156). [#35240](https://github.com/ClickHouse/ClickHouse/pull/35240) ([Maksim Kita](https://github.com/kitaisreal)). - -#### New Feature - -* Support for caching data locally for remote filesystems. It can be enabled for `s3` disks. Closes [#28961](https://github.com/ClickHouse/ClickHouse/issues/28961). [#33717](https://github.com/ClickHouse/ClickHouse/pull/33717) ([Kseniia Sumarokova](https://github.com/kssenii)). In the meantime, we enabled the test suite on s3 filesystem and no more known issues exist, so it is started to be production ready. -* Add new table function `hive`. It can be used as follows `hive('', '', '', '', '')` for example `SELECT * FROM hive('thrift://hivetest:9083', 'test', 'demo', 'id Nullable(String), score Nullable(Int32), day Nullable(String)', 'day')`. [#34946](https://github.com/ClickHouse/ClickHouse/pull/34946) ([lgbo](https://github.com/lgbo-ustc)). -* Support authentication of users connected via SSL by their X.509 certificate. [#31484](https://github.com/ClickHouse/ClickHouse/pull/31484) ([eungenue](https://github.com/eungenue)). -* Support schema inference for inserting into table functions `file`/`hdfs`/`s3`/`url`. [#34732](https://github.com/ClickHouse/ClickHouse/pull/34732) ([Kruglov Pavel](https://github.com/Avogar)). -* Now you can read `system.zookeeper` table without restrictions on path or using `like` expression. 
This reads can generate quite heavy load for zookeeper so to enable this ability you have to enable setting `allow_unrestricted_reads_from_keeper`. [#34609](https://github.com/ClickHouse/ClickHouse/pull/34609) ([Sergei Trifonov](https://github.com/serxa)). -* Display CPU and memory metrics in clickhouse-local. Close [#34545](https://github.com/ClickHouse/ClickHouse/issues/34545). [#34605](https://github.com/ClickHouse/ClickHouse/pull/34605) ([李扬](https://github.com/taiyang-li)). -* Implement `startsWith` and `endsWith` function for arrays, closes [#33982](https://github.com/ClickHouse/ClickHouse/issues/33982). [#34368](https://github.com/ClickHouse/ClickHouse/pull/34368) ([usurai](https://github.com/usurai)). -* Add three functions for Map data type: 1. `mapReplace(map1, map2)` - replaces values for keys in map1 with the values of the corresponding keys in map2; adds keys from map2 that don't exist in map1. 2. `mapFilter` 3. `mapMap`. mapFilter and mapMap are higher order functions, accepting two arguments, the first argument is a lambda function with k, v pair as arguments, the second argument is a column of type Map. [#33698](https://github.com/ClickHouse/ClickHouse/pull/33698) ([hexiaoting](https://github.com/hexiaoting)). -* Allow getting default user and password for clickhouse-client from the `CLICKHOUSE_USER` and `CLICKHOUSE_PASSWORD` environment variables. Close [#34538](https://github.com/ClickHouse/ClickHouse/issues/34538). [#34947](https://github.com/ClickHouse/ClickHouse/pull/34947) ([DR](https://github.com/freedomDR)). - -#### Experimental Feature - -* New data type `Object()`, which supports storing of semi-structured data (for now JSON only). Data is written to such types as string. Then all paths are extracted according to format of semi-structured data and written as separate columns in most optimal types, that can store all their values. Those columns can be queried by names that match paths in source data. E.g `data.key1.key2` or with cast operator `data.key1.key2::Int64`. -* Add `database_replicated_allow_only_replicated_engine` setting. When enabled, it only allowed to only create `Replicated` tables or tables with stateless engines in `Replicated` databases. [#35214](https://github.com/ClickHouse/ClickHouse/pull/35214) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). Note that `Replicated` database is still an experimental feature. - -#### Performance Improvement - -* Improve performance of insertion into `MergeTree` tables by optimizing sorting. Up to 2x improvement is observed on realistic benchmarks. [#34750](https://github.com/ClickHouse/ClickHouse/pull/34750) ([Maksim Kita](https://github.com/kitaisreal)). -* Columns pruning when reading Parquet, ORC and Arrow files from URL and S3. Closes [#34163](https://github.com/ClickHouse/ClickHouse/issues/34163). [#34849](https://github.com/ClickHouse/ClickHouse/pull/34849) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Columns pruning when reading Parquet, ORC and Arrow files from Hive. [#34954](https://github.com/ClickHouse/ClickHouse/pull/34954) ([lgbo](https://github.com/lgbo-ustc)). -* A bunch of performance optimizations from a performance superhero. Improve performance of processing queries with large `IN` section. Improve performance of `direct` dictionary if its source is `ClickHouse`. Improve performance of `detectCharset `, `detectLanguageUnknown ` functions. [#34888](https://github.com/ClickHouse/ClickHouse/pull/34888) ([Maksim Kita](https://github.com/kitaisreal)). 
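The Map functions added above are higher-order functions; a small sketch of `mapFilter` using an inline `map()` literal (the keys and values are arbitrary sample data):

```sql
-- mapFilter keeps only the (key, value) pairs for which the lambda returns true.
SELECT mapFilter((k, v) -> v > 1, map('a', 1, 'b', 2, 'c', 3)) AS filtered;
```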
-* Improve performance of `any` aggregate function by using more batching. [#34760](https://github.com/ClickHouse/ClickHouse/pull/34760) ([Raúl Marín](https://github.com/Algunenano)). -* Multiple improvements for performance of `clickhouse-keeper`: less locking [#35010](https://github.com/ClickHouse/ClickHouse/pull/35010) ([zhanglistar](https://github.com/zhanglistar)), lower memory usage by streaming reading and writing of snapshot instead of full copy. [#34584](https://github.com/ClickHouse/ClickHouse/pull/34584) ([zhanglistar](https://github.com/zhanglistar)), optimizing compaction of log store in the RAFT implementation. [#34534](https://github.com/ClickHouse/ClickHouse/pull/34534) ([zhanglistar](https://github.com/zhanglistar)), versioning of the internal data structure [#34486](https://github.com/ClickHouse/ClickHouse/pull/34486) ([zhanglistar](https://github.com/zhanglistar)). - -#### Improvement - -* Allow asynchronous inserts to table functions. Fixes [#34864](https://github.com/ClickHouse/ClickHouse/issues/34864). [#34866](https://github.com/ClickHouse/ClickHouse/pull/34866) ([Anton Popov](https://github.com/CurtizJ)). -* Implicit type casting of the key argument for functions `dictGetHierarchy`, `dictIsIn`, `dictGetChildren`, `dictGetDescendants`. Closes [#34970](https://github.com/ClickHouse/ClickHouse/issues/34970). [#35027](https://github.com/ClickHouse/ClickHouse/pull/35027) ([Maksim Kita](https://github.com/kitaisreal)). -* `EXPLAIN AST` query can output AST in form of a graph in Graphviz format: `EXPLAIN AST graph = 1 SELECT * FROM system.parts`. [#35173](https://github.com/ClickHouse/ClickHouse/pull/35173) ([李扬](https://github.com/taiyang-li)). -* When large files were written with `s3` table function or table engine, the content type on the files was mistakenly set to `application/xml` due to a bug in the AWS SDK. This closes [#33964](https://github.com/ClickHouse/ClickHouse/issues/33964). [#34433](https://github.com/ClickHouse/ClickHouse/pull/34433) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Change restrictive row policies a bit to make them an easier alternative to permissive policies in easy cases. If for a particular table only restrictive policies exist (without permissive policies) users will be able to see some rows. Also `SHOW CREATE ROW POLICY` will always show `AS permissive` or `AS restrictive` in row policy's definition. [#34596](https://github.com/ClickHouse/ClickHouse/pull/34596) ([Vitaly Baranov](https://github.com/vitlibar)). -* Improve schema inference with globs in File/S3/HDFS/URL engines. Try to use the next path for schema inference in case of error. [#34465](https://github.com/ClickHouse/ClickHouse/pull/34465) ([Kruglov Pavel](https://github.com/Avogar)). -* Play UI now correctly detects the preferred light/dark theme from the OS. [#35068](https://github.com/ClickHouse/ClickHouse/pull/35068) ([peledni](https://github.com/peledni)). -* Added `date_time_input_format = 'best_effort_us'`. Closes [#34799](https://github.com/ClickHouse/ClickHouse/issues/34799). [#34982](https://github.com/ClickHouse/ClickHouse/pull/34982) ([WenYao](https://github.com/Cai-Yao)). -* A new settings called `allow_plaintext_password` and `allow_no_password` are added in server configuration which turn on/off authentication types that can be potentially insecure in some environments. They are allowed by default. [#34738](https://github.com/ClickHouse/ClickHouse/pull/34738) ([Heena Bansal](https://github.com/HeenaBansal2009)). 
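The Graphviz output for `EXPLAIN AST` described above can be requested as in the entry's own example:

```sql
-- graph = 1 switches the AST dump from the default tree rendering to Graphviz DOT format.
EXPLAIN AST graph = 1
SELECT * FROM system.parts;
```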
-* Support for `DateTime64` data type in `Arrow` format, closes [#8280](https://github.com/ClickHouse/ClickHouse/issues/8280) and closes [#28574](https://github.com/ClickHouse/ClickHouse/issues/28574). [#34561](https://github.com/ClickHouse/ClickHouse/pull/34561) ([李扬](https://github.com/taiyang-li)). -* Reload `remote_url_allow_hosts` (filtering of outgoing connections) on config update. [#35294](https://github.com/ClickHouse/ClickHouse/pull/35294) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Support `--testmode` parameter for `clickhouse-local`. This parameter enables interpretation of test hints that we use in functional tests. [#35264](https://github.com/ClickHouse/ClickHouse/pull/35264) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Add `distributed_depth` to query log. It is like a more detailed variant of `is_initial_query` [#35207](https://github.com/ClickHouse/ClickHouse/pull/35207) ([李扬](https://github.com/taiyang-li)). -* Respect `remote_url_allow_hosts` for `MySQL` and `PostgreSQL` table functions. [#35191](https://github.com/ClickHouse/ClickHouse/pull/35191) ([Heena Bansal](https://github.com/HeenaBansal2009)). -* Added `disk_name` field to `system.part_log`. [#35178](https://github.com/ClickHouse/ClickHouse/pull/35178) ([Artyom Yurkov](https://github.com/Varinara)). -* Do not retry non-rertiable errors when querying remote URLs. Closes [#35161](https://github.com/ClickHouse/ClickHouse/issues/35161). [#35172](https://github.com/ClickHouse/ClickHouse/pull/35172) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Support distributed INSERT SELECT queries (the setting `parallel_distributed_insert_select`) table function `view()`. [#35132](https://github.com/ClickHouse/ClickHouse/pull/35132) ([Azat Khuzhin](https://github.com/azat)). -* More precise memory tracking during `INSERT` into `Buffer` with `AggregateFunction`. [#35072](https://github.com/ClickHouse/ClickHouse/pull/35072) ([Azat Khuzhin](https://github.com/azat)). -* Avoid division by zero in Query Profiler if Linux kernel has a bug. Closes [#34787](https://github.com/ClickHouse/ClickHouse/issues/34787). [#35032](https://github.com/ClickHouse/ClickHouse/pull/35032) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Add more sanity checks for keeper configuration: now mixing of localhost and non-local servers is not allowed, also add checks for same value of internal raft port and keeper client port. [#35004](https://github.com/ClickHouse/ClickHouse/pull/35004) ([alesapin](https://github.com/alesapin)). -* Currently, if the user changes the settings of the system tables there will be tons of logs and ClickHouse will rename the tables every minute. This fixes [#34929](https://github.com/ClickHouse/ClickHouse/issues/34929). [#34949](https://github.com/ClickHouse/ClickHouse/pull/34949) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Use connection pool for Hive metastore client. [#34940](https://github.com/ClickHouse/ClickHouse/pull/34940) ([lgbo](https://github.com/lgbo-ustc)). -* Ignore per-column `TTL` in `CREATE TABLE AS` if new table engine does not support it (i.e. if the engine is not of `MergeTree` family). [#34938](https://github.com/ClickHouse/ClickHouse/pull/34938) ([Azat Khuzhin](https://github.com/azat)). -* Allow `LowCardinality` strings for `ngrambf_v1`/`tokenbf_v1` indexes. Closes [#21865](https://github.com/ClickHouse/ClickHouse/issues/21865). [#34911](https://github.com/ClickHouse/ClickHouse/pull/34911) ([Lars Hiller Eidnes](https://github.com/larspars)). 
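For the `ngrambf_v1`/`tokenbf_v1` entry above, a sketch of a table where the indexed column is `LowCardinality(String)`; the table name, index name, and the bloom-filter parameters (size in bytes, number of hash functions, seed) are illustrative:

```sql
-- A LowCardinality(String) column can now back a token bloom-filter skipping index.
CREATE TABLE app_logs
(
    ts      DateTime,
    message LowCardinality(String),
    INDEX message_tokens message TYPE tokenbf_v1(10240, 3, 0) GRANULARITY 4
)
ENGINE = MergeTree
ORDER BY ts;
```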
-* Allow opening empty sqlite db if the file doesn't exist. Closes [#33367](https://github.com/ClickHouse/ClickHouse/issues/33367). [#34907](https://github.com/ClickHouse/ClickHouse/pull/34907) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Implement memory statistics for FreeBSD - this is required for `max_server_memory_usage` to work correctly. [#34902](https://github.com/ClickHouse/ClickHouse/pull/34902) ([Alexandre Snarskii](https://github.com/snar)). -* In previous versions the progress bar in clickhouse-client can jump forward near 50% for no reason. This closes [#34324](https://github.com/ClickHouse/ClickHouse/issues/34324). [#34801](https://github.com/ClickHouse/ClickHouse/pull/34801) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Now `ALTER TABLE DROP COLUMN columnX` queries for `MergeTree` table engines will work instantly when `columnX` is an `ALIAS` column. Fixes [#34660](https://github.com/ClickHouse/ClickHouse/issues/34660). [#34786](https://github.com/ClickHouse/ClickHouse/pull/34786) ([alesapin](https://github.com/alesapin)). -* Show hints when user mistyped the name of a data skipping index. Closes [#29698](https://github.com/ClickHouse/ClickHouse/issues/29698). [#34764](https://github.com/ClickHouse/ClickHouse/pull/34764) ([flynn](https://github.com/ucasfl)). -* Support `remote()`/`cluster()` table functions for `parallel_distributed_insert_select`. [#34728](https://github.com/ClickHouse/ClickHouse/pull/34728) ([Azat Khuzhin](https://github.com/azat)). -* Do not reset logging that configured via `--log-file`/`--errorlog-file` command line options in case of empty configuration in the config file. [#34718](https://github.com/ClickHouse/ClickHouse/pull/34718) ([Amos Bird](https://github.com/amosbird)). -* Extract schema only once on table creation and prevent reading from local files/external sources to extract schema on each server startup. [#34684](https://github.com/ClickHouse/ClickHouse/pull/34684) ([Kruglov Pavel](https://github.com/Avogar)). -* Allow specifying argument names for executable UDFs. This is necessary for formats where argument name is part of serialization, like `Native`, `JSONEachRow`. Closes [#34604](https://github.com/ClickHouse/ClickHouse/issues/34604). [#34653](https://github.com/ClickHouse/ClickHouse/pull/34653) ([Maksim Kita](https://github.com/kitaisreal)). -* `MaterializedMySQL` (experimental feature) now supports `materialized_mysql_tables_list` (a comma-separated list of MySQL database tables, which will be replicated by the MaterializedMySQL database engine. Default value: empty list — means all the tables will be replicated), mentioned at [#32977](https://github.com/ClickHouse/ClickHouse/issues/32977). [#34487](https://github.com/ClickHouse/ClickHouse/pull/34487) ([zzsmdfj](https://github.com/zzsmdfj)). -* Improve OpenTelemetry span logs for INSERT operation on distributed table. [#34480](https://github.com/ClickHouse/ClickHouse/pull/34480) ([Frank Chen](https://github.com/FrankChen021)). -* Make the znode `ctime` and `mtime` consistent between servers in ClickHouse Keeper. [#33441](https://github.com/ClickHouse/ClickHouse/pull/33441) ([小路](https://github.com/nicelulu)). - -#### Build/Testing/Packaging Improvement - -* Package repository is migrated to JFrog Artifactory (**Mikhail f. Shiryaev**). -* Randomize some settings in functional tests, so more possible combinations of settings will be tested. This is yet another fuzzing method to ensure better test coverage. 
This closes [#32268](https://github.com/ClickHouse/ClickHouse/issues/32268). [#34092](https://github.com/ClickHouse/ClickHouse/pull/34092) ([Kruglov Pavel](https://github.com/Avogar)). -* Drop PVS-Studio from our CI. [#34680](https://github.com/ClickHouse/ClickHouse/pull/34680) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Add an ability to build stripped binaries with CMake. In previous versions it was performed by dh-tools. [#35196](https://github.com/ClickHouse/ClickHouse/pull/35196) ([alesapin](https://github.com/alesapin)). -* Smaller "fat-free" `clickhouse-keeper` build. [#35031](https://github.com/ClickHouse/ClickHouse/pull/35031) ([alesapin](https://github.com/alesapin)). -* Use @robot-clickhouse as an author and committer for PRs like https://github.com/ClickHouse/ClickHouse/pull/34685. [#34793](https://github.com/ClickHouse/ClickHouse/pull/34793) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Limit DWARF version for debug info by 4 max, because our internal stack symbolizer cannot parse DWARF version 5. This makes sense if you compile ClickHouse with clang-15. [#34777](https://github.com/ClickHouse/ClickHouse/pull/34777) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Remove `clickhouse-test` debian package as unneeded complication. CI use tests from repository and standalone testing via deb package is no longer supported. [#34606](https://github.com/ClickHouse/ClickHouse/pull/34606) ([Ilya Yatsishin](https://github.com/qoega)). - -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) - -* A fix for HDFS integration: When the inner buffer size is too small, NEED_MORE_INPUT in `HadoopSnappyDecoder` will run multi times (>=3) for one compressed block. This makes the input data be copied into the wrong place in `HadoopSnappyDecoder::buffer`. [#35116](https://github.com/ClickHouse/ClickHouse/pull/35116) ([lgbo](https://github.com/lgbo-ustc)). -* Ignore obsolete grants in ATTACH GRANT statements. This PR fixes [#34815](https://github.com/ClickHouse/ClickHouse/issues/34815). [#34855](https://github.com/ClickHouse/ClickHouse/pull/34855) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix segfault in Postgres database when getting create table query if database was created using named collections. Closes [#35312](https://github.com/ClickHouse/ClickHouse/issues/35312). [#35313](https://github.com/ClickHouse/ClickHouse/pull/35313) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix partial merge join duplicate rows bug, close [#31009](https://github.com/ClickHouse/ClickHouse/issues/31009). [#35311](https://github.com/ClickHouse/ClickHouse/pull/35311) ([Vladimir C](https://github.com/vdimir)). -* Fix possible `Assertion 'position() != working_buffer.end()' failed` while using bzip2 compression with small `max_read_buffer_size` setting value. The bug was found in https://github.com/ClickHouse/ClickHouse/pull/35047. [#35300](https://github.com/ClickHouse/ClickHouse/pull/35300) ([Kruglov Pavel](https://github.com/Avogar)). While using lz4 compression with a small max_read_buffer_size setting value. [#35296](https://github.com/ClickHouse/ClickHouse/pull/35296) ([Kruglov Pavel](https://github.com/Avogar)). While using lzma compression with small `max_read_buffer_size` setting value. [#35295](https://github.com/ClickHouse/ClickHouse/pull/35295) ([Kruglov Pavel](https://github.com/Avogar)). While using `brotli` compression with a small `max_read_buffer_size` setting value. 
The bug was found in https://github.com/ClickHouse/ClickHouse/pull/35047. [#35281](https://github.com/ClickHouse/ClickHouse/pull/35281) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix possible segfault in `JSONEachRow` schema inference. [#35291](https://github.com/ClickHouse/ClickHouse/pull/35291) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix `CHECK TABLE` query in case when sparse columns are enabled in table. [#35274](https://github.com/ClickHouse/ClickHouse/pull/35274) ([Anton Popov](https://github.com/CurtizJ)). -* Avoid std::terminate in case of exception in reading from remote VFS. [#35257](https://github.com/ClickHouse/ClickHouse/pull/35257) ([Azat Khuzhin](https://github.com/azat)). -* Fix reading port from config, close [#34776](https://github.com/ClickHouse/ClickHouse/issues/34776). [#35193](https://github.com/ClickHouse/ClickHouse/pull/35193) ([Vladimir C](https://github.com/vdimir)). -* Fix error in query with `WITH TOTALS` in case if `HAVING` returned empty result. This fixes [#33711](https://github.com/ClickHouse/ClickHouse/issues/33711). [#35186](https://github.com/ClickHouse/ClickHouse/pull/35186) ([Amos Bird](https://github.com/amosbird)). -* Fix a corner case of `replaceRegexpAll`, close [#35117](https://github.com/ClickHouse/ClickHouse/issues/35117). [#35182](https://github.com/ClickHouse/ClickHouse/pull/35182) ([Vladimir C](https://github.com/vdimir)). -* Schema inference didn't work properly on case of `INSERT INTO FUNCTION s3(...) FROM ...`, it tried to read schema from s3 file instead of from select query. [#35176](https://github.com/ClickHouse/ClickHouse/pull/35176) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix MaterializedPostgreSQL (experimental feature) `table overrides` for partition by, etc. Closes [#35048](https://github.com/ClickHouse/ClickHouse/issues/35048). [#35162](https://github.com/ClickHouse/ClickHouse/pull/35162) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix MaterializedPostgreSQL (experimental feature) adding new table to replication (ATTACH TABLE) after manually removing (DETACH TABLE). Closes [#33800](https://github.com/ClickHouse/ClickHouse/issues/33800). Closes [#34922](https://github.com/ClickHouse/ClickHouse/issues/34922). Closes [#34315](https://github.com/ClickHouse/ClickHouse/issues/34315). [#35158](https://github.com/ClickHouse/ClickHouse/pull/35158) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix partition pruning error when non-monotonic function is used with IN operator. This fixes [#35136](https://github.com/ClickHouse/ClickHouse/issues/35136). [#35146](https://github.com/ClickHouse/ClickHouse/pull/35146) ([Amos Bird](https://github.com/amosbird)). -* Fixed slightly incorrect translation of YAML configs to XML. [#35135](https://github.com/ClickHouse/ClickHouse/pull/35135) ([Miel Donkers](https://github.com/mdonkers)). -* Fix `optimize_skip_unused_shards_rewrite_in` for signed columns and negative values. [#35134](https://github.com/ClickHouse/ClickHouse/pull/35134) ([Azat Khuzhin](https://github.com/azat)). -* The `update_lag` external dictionary configuration option was unusable showing the error message ``Unexpected key `update_lag` in dictionary source configuration``. [#35089](https://github.com/ClickHouse/ClickHouse/pull/35089) ([Jason Chu](https://github.com/1lann)). -* Avoid possible deadlock on server shutdown. [#35081](https://github.com/ClickHouse/ClickHouse/pull/35081) ([Azat Khuzhin](https://github.com/azat)). 
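A minimal illustration of the `WITH TOTALS`/`HAVING` fix mentioned above; the data source and predicate are arbitrary stand-ins:

```sql
-- HAVING filters out every group; with the fix the query returns an empty
-- result instead of misbehaving when combined with WITH TOTALS.
SELECT number % 3 AS k, count() AS c
FROM numbers(10)
GROUP BY k WITH TOTALS
HAVING c > 100;
```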
-* Fix missing alias after function is optimized to a subcolumn when setting `optimize_functions_to_subcolumns` is enabled. Closes [#33798](https://github.com/ClickHouse/ClickHouse/issues/33798). [#35079](https://github.com/ClickHouse/ClickHouse/pull/35079) ([qieqieplus](https://github.com/qieqieplus)). -* Fix reading from `system.asynchronous_inserts` table if there exists asynchronous insert into table function. [#35050](https://github.com/ClickHouse/ClickHouse/pull/35050) ([Anton Popov](https://github.com/CurtizJ)). -* Fix possible exception `Reading for MergeTree family tables must be done with last position boundary` (relevant to operation on remote VFS). Closes [#34979](https://github.com/ClickHouse/ClickHouse/issues/34979). [#35001](https://github.com/ClickHouse/ClickHouse/pull/35001) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix unexpected result when use -State type aggregate function in window frame. [#34999](https://github.com/ClickHouse/ClickHouse/pull/34999) ([metahys](https://github.com/metahys)). -* Fix possible segfault in FileLog (experimental feature). Closes [#30749](https://github.com/ClickHouse/ClickHouse/issues/30749). [#34996](https://github.com/ClickHouse/ClickHouse/pull/34996) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix possible rare error `Cannot push block to port which already has data`. [#34993](https://github.com/ClickHouse/ClickHouse/pull/34993) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix wrong schema inference for unquoted dates in CSV. Closes [#34768](https://github.com/ClickHouse/ClickHouse/issues/34768). [#34961](https://github.com/ClickHouse/ClickHouse/pull/34961) ([Kruglov Pavel](https://github.com/Avogar)). -* Integration with Hive: Fix unexpected result when use `in` in `where` in hive query. [#34945](https://github.com/ClickHouse/ClickHouse/pull/34945) ([lgbo](https://github.com/lgbo-ustc)). -* Avoid busy polling in ClickHouse Keeper while searching for changelog files to delete. [#34931](https://github.com/ClickHouse/ClickHouse/pull/34931) ([Azat Khuzhin](https://github.com/azat)). -* Fix DateTime64 conversion from PostgreSQL. Closes [#33364](https://github.com/ClickHouse/ClickHouse/issues/33364). [#34910](https://github.com/ClickHouse/ClickHouse/pull/34910) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix possible "Part directory doesn't exist" during `INSERT` into MergeTree table backed by VFS over s3. [#34876](https://github.com/ClickHouse/ClickHouse/pull/34876) ([Azat Khuzhin](https://github.com/azat)). -* Support DDLs like CREATE USER to be executed on cross replicated cluster. [#34860](https://github.com/ClickHouse/ClickHouse/pull/34860) ([Jianmei Zhang](https://github.com/zhangjmruc)). -* Fix bugs for multiple columns group by in `WindowView` (experimental feature). [#34859](https://github.com/ClickHouse/ClickHouse/pull/34859) ([vxider](https://github.com/Vxider)). -* Fix possible failures in S2 functions when queries contain const columns. [#34745](https://github.com/ClickHouse/ClickHouse/pull/34745) ([Bharat Nallan](https://github.com/bharatnc)). -* Fix bug for H3 funcs containing const columns which cause queries to fail. [#34743](https://github.com/ClickHouse/ClickHouse/pull/34743) ([Bharat Nallan](https://github.com/bharatnc)). -* Fix `No such file or directory` with enabled `fsync_part_directory` and vertical merge. [#34739](https://github.com/ClickHouse/ClickHouse/pull/34739) ([Azat Khuzhin](https://github.com/azat)). 
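A sketch of the cross-replicated-cluster DDL case fixed above (`CREATE USER ... ON CLUSTER`); the cluster name, user name and password are hypothetical placeholders:

```sql
-- Access-control DDL executed on every host of a (hypothetical) cluster.
CREATE USER IF NOT EXISTS report_reader ON CLUSTER analytics_cluster
IDENTIFIED WITH sha256_password BY 'change-me';

GRANT ON CLUSTER analytics_cluster SELECT ON reports.* TO report_reader;
```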
-* Fix serialization/printing for system queries `RELOAD MODEL`, `RELOAD FUNCTION`, `RESTART DISK` when used `ON CLUSTER`. Closes [#34514](https://github.com/ClickHouse/ClickHouse/issues/34514). [#34696](https://github.com/ClickHouse/ClickHouse/pull/34696) ([Maksim Kita](https://github.com/kitaisreal)). -* Fix `allow_experimental_projection_optimization` with `enable_global_with_statement` (before it may lead to `Stack size too large` error in case of multiple expressions in `WITH` clause, and also it executes scalar subqueries again and again, so now it will be more optimal). [#34650](https://github.com/ClickHouse/ClickHouse/pull/34650) ([Azat Khuzhin](https://github.com/azat)). -* Stop selecting a part for mutation when the other replica has already updated the transaction log for `ReplicatedMergeTree` engine. [#34633](https://github.com/ClickHouse/ClickHouse/pull/34633) ([Jianmei Zhang](https://github.com/zhangjmruc)). -* Fix incorrect result of trivial count query when part movement feature is used [#34089](https://github.com/ClickHouse/ClickHouse/issues/34089). [#34385](https://github.com/ClickHouse/ClickHouse/pull/34385) ([nvartolomei](https://github.com/nvartolomei)). -* Fix inconsistency of `max_query_size` limitation in distributed subqueries. [#34078](https://github.com/ClickHouse/ClickHouse/pull/34078) ([Chao Ma](https://github.com/godliness)). - -### ClickHouse release v22.2, 2022-02-17 - -#### Upgrade Notes - -* Applying data skipping indexes for queries with FINAL may produce an incorrect result. In this release we disabled data skipping indexes by default for queries with FINAL (a new setting `use_skip_indexes_if_final` is introduced and disabled by default). [#34243](https://github.com/ClickHouse/ClickHouse/pull/34243) ([Azat Khuzhin](https://github.com/azat)). - -#### New Feature - -* Projections are production ready. Set `allow_experimental_projection_optimization` by default and deprecate this setting. [#34456](https://github.com/ClickHouse/ClickHouse/pull/34456) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* An option to create new files on insert for `File`/`S3`/`HDFS` engines. Allow to overwrite a file in `HDFS`. Throw an exception on an attempt to overwrite a file in `S3` by default. Throw an exception on an attempt to append data to a file in formats that have a suffix (and thus don't support appends, like `Parquet`, `ORC`). Closes [#31640](https://github.com/ClickHouse/ClickHouse/issues/31640) Closes [#31622](https://github.com/ClickHouse/ClickHouse/issues/31622) Closes [#23862](https://github.com/ClickHouse/ClickHouse/issues/23862) Closes [#15022](https://github.com/ClickHouse/ClickHouse/issues/15022) Closes [#16674](https://github.com/ClickHouse/ClickHouse/issues/16674). [#33302](https://github.com/ClickHouse/ClickHouse/pull/33302) ([Kruglov Pavel](https://github.com/Avogar)). -* Add a setting that allows a user to provide their own deduplication semantics in `MergeTree`/`ReplicatedMergeTree`. If provided, it's used instead of data digest to generate block ID. So, for example, by providing a unique value for the setting in each INSERT statement, the user can avoid the same inserted data being deduplicated. This closes: [#7461](https://github.com/ClickHouse/ClickHouse/issues/7461). [#32304](https://github.com/ClickHouse/ClickHouse/pull/32304) ([Igor Nikonov](https://github.com/devcrafter)). -* Add support of `DEFAULT` keyword for INSERT statements. Closes [#6331](https://github.com/ClickHouse/ClickHouse/issues/6331). 
[#33141](https://github.com/ClickHouse/ClickHouse/pull/33141) ([Andrii Buriachevskyi](https://github.com/1over)). -* `EPHEMERAL` column specifier is added to `CREATE TABLE` query. Closes [#9436](https://github.com/ClickHouse/ClickHouse/issues/9436). [#34424](https://github.com/ClickHouse/ClickHouse/pull/34424) ([yakov-olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Support `IF EXISTS` clause for `TTL expr TO [DISK|VOLUME] [IF EXISTS] 'xxx'` feature. Parts will be moved to disk or volume only if it exists on replica, so `MOVE TTL` rules will be able to behave differently on replicas according to the existing storage policies. Resolves [#34455](https://github.com/ClickHouse/ClickHouse/issues/34455). [#34504](https://github.com/ClickHouse/ClickHouse/pull/34504) ([Anton Popov](https://github.com/CurtizJ)). -* Allow set default table engine and to create tables without specifying ENGINE. [#34187](https://github.com/ClickHouse/ClickHouse/pull/34187) ([Ilya Yatsishin](https://github.com/qoega)). -* Add table function `format(format_name, data)`. [#34125](https://github.com/ClickHouse/ClickHouse/pull/34125) ([Kruglov Pavel](https://github.com/Avogar)). -* Detect format in `clickhouse-local` by file name even in the case when it is passed to stdin. [#33829](https://github.com/ClickHouse/ClickHouse/pull/33829) ([Kruglov Pavel](https://github.com/Avogar)). -* Add schema inference for `values` table function. Closes [#33811](https://github.com/ClickHouse/ClickHouse/issues/33811). [#34017](https://github.com/ClickHouse/ClickHouse/pull/34017) ([Kruglov Pavel](https://github.com/Avogar)). -* Dynamic reload of server TLS certificates on config reload. Closes [#15764](https://github.com/ClickHouse/ClickHouse/issues/15764). [#15765](https://github.com/ClickHouse/ClickHouse/pull/15765) ([johnskopis](https://github.com/johnskopis)). [#31257](https://github.com/ClickHouse/ClickHouse/pull/31257) ([Filatenkov Artur](https://github.com/FArthur-cmd)). -* Now ReplicatedMergeTree can recover data when some of its disks are broken. [#13544](https://github.com/ClickHouse/ClickHouse/pull/13544) ([Amos Bird](https://github.com/amosbird)). -* Fault-tolerant connections in clickhouse-client: `clickhouse-client ... --host host1 --host host2 --port port2 --host host3 --port port --host host4`. [#34490](https://github.com/ClickHouse/ClickHouse/pull/34490) ([Kruglov Pavel](https://github.com/Avogar)). [#33824](https://github.com/ClickHouse/ClickHouse/pull/33824) ([Filippov Denis](https://github.com/DF5HSE)). -* Add `DEGREES` and `RADIANS` functions for MySQL compatibility. [#33769](https://github.com/ClickHouse/ClickHouse/pull/33769) ([Bharat Nallan](https://github.com/bharatnc)). -* Add `h3ToCenterChild` function. [#33313](https://github.com/ClickHouse/ClickHouse/pull/33313) ([Bharat Nallan](https://github.com/bharatnc)). Add new h3 miscellaneous functions: `edgeLengthKm`,`exactEdgeLengthKm`,`exactEdgeLengthM`,`exactEdgeLengthRads`,`numHexagons`. [#33621](https://github.com/ClickHouse/ClickHouse/pull/33621) ([Bharat Nallan](https://github.com/bharatnc)). -* Add function `bitSlice` to extract bit subsequences from String/FixedString. [#33360](https://github.com/ClickHouse/ClickHouse/pull/33360) ([RogerYK](https://github.com/RogerYK)). -* Implemented `meanZTest` aggregate function. [#33354](https://github.com/ClickHouse/ClickHouse/pull/33354) ([achimbab](https://github.com/achimbab)). -* Add confidence intervals to T-tests aggregate functions. 
[#33260](https://github.com/ClickHouse/ClickHouse/pull/33260) ([achimbab](https://github.com/achimbab)). -* Add function `addressToLineWithInlines`. Close [#26211](https://github.com/ClickHouse/ClickHouse/issues/26211). [#33467](https://github.com/ClickHouse/ClickHouse/pull/33467) ([SuperDJY](https://github.com/cmsxbc)). -* Added `#!` and `# ` as a recognised start of a single line comment. Closes [#34138](https://github.com/ClickHouse/ClickHouse/issues/34138). [#34230](https://github.com/ClickHouse/ClickHouse/pull/34230) ([Aaron Katz](https://github.com/aaronstephenkatz)). - -#### Experimental Feature - -* Functions for text classification: language and charset detection. See [#23271](https://github.com/ClickHouse/ClickHouse/issues/23271). [#33314](https://github.com/ClickHouse/ClickHouse/pull/33314) ([Nikolay Degterinsky](https://github.com/evillique)). -* Add memory overcommit to `MemoryTracker`. Added `guaranteed` settings for memory limits which represent soft memory limits. In case when hard memory limit is reached, `MemoryTracker` tries to cancel the most overcommited query. New setting `memory_usage_overcommit_max_wait_microseconds` specifies how long queries may wait another query to stop. Closes [#28375](https://github.com/ClickHouse/ClickHouse/issues/28375). [#31182](https://github.com/ClickHouse/ClickHouse/pull/31182) ([Dmitry Novik](https://github.com/novikd)). -* Enable stream to table join in WindowView. [#33729](https://github.com/ClickHouse/ClickHouse/pull/33729) ([vxider](https://github.com/Vxider)). -* Support `SET`, `YEAR`, `TIME` and `GEOMETRY` data types in `MaterializedMySQL` (experimental feature). Fixes [#18091](https://github.com/ClickHouse/ClickHouse/issues/18091), [#21536](https://github.com/ClickHouse/ClickHouse/issues/21536), [#26361](https://github.com/ClickHouse/ClickHouse/issues/26361). [#33429](https://github.com/ClickHouse/ClickHouse/pull/33429) ([zzsmdfj](https://github.com/zzsmdfj)). -* Fix various issues when projection is enabled by default. Each issue is described in separate commit. This is for [#33678](https://github.com/ClickHouse/ClickHouse/issues/33678) . This fixes [#34273](https://github.com/ClickHouse/ClickHouse/issues/34273). [#34305](https://github.com/ClickHouse/ClickHouse/pull/34305) ([Amos Bird](https://github.com/amosbird)). - -#### Performance Improvement - -* Support `optimize_read_in_order` if prefix of sorting key is already sorted. E.g. if we have sorting key `ORDER BY (a, b)` in table and query with `WHERE a = const ORDER BY b` clauses, now it will be applied reading in order of sorting key instead of full sort. [#32748](https://github.com/ClickHouse/ClickHouse/pull/32748) ([Anton Popov](https://github.com/CurtizJ)). -* Improve performance of partitioned insert into table functions `URL`, `S3`, `File`, `HDFS`. Closes [#34348](https://github.com/ClickHouse/ClickHouse/issues/34348). [#34510](https://github.com/ClickHouse/ClickHouse/pull/34510) ([Maksim Kita](https://github.com/kitaisreal)). -* Multiple performance improvements of clickhouse-keeper. [#34484](https://github.com/ClickHouse/ClickHouse/pull/34484) [#34587](https://github.com/ClickHouse/ClickHouse/pull/34587) ([zhanglistar](https://github.com/zhanglistar)). -* `FlatDictionary` improve performance of dictionary data load. [#33871](https://github.com/ClickHouse/ClickHouse/pull/33871) ([Maksim Kita](https://github.com/kitaisreal)). -* Improve performance of `mapPopulateSeries` function. Closes [#33944](https://github.com/ClickHouse/ClickHouse/issues/33944). 
[#34318](https://github.com/ClickHouse/ClickHouse/pull/34318) ([Maksim Kita](https://github.com/kitaisreal)). -* `_file` and `_path` virtual columns (in file-like table engines) are made `LowCardinality` - it will make queries for multiple files faster. Closes [#34300](https://github.com/ClickHouse/ClickHouse/issues/34300). [#34317](https://github.com/ClickHouse/ClickHouse/pull/34317) ([flynn](https://github.com/ucasfl)). -* Speed up loading of data parts. It was not parallelized before: the setting `part_loading_threads` did not have effect. See [#4699](https://github.com/ClickHouse/ClickHouse/issues/4699). [#34310](https://github.com/ClickHouse/ClickHouse/pull/34310) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Improve performance of `LineAsString` format. This closes [#34303](https://github.com/ClickHouse/ClickHouse/issues/34303). [#34306](https://github.com/ClickHouse/ClickHouse/pull/34306) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Optimize `quantilesExact{Low,High}` to use `nth_element` instead of `sort`. [#34287](https://github.com/ClickHouse/ClickHouse/pull/34287) ([Danila Kutenin](https://github.com/danlark1)). -* Slightly improve performance of `Regexp` format. [#34202](https://github.com/ClickHouse/ClickHouse/pull/34202) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Minor improvement for analysis of scalar subqueries. [#34128](https://github.com/ClickHouse/ClickHouse/pull/34128) ([Federico Rodriguez](https://github.com/fedrod)). -* Make ORDER BY tuple almost as fast as ORDER BY columns. We have special optimizations for multiple column ORDER BY: https://github.com/ClickHouse/ClickHouse/pull/10831 . It's beneficial to also apply to tuple columns. [#34060](https://github.com/ClickHouse/ClickHouse/pull/34060) ([Amos Bird](https://github.com/amosbird)). -* Rework and reintroduce the scalar subqueries cache to Materialized Views execution. [#33958](https://github.com/ClickHouse/ClickHouse/pull/33958) ([Raúl Marín](https://github.com/Algunenano)). -* Slightly improve performance of `ORDER BY` by adding x86-64 AVX-512 support for `memcmpSmall` functions to accelerate memory comparison. It works only if you compile ClickHouse by yourself. [#33706](https://github.com/ClickHouse/ClickHouse/pull/33706) ([hanqf-git](https://github.com/hanqf-git)). -* Improve `range_hashed` dictionary performance if for key there are a lot of intervals. Fixes [#23821](https://github.com/ClickHouse/ClickHouse/issues/23821). [#33516](https://github.com/ClickHouse/ClickHouse/pull/33516) ([Maksim Kita](https://github.com/kitaisreal)). -* For inserts and merges into S3, write files in parallel whenever possible (TODO: check if it's merged). [#33291](https://github.com/ClickHouse/ClickHouse/pull/33291) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Improve `clickhouse-keeper` performance and fix several memory leaks in NuRaft library. [#33329](https://github.com/ClickHouse/ClickHouse/pull/33329) ([alesapin](https://github.com/alesapin)). - -#### Improvement - -* Support asynchronous inserts in `clickhouse-client` for queries with inlined data. [#34267](https://github.com/ClickHouse/ClickHouse/pull/34267) ([Anton Popov](https://github.com/CurtizJ)). -* Functions `dictGet`, `dictHas` implicitly cast key argument to dictionary key structure, if they are different. [#33672](https://github.com/ClickHouse/ClickHouse/pull/33672) ([Maksim Kita](https://github.com/kitaisreal)). -* Improvements for `range_hashed` dictionaries. 
Improve performance of load time if there are multiple attributes. Allow to create a dictionary without attributes. Added option to specify strategy when intervals `start` and `end` have `Nullable` type `convert_null_range_bound_to_open` by default is `true`. Closes [#29791](https://github.com/ClickHouse/ClickHouse/issues/29791). Allow to specify `Float`, `Decimal`, `DateTime64`, `Int128`, `Int256`, `UInt128`, `UInt256` as range types. `RangeHashedDictionary` added support for range values that extend `Int64` type. Closes [#28322](https://github.com/ClickHouse/ClickHouse/issues/28322). Added option `range_lookup_strategy` to specify range lookup type `min`, `max` by default is `min` . Closes [#21647](https://github.com/ClickHouse/ClickHouse/issues/21647). Fixed allocated bytes calculations. Fixed type name in `system.dictionaries` in case of `ComplexKeyHashedDictionary`. [#33927](https://github.com/ClickHouse/ClickHouse/pull/33927) ([Maksim Kita](https://github.com/kitaisreal)). -* `flat`, `hashed`, `hashed_array` dictionaries now support creating with empty attributes, with support of reading the keys and using `dictHas`. Fixes [#33820](https://github.com/ClickHouse/ClickHouse/issues/33820). [#33918](https://github.com/ClickHouse/ClickHouse/pull/33918) ([Maksim Kita](https://github.com/kitaisreal)). -* Added support for `DateTime64` data type in dictionaries. [#33914](https://github.com/ClickHouse/ClickHouse/pull/33914) ([Maksim Kita](https://github.com/kitaisreal)). -* Allow to write `s3(url, access_key_id, secret_access_key)` (autodetect of data format and table structure, but with explicit credentials). [#34503](https://github.com/ClickHouse/ClickHouse/pull/34503) ([Kruglov Pavel](https://github.com/Avogar)). -* Added sending of the output format back to client like it's done in HTTP protocol as suggested in [#34362](https://github.com/ClickHouse/ClickHouse/issues/34362). Closes [#34362](https://github.com/ClickHouse/ClickHouse/issues/34362). [#34499](https://github.com/ClickHouse/ClickHouse/pull/34499) ([Vitaly Baranov](https://github.com/vitlibar)). -* Send ProfileEvents statistics in case of INSERT SELECT query (to display query metrics in `clickhouse-client` for this type of queries). [#34498](https://github.com/ClickHouse/ClickHouse/pull/34498) ([Dmitry Novik](https://github.com/novikd)). -* Recognize `.jsonl` extension for JSONEachRow format. [#34496](https://github.com/ClickHouse/ClickHouse/pull/34496) ([Kruglov Pavel](https://github.com/Avogar)). -* Improve schema inference in clickhouse-local. Allow to write just `clickhouse-local -q "select * from table" < data.format`. [#34495](https://github.com/ClickHouse/ClickHouse/pull/34495) ([Kruglov Pavel](https://github.com/Avogar)). -* Privileges CREATE/ALTER/DROP ROW POLICY now can be granted on a table or on `database.*` as well as globally `*.*`. [#34489](https://github.com/ClickHouse/ClickHouse/pull/34489) ([Vitaly Baranov](https://github.com/vitlibar)). -* Allow to export arbitrary large files to `s3`. Add two new settings: `s3_upload_part_size_multiply_factor` and `s3_upload_part_size_multiply_parts_count_threshold`. Now each time `s3_upload_part_size_multiply_parts_count_threshold` uploaded to S3 from a single query `s3_min_upload_part_size` multiplied by `s3_upload_part_size_multiply_factor`. Fixes [#34244](https://github.com/ClickHouse/ClickHouse/issues/34244). [#34422](https://github.com/ClickHouse/ClickHouse/pull/34422) ([alesapin](https://github.com/alesapin)). 
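The S3 export entry above could be used roughly as follows; the bucket URL, source table and concrete values are placeholders, not recommendations:

```sql
-- Export a large result to S3 while tuning the multipart-upload growth
-- settings introduced by the change above (values are illustrative).
INSERT INTO FUNCTION s3('https://my-bucket.s3.amazonaws.com/exports/events.parquet', 'Parquet')
SELECT *
FROM events
SETTINGS
    s3_min_upload_part_size = 16777216,
    s3_upload_part_size_multiply_factor = 2,
    s3_upload_part_size_multiply_parts_count_threshold = 1000;
```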
-* Allow to skip not found (404) URLs for globs when using URL storage / table function. Also closes [#34359](https://github.com/ClickHouse/ClickHouse/issues/34359). [#34392](https://github.com/ClickHouse/ClickHouse/pull/34392) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Default input and output formats for `clickhouse-local` that can be overriden by --input-format and --output-format. Close [#30631](https://github.com/ClickHouse/ClickHouse/issues/30631). [#34352](https://github.com/ClickHouse/ClickHouse/pull/34352) ([李扬](https://github.com/taiyang-li)). -* Add options for `clickhouse-format`. Which close [#30528](https://github.com/ClickHouse/ClickHouse/issues/30528) - `max_query_size` - `max_parser_depth`. [#34349](https://github.com/ClickHouse/ClickHouse/pull/34349) ([李扬](https://github.com/taiyang-li)). -* Better handling of pre-inputs before client start. This is for [#34308](https://github.com/ClickHouse/ClickHouse/issues/34308). [#34336](https://github.com/ClickHouse/ClickHouse/pull/34336) ([Amos Bird](https://github.com/amosbird)). -* `REGEXP_MATCHES` and `REGEXP_REPLACE` function aliases for compatibility with PostgreSQL. Close [#30885](https://github.com/ClickHouse/ClickHouse/issues/30885). [#34334](https://github.com/ClickHouse/ClickHouse/pull/34334) ([李扬](https://github.com/taiyang-li)). -* Some servers expect a User-Agent header in their HTTP requests. A `User-Agent` header entry has been added to HTTP requests of the form: User-Agent: ClickHouse/VERSION_STRING. [#34330](https://github.com/ClickHouse/ClickHouse/pull/34330) ([Saad Ur Rahman](https://github.com/surahman)). -* Cancel merges before acquiring table lock for `TRUNCATE` query to avoid `DEADLOCK_AVOIDED` error in some cases. Fixes [#34302](https://github.com/ClickHouse/ClickHouse/issues/34302). [#34304](https://github.com/ClickHouse/ClickHouse/pull/34304) ([tavplubix](https://github.com/tavplubix)). -* Change severity of the "Cancelled merging parts" message in logs, because it's not an error. This closes [#34148](https://github.com/ClickHouse/ClickHouse/issues/34148). [#34232](https://github.com/ClickHouse/ClickHouse/pull/34232) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Add ability to compose PostgreSQL-style cast operator `::` with expressions using `[]` and `.` operators (array and tuple indexing). [#34229](https://github.com/ClickHouse/ClickHouse/pull/34229) ([Nikolay Degterinsky](https://github.com/evillique)). -* Recognize `YYYYMMDD-hhmmss` format in `parseDateTimeBestEffort` function. This closes [#34206](https://github.com/ClickHouse/ClickHouse/issues/34206). [#34208](https://github.com/ClickHouse/ClickHouse/pull/34208) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Allow carriage return in the middle of the line while parsing by `Regexp` format. This closes [#34200](https://github.com/ClickHouse/ClickHouse/issues/34200). [#34205](https://github.com/ClickHouse/ClickHouse/pull/34205) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Allow to parse dictionary's `PRIMARY KEY` as `PRIMARY KEY (id, value)`; previously supported only `PRIMARY KEY id, value`. Closes [#34135](https://github.com/ClickHouse/ClickHouse/issues/34135). [#34141](https://github.com/ClickHouse/ClickHouse/pull/34141) ([Maksim Kita](https://github.com/kitaisreal)). -* An optional argument for `splitByChar` to limit the number of resulting elements. close [#34081](https://github.com/ClickHouse/ClickHouse/issues/34081). 
[#34140](https://github.com/ClickHouse/ClickHouse/pull/34140) ([李扬](https://github.com/taiyang-li)). -* Improving the experience of multiple line editing for clickhouse-client. This is a follow-up of [#31123](https://github.com/ClickHouse/ClickHouse/pull/31123). [#34114](https://github.com/ClickHouse/ClickHouse/pull/34114) ([Amos Bird](https://github.com/amosbird)). -* Add `UUID` support in `MsgPack` input/output format. [#34065](https://github.com/ClickHouse/ClickHouse/pull/34065) ([Kruglov Pavel](https://github.com/Avogar)). -* Tracing context (for OpenTelemetry) is now propagated from GRPC client metadata (this change is relevant for GRPC client-server protocol). [#34064](https://github.com/ClickHouse/ClickHouse/pull/34064) ([andremarianiello](https://github.com/andremarianiello)). -* Support all types of `SYSTEM` queries with the `ON CLUSTER` clause. [#34005](https://github.com/ClickHouse/ClickHouse/pull/34005) ([小路](https://github.com/nicelulu)). -* Improve memory accounting for queries that are using less than `max_untracked_memory`. [#34001](https://github.com/ClickHouse/ClickHouse/pull/34001) ([Azat Khuzhin](https://github.com/azat)). -* Fixed UTF-8 string case-insensitive search when lowercase and uppercase characters are represented by different number of bytes. An example is `ẞ` and `ß`. This closes [#7334](https://github.com/ClickHouse/ClickHouse/issues/7334). [#33992](https://github.com/ClickHouse/ClickHouse/pull/33992) ([Harry Lee](https://github.com/HarryLeeIBM)). -* Detect format and schema from stdin in `clickhouse-local`. [#33960](https://github.com/ClickHouse/ClickHouse/pull/33960) ([Kruglov Pavel](https://github.com/Avogar)). -* Correctly handle the case of misconfiguration when multiple disks are using the same path on the filesystem. [#29072](https://github.com/ClickHouse/ClickHouse/issues/29072). [#33905](https://github.com/ClickHouse/ClickHouse/pull/33905) ([zhongyuankai](https://github.com/zhongyuankai)). -* Try every resolved IP address while getting S3 proxy. S3 proxies are rarely used, mostly in Yandex Cloud. [#33862](https://github.com/ClickHouse/ClickHouse/pull/33862) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Support `EXPLAIN AST CREATE FUNCTION` queries: `EXPLAIN AST CREATE FUNCTION mycast AS (n) -> cast(n as String)` will return `EXPLAIN AST CREATE FUNCTION mycast AS n -> CAST(n, 'String')`. [#33819](https://github.com/ClickHouse/ClickHouse/pull/33819) ([李扬](https://github.com/taiyang-li)). -* Added support for cast from `Map(Key, Value)` to `Array(Tuple(Key, Value))`. [#33794](https://github.com/ClickHouse/ClickHouse/pull/33794) ([Maksim Kita](https://github.com/kitaisreal)). -* Add some improvements and fixes for `Bool` data type. Fixes [#33244](https://github.com/ClickHouse/ClickHouse/issues/33244). [#33737](https://github.com/ClickHouse/ClickHouse/pull/33737) ([Kruglov Pavel](https://github.com/Avogar)). -* Parse and store OpenTelemetry trace-id in big-endian order. [#33723](https://github.com/ClickHouse/ClickHouse/pull/33723) ([Frank Chen](https://github.com/FrankChen021)). -* Improvement for `fromUnixTimestamp64` family functions. They now accept any integer value that can be converted to `Int64`. This closes: [#14648](https://github.com/ClickHouse/ClickHouse/issues/14648). [#33505](https://github.com/ClickHouse/ClickHouse/pull/33505) ([Andrey Zvonov](https://github.com/zvonand)). 
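A small sketch of the relaxed `fromUnixTimestamp64*` argument types described in the last entry above; the timestamps are arbitrary:

```sql
-- Any integer convertible to Int64 is now accepted, not only Int64 itself.
SELECT
    fromUnixTimestamp64Milli(toUInt32(1234567891), 'UTC')             AS from_uint32,
    fromUnixTimestamp64Nano(toInt64(1234567891000000000), 'UTC')      AS from_int64;
```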
-* Reimplement `_shard_num` from constants (see [#7624](https://github.com/ClickHouse/ClickHouse/issues/7624)) with `shardNum()` function (see [#27020](https://github.com/ClickHouse/ClickHouse/issues/27020)), to avoid possible issues (like those that had been found in [#16947](https://github.com/ClickHouse/ClickHouse/issues/16947)). [#33392](https://github.com/ClickHouse/ClickHouse/pull/33392) ([Azat Khuzhin](https://github.com/azat)). -* Enable binary arithmetic (plus, minus, multiply, division, least, greatest) between Decimal and Float. [#33355](https://github.com/ClickHouse/ClickHouse/pull/33355) ([flynn](https://github.com/ucasfl)). -* Respect cgroups limits in max_threads autodetection. [#33342](https://github.com/ClickHouse/ClickHouse/pull/33342) ([JaySon](https://github.com/JaySon-Huang)). -* Add new clickhouse-keeper setting `min_session_timeout_ms`. Now clickhouse-keeper will determine client session timeout according to `min_session_timeout_ms` and `session_timeout_ms` settings. [#33288](https://github.com/ClickHouse/ClickHouse/pull/33288) ([JackyWoo](https://github.com/JackyWoo)). -* Added `UUID` data type support for functions `hex` and `bin`. [#32170](https://github.com/ClickHouse/ClickHouse/pull/32170) ([Frank Chen](https://github.com/FrankChen021)). -* Fix reading of subcolumns with dots in their names. In particular fixed reading of `Nested` columns, if their element names contain dots (e.g. ```Nested(`keys.name` String, `keys.id` UInt64, values UInt64)```). [#34228](https://github.com/ClickHouse/ClickHouse/pull/34228) ([Anton Popov](https://github.com/CurtizJ)). -* Fixes `parallel_view_processing = 0` not working when inserting into a table using `VALUES`. Also fixes `view_duration_ms` in the `query_views_log` not being set correctly for materialized views. [#34067](https://github.com/ClickHouse/ClickHouse/pull/34067) ([Raúl Marín](https://github.com/Algunenano)). -* Fix parsing tables structure from ZooKeeper: now metadata from ZooKeeper is compared with local metadata in canonical form. It helps when canonical function names can change between ClickHouse versions. [#33933](https://github.com/ClickHouse/ClickHouse/pull/33933) ([sunny](https://github.com/sunny19930321)). -* Properly escape some characters for interaction with LDAP. [#33401](https://github.com/ClickHouse/ClickHouse/pull/33401) ([IlyaTsoi](https://github.com/IlyaTsoi)). - -#### Build/Testing/Packaging Improvement - -* Remove unbundled build support. [#33690](https://github.com/ClickHouse/ClickHouse/pull/33690) ([Azat Khuzhin](https://github.com/azat)). -* Ensure that tests don't depend on the result of non-stable sorting of equal elements. Added equal items ranges randomization in debug after sort to prevent issues when we rely on equal items sort order. [#34393](https://github.com/ClickHouse/ClickHouse/pull/34393) ([Maksim Kita](https://github.com/kitaisreal)). -* Add verbosity to a style check. [#34289](https://github.com/ClickHouse/ClickHouse/pull/34289) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Remove `clickhouse-test` debian package because it's obsolete. [#33948](https://github.com/ClickHouse/ClickHouse/pull/33948) ([Ilya Yatsishin](https://github.com/qoega)). -* Multiple improvements for build system to remove the possibility of occasionally using packages from the OS and to enforce hermetic builds. [#33695](https://github.com/ClickHouse/ClickHouse/pull/33695) ([Amos Bird](https://github.com/amosbird)). 
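For the Decimal/Float arithmetic entry in the Improvement list above, a minimal sketch (values are arbitrary):

```sql
-- Mixed Decimal/Float arithmetic and least()/greatest() enabled by the
-- change above; literal values are purely illustrative.
SELECT
    toDecimal32(1.50, 2) * 2.5        AS product,
    toDecimal64(10, 2) - 0.75         AS difference,
    least(toDecimal32(3.14, 2), 2.72) AS smallest;
```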
- -#### Bug Fix (user-visible misbehaviour in official stable or prestable release) - -* Fixed the assertion in case of using `allow_experimental_parallel_reading_from_replicas` with `max_parallel_replicas` equals to 1. This fixes [#34525](https://github.com/ClickHouse/ClickHouse/issues/34525). [#34613](https://github.com/ClickHouse/ClickHouse/pull/34613) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Fix rare bug while reading of empty arrays, which could lead to `Data compressed with different methods` error. It can reproduce if you have mostly empty arrays, but not always. And reading is performed in backward direction with ORDER BY ... DESC. This error is extremely unlikely to happen. [#34327](https://github.com/ClickHouse/ClickHouse/pull/34327) ([Anton Popov](https://github.com/CurtizJ)). -* Fix wrong result of `round`/`roundBankers` if integer values of small types are rounded. Closes [#33267](https://github.com/ClickHouse/ClickHouse/issues/33267). [#34562](https://github.com/ClickHouse/ClickHouse/pull/34562) ([李扬](https://github.com/taiyang-li)). -* Sometimes query cancellation did not work immediately when we were reading multiple files from s3 or HDFS. Fixes [#34301](https://github.com/ClickHouse/ClickHouse/issues/34301) Relates to [#34397](https://github.com/ClickHouse/ClickHouse/issues/34397). [#34539](https://github.com/ClickHouse/ClickHouse/pull/34539) ([Dmitry Novik](https://github.com/novikd)). -* Fix exception `Chunk should have AggregatedChunkInfo in MergingAggregatedTransform` (in case of `optimize_aggregation_in_order = 1` and `distributed_aggregation_memory_efficient = 0`). Fixes [#34526](https://github.com/ClickHouse/ClickHouse/issues/34526). [#34532](https://github.com/ClickHouse/ClickHouse/pull/34532) ([Anton Popov](https://github.com/CurtizJ)). -* Fix comparison between integers and floats in index analysis. Previously it could lead to skipping some granules for reading by mistake. Fixes [#34493](https://github.com/ClickHouse/ClickHouse/issues/34493). [#34528](https://github.com/ClickHouse/ClickHouse/pull/34528) ([Anton Popov](https://github.com/CurtizJ)). -* Fix compression support in URL engine. [#34524](https://github.com/ClickHouse/ClickHouse/pull/34524) ([Frank Chen](https://github.com/FrankChen021)). -* Fix possible error 'file_size: Operation not supported' in files' schema autodetection. [#34479](https://github.com/ClickHouse/ClickHouse/pull/34479) ([Kruglov Pavel](https://github.com/Avogar)). -* Fixes possible race with table deletion. [#34416](https://github.com/ClickHouse/ClickHouse/pull/34416) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix possible error `Cannot convert column Function to mask` in short circuit function evaluation. Closes [#34171](https://github.com/ClickHouse/ClickHouse/issues/34171). [#34415](https://github.com/ClickHouse/ClickHouse/pull/34415) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix potential crash when doing schema inference from url source. Closes [#34147](https://github.com/ClickHouse/ClickHouse/issues/34147). [#34405](https://github.com/ClickHouse/ClickHouse/pull/34405) ([Kruglov Pavel](https://github.com/Avogar)). -* For UDFs access permissions were checked for database level instead of global level as it should be. Closes [#34281](https://github.com/ClickHouse/ClickHouse/issues/34281). [#34404](https://github.com/ClickHouse/ClickHouse/pull/34404) ([Maksim Kita](https://github.com/kitaisreal)). 
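A short sketch of the small-integer rounding fix listed above (`round`/`roundBankers` applied to small integer types with a negative number of digits; values are arbitrary):

```sql
-- Rounding UInt8/Int16 values to the nearest ten; previously the result
-- could be wrong for small integer types.
SELECT
    round(toUInt8(25), -1)        AS round_uint8,
    roundBankers(toInt16(25), -1) AS round_bankers_int16;
```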
-* Fix wrong engine syntax in result of `SHOW CREATE DATABASE` query for databases with engine `Memory`. This closes [#34335](https://github.com/ClickHouse/ClickHouse/issues/34335). [#34345](https://github.com/ClickHouse/ClickHouse/pull/34345) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fixed a couple of extremely rare race conditions that might lead to broken state of replication queue and "intersecting parts" error. [#34297](https://github.com/ClickHouse/ClickHouse/pull/34297) ([tavplubix](https://github.com/tavplubix)). -* Fix progress bar width. It was incorrectly rounded to integer number of characters. [#34275](https://github.com/ClickHouse/ClickHouse/pull/34275) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fix current_user/current_address client information fields for inter-server communication (before this patch, current_user/current_address were preserved from the previous query). [#34263](https://github.com/ClickHouse/ClickHouse/pull/34263) ([Azat Khuzhin](https://github.com/azat)). -* Fix memory leak in case of some Exception during query processing with `optimize_aggregation_in_order=1`. [#34234](https://github.com/ClickHouse/ClickHouse/pull/34234) ([Azat Khuzhin](https://github.com/azat)). -* Fix metric `Query`, which shows the number of executing queries. In the last several releases it was always 0. [#34224](https://github.com/ClickHouse/ClickHouse/pull/34224) ([Anton Popov](https://github.com/CurtizJ)). -* Fix schema inference for table function `s3`. [#34186](https://github.com/ClickHouse/ClickHouse/pull/34186) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix rare and benign race condition in `HDFS`, `S3` and `URL` storage engines which can lead to additional connections. [#34172](https://github.com/ClickHouse/ClickHouse/pull/34172) ([alesapin](https://github.com/alesapin)). -* Fix a bug which can rarely lead to the error "Cannot read all data" while reading LowCardinality columns of MergeTree family table engines which store data on a remote file system like S3 (virtual filesystem over s3 is an experimental feature that is not ready for production). [#34139](https://github.com/ClickHouse/ClickHouse/pull/34139) ([alesapin](https://github.com/alesapin)). -* Fix inserts to distributed tables in case of a change of native protocol. The last change was in version 22.1, so there may be some failures of inserts to distributed tables after upgrade to that version. [#34132](https://github.com/ClickHouse/ClickHouse/pull/34132) ([Anton Popov](https://github.com/CurtizJ)). -* Fix possible data race in `File` table engine that was introduced in [#33960](https://github.com/ClickHouse/ClickHouse/pull/33960). Closes [#34111](https://github.com/ClickHouse/ClickHouse/issues/34111). [#34113](https://github.com/ClickHouse/ClickHouse/pull/34113) ([Kruglov Pavel](https://github.com/Avogar)). -* Fixed minor race condition that might cause "intersecting parts" error in extremely rare cases after ZooKeeper connection loss. [#34096](https://github.com/ClickHouse/ClickHouse/pull/34096) ([tavplubix](https://github.com/tavplubix)). -* Fix asynchronous inserts with `Native` format. [#34068](https://github.com/ClickHouse/ClickHouse/pull/34068) ([Anton Popov](https://github.com/CurtizJ)). -* Fix a bug which led to the server being unable to start when both replicated access storage and keeper (embedded in clickhouse-server) are used. 
Introduced two settings for keeper socket timeout instead of settings from default user: `keeper_server.socket_receive_timeout_sec` and `keeper_server.socket_send_timeout_sec`. Fixes [#33973](https://github.com/ClickHouse/ClickHouse/issues/33973). [#33988](https://github.com/ClickHouse/ClickHouse/pull/33988) ([alesapin](https://github.com/alesapin)). -* Fix segfault while parsing ORC file with corrupted footer. Closes [#33797](https://github.com/ClickHouse/ClickHouse/issues/33797). [#33984](https://github.com/ClickHouse/ClickHouse/pull/33984) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix parsing IPv6 from query parameter (prepared statements) and fix IPv6 to string conversion. Closes [#33928](https://github.com/ClickHouse/ClickHouse/issues/33928). [#33971](https://github.com/ClickHouse/ClickHouse/pull/33971) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix crash while reading of nested tuples. Fixes [#33838](https://github.com/ClickHouse/ClickHouse/issues/33838). [#33956](https://github.com/ClickHouse/ClickHouse/pull/33956) ([Anton Popov](https://github.com/CurtizJ)). -* Fix usage of functions `array` and `tuple` with literal arguments in distributed queries. Previously it could lead to `Not found columns` exception. [#33938](https://github.com/ClickHouse/ClickHouse/pull/33938) ([Anton Popov](https://github.com/CurtizJ)). -* Aggregate function combinator `-If` did not correctly process `Nullable` filter argument. This closes [#27073](https://github.com/ClickHouse/ClickHouse/issues/27073). [#33920](https://github.com/ClickHouse/ClickHouse/pull/33920) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fix potential race condition when doing remote disk read (virtual filesystem over s3 is an experimental feature that is not ready for production). [#33912](https://github.com/ClickHouse/ClickHouse/pull/33912) ([Amos Bird](https://github.com/amosbird)). -* Fix crash if SQL UDF is created with lambda with non identifier arguments. Closes [#33866](https://github.com/ClickHouse/ClickHouse/issues/33866). [#33868](https://github.com/ClickHouse/ClickHouse/pull/33868) ([Maksim Kita](https://github.com/kitaisreal)). -* Fix usage of sparse columns (which can be enabled by experimental setting `ratio_of_defaults_for_sparse_serialization`). [#33849](https://github.com/ClickHouse/ClickHouse/pull/33849) ([Anton Popov](https://github.com/CurtizJ)). -* Fixed `replica is not readonly` logical error on `SYSTEM RESTORE REPLICA` query when replica is actually readonly. Fixes [#33806](https://github.com/ClickHouse/ClickHouse/issues/33806). [#33847](https://github.com/ClickHouse/ClickHouse/pull/33847) ([tavplubix](https://github.com/tavplubix)). -* Fix memory leak in `clickhouse-keeper` in case of compression is used (default). [#33840](https://github.com/ClickHouse/ClickHouse/pull/33840) ([Azat Khuzhin](https://github.com/azat)). -* Fix index analysis with no common types available. [#33833](https://github.com/ClickHouse/ClickHouse/pull/33833) ([Amos Bird](https://github.com/amosbird)). -* Fix schema inference for `JSONEachRow` and `JSONCompactEachRow`. [#33830](https://github.com/ClickHouse/ClickHouse/pull/33830) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix usage of external dictionaries with `redis` source and large number of keys. [#33804](https://github.com/ClickHouse/ClickHouse/pull/33804) ([Anton Popov](https://github.com/CurtizJ)). -* Fix bug in client that led to 'Connection reset by peer' in server. Closes [#33309](https://github.com/ClickHouse/ClickHouse/issues/33309). 
[#33790](https://github.com/ClickHouse/ClickHouse/pull/33790) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix parsing query INSERT INTO ... VALUES SETTINGS ... (...), ... [#33776](https://github.com/ClickHouse/ClickHouse/pull/33776) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix bug of check table when creating data part with wide format and projection. [#33774](https://github.com/ClickHouse/ClickHouse/pull/33774) ([李扬](https://github.com/taiyang-li)). -* Fix tiny race between count() and INSERT/merges/... in MergeTree (it is possible to return incorrect number of rows for SELECT with optimize_trivial_count_query). [#33753](https://github.com/ClickHouse/ClickHouse/pull/33753) ([Azat Khuzhin](https://github.com/azat)). -* Throw exception when directory listing request has failed in storage HDFS. [#33724](https://github.com/ClickHouse/ClickHouse/pull/33724) ([LiuNeng](https://github.com/liuneng1994)). -* Fix mutation when table contains projections. This fixes [#33010](https://github.com/ClickHouse/ClickHouse/issues/33010). This fixes [#33275](https://github.com/ClickHouse/ClickHouse/issues/33275). [#33679](https://github.com/ClickHouse/ClickHouse/pull/33679) ([Amos Bird](https://github.com/amosbird)). -* Correctly determine current database if `CREATE TEMPORARY TABLE AS SELECT` is queried inside a named HTTP session. This is a very rare use case. This closes [#8340](https://github.com/ClickHouse/ClickHouse/issues/8340). [#33676](https://github.com/ClickHouse/ClickHouse/pull/33676) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Allow some queries with sorting, LIMIT BY, ARRAY JOIN and lambda functions. This closes [#7462](https://github.com/ClickHouse/ClickHouse/issues/7462). [#33675](https://github.com/ClickHouse/ClickHouse/pull/33675) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fix bug in "zero copy replication" (a feature that is under development and should not be used in production) which lead to data duplication in case of TTL move. Fixes [#33643](https://github.com/ClickHouse/ClickHouse/issues/33643). [#33642](https://github.com/ClickHouse/ClickHouse/pull/33642) ([alesapin](https://github.com/alesapin)). -* Fix `Chunk should have AggregatedChunkInfo in GroupingAggregatedTransform` (in case of `optimize_aggregation_in_order = 1`). [#33637](https://github.com/ClickHouse/ClickHouse/pull/33637) ([Azat Khuzhin](https://github.com/azat)). -* Fix error `Bad cast from type ... to DB::DataTypeArray` which may happen when table has `Nested` column with dots in name, and default value is generated for it (e.g. during insert, when column is not listed). Continuation of [#28762](https://github.com/ClickHouse/ClickHouse/issues/28762). [#33588](https://github.com/ClickHouse/ClickHouse/pull/33588) ([Alexey Pavlenko](https://github.com/alexeypavlenko)). -* Export into `lz4` files has been fixed. Closes [#31421](https://github.com/ClickHouse/ClickHouse/issues/31421). [#31862](https://github.com/ClickHouse/ClickHouse/pull/31862) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix potential crash if `group_by_overflow_mode` was set to `any` (approximate GROUP BY) and aggregation was performed by single column of type `LowCardinality`. [#34506](https://github.com/ClickHouse/ClickHouse/pull/34506) ([DR](https://github.com/freedomDR)). -* Fix inserting to temporary tables via gRPC client-server protocol. Fixes [#34347](https://github.com/ClickHouse/ClickHouse/issues/34347), issue `#2`. 
[#34364](https://github.com/ClickHouse/ClickHouse/pull/34364) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix issue [#19429](https://github.com/ClickHouse/ClickHouse/issues/19429). [#34225](https://github.com/ClickHouse/ClickHouse/pull/34225) ([Vitaly Baranov](https://github.com/vitlibar)). -* Fix issue [#18206](https://github.com/ClickHouse/ClickHouse/issues/18206). [#33977](https://github.com/ClickHouse/ClickHouse/pull/33977) ([Vitaly Baranov](https://github.com/vitlibar)). -* This PR allows using multiple LDAP storages in the same list of user directories. It worked earlier but was broken because LDAP tests are disabled (they are part of the testflows tests). [#33574](https://github.com/ClickHouse/ClickHouse/pull/33574) ([Vitaly Baranov](https://github.com/vitlibar)). - -### ClickHouse release v22.1, 2022-01-18 - -#### Upgrade Notes - -* The functions `left` and `right` were previously implemented in parser and now full-featured. Distributed queries with `left` or `right` functions without aliases may throw exception if cluster contains different versions of clickhouse-server. If you are upgrading your cluster and encounter this error, you should finish upgrading your cluster to ensure all nodes have the same version. Also you can add aliases (`AS something`) to the columns in your queries to avoid this issue. [#33407](https://github.com/ClickHouse/ClickHouse/pull/33407) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Resource usage by scalar subqueries is fully accounted since this version. With this change, rows read in scalar subqueries are now reported in the query_log. If the scalar subquery is cached (repeated or called for several rows) the rows read are only counted once. This change allows KILLing queries and reporting progress while they are executing scalar subqueries. [#32271](https://github.com/ClickHouse/ClickHouse/pull/32271) ([Raúl Marín](https://github.com/Algunenano)). - -#### New Feature - -* Implement data schema inference for input formats. Allow to skip structure (or write just `auto`) in table functions `file`, `url`, `s3`, `hdfs` and in parameters of `clickhouse-local` . Allow to skip structure in create query for table engines `File`, `HDFS`, `S3`, `URL`, `Merge`, `Buffer`, `Distributed` and `ReplicatedMergeTree` (if we add new replicas). [#32455](https://github.com/ClickHouse/ClickHouse/pull/32455) ([Kruglov Pavel](https://github.com/Avogar)). -* Detect format by file extension in `file`/`hdfs`/`s3`/`url` table functions and `HDFS`/`S3`/`URL` table engines and also for `SELECT INTO OUTFILE` and `INSERT FROM INFILE` [#33565](https://github.com/ClickHouse/ClickHouse/pull/33565) ([Kruglov Pavel](https://github.com/Avogar)). Close [#30918](https://github.com/ClickHouse/ClickHouse/issues/30918). [#33443](https://github.com/ClickHouse/ClickHouse/pull/33443) ([OnePiece](https://github.com/zhongyuankai)). -* A tool for collecting diagnostics data if you need support. [#33175](https://github.com/ClickHouse/ClickHouse/pull/33175) ([Alexander Burmak](https://github.com/Alex-Burmak)). -* Automatic cluster discovery via Zoo/Keeper. It allows to add replicas to the cluster without changing configuration on every server. [#31442](https://github.com/ClickHouse/ClickHouse/pull/31442) ([vdimir](https://github.com/vdimir)). -* Implement hive table engine to access apache hive from clickhouse. This implements: [#29245](https://github.com/ClickHouse/ClickHouse/issues/29245). 
[#31104](https://github.com/ClickHouse/ClickHouse/pull/31104) ([taiyang-li](https://github.com/taiyang-li)). -* Add aggregate functions `cramersV`, `cramersVBiasCorrected`, `theilsU` and `contingency`. These functions calculate dependency (measure of association) between categorical values. All these functions are using cross-tab (histogram on pairs) for implementation. You can imagine it like a correlation coefficient but for any discrete values (not necessary numbers). [#33366](https://github.com/ClickHouse/ClickHouse/pull/33366) ([alexey-milovidov](https://github.com/alexey-milovidov)). Initial implementation by [Vanyok-All-is-OK](https://github.com/Vanyok-All-is-OK) and [antikvist](https://github.com/antikvist). -* Added table function `hdfsCluster` which allows processing files from HDFS in parallel from many nodes in a specified cluster, similarly to `s3Cluster`. [#32400](https://github.com/ClickHouse/ClickHouse/pull/32400) ([Zhichang Yu](https://github.com/yuzhichang)). -* Adding support for disks backed by Azure Blob Storage, in a similar way it has been done for disks backed by AWS S3. [#31505](https://github.com/ClickHouse/ClickHouse/pull/31505) ([Jakub Kuklis](https://github.com/jkuklis)). -* Allow `COMMENT` in `CREATE VIEW` (for all VIEW kinds). [#31062](https://github.com/ClickHouse/ClickHouse/pull/31062) ([Vasily Nemkov](https://github.com/Enmk)). -* Dynamically reinitialize listening ports and protocols when configuration changes. [#30549](https://github.com/ClickHouse/ClickHouse/pull/30549) ([Kevin Michel](https://github.com/kmichel-aiven)). -* Added `left`, `right`, `leftUTF8`, `rightUTF8` functions. Fix error in implementation of `substringUTF8` function with negative offset (offset from the end of string). [#33407](https://github.com/ClickHouse/ClickHouse/pull/33407) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Add new functions for `H3` coordinate system: `h3HexAreaKm2`, `h3CellAreaM2`, `h3CellAreaRads2`. [#33479](https://github.com/ClickHouse/ClickHouse/pull/33479) ([Bharat Nallan](https://github.com/bharatnc)). -* Add `MONTHNAME` function. [#33436](https://github.com/ClickHouse/ClickHouse/pull/33436) ([usurai](https://github.com/usurai)). -* Added function `arrayLast`. Closes [#33390](https://github.com/ClickHouse/ClickHouse/issues/33390). [#33415](https://github.com/ClickHouse/ClickHouse/pull/33415) Added function `arrayLastIndex`. [#33465](https://github.com/ClickHouse/ClickHouse/pull/33465) ([Maksim Kita](https://github.com/kitaisreal)). -* Add function `decodeURLFormComponent` slightly different to `decodeURLComponent`. Close [#10298](https://github.com/ClickHouse/ClickHouse/issues/10298). [#33451](https://github.com/ClickHouse/ClickHouse/pull/33451) ([SuperDJY](https://github.com/cmsxbc)). -* Allow to split `GraphiteMergeTree` rollup rules for plain/tagged metrics (optional rule_type field). [#33494](https://github.com/ClickHouse/ClickHouse/pull/33494) ([Michail Safronov](https://github.com/msaf1980)). - -#### Performance Improvement - -* Support moving conditions to `PREWHERE` (setting `optimize_move_to_prewhere`) for tables of `Merge` engine if its all underlying tables supports `PREWHERE`. [#33300](https://github.com/ClickHouse/ClickHouse/pull/33300) ([Anton Popov](https://github.com/CurtizJ)). -* More efficient handling of globs for URL storage. Now you can easily query million URLs in parallel with retries. Closes [#32866](https://github.com/ClickHouse/ClickHouse/issues/32866). 
[#32907](https://github.com/ClickHouse/ClickHouse/pull/32907) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Avoid exponential backtracking in parser. This closes [#20158](https://github.com/ClickHouse/ClickHouse/issues/20158). [#33481](https://github.com/ClickHouse/ClickHouse/pull/33481) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Abuse of `untuple` function was leading to exponential complexity of query analysis (found by fuzzer). This closes [#33297](https://github.com/ClickHouse/ClickHouse/issues/33297). [#33445](https://github.com/ClickHouse/ClickHouse/pull/33445) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Reduce allocated memory for dictionaries with string attributes. [#33466](https://github.com/ClickHouse/ClickHouse/pull/33466) ([Maksim Kita](https://github.com/kitaisreal)). -* Slight performance improvement of `reinterpret` function. [#32587](https://github.com/ClickHouse/ClickHouse/pull/32587) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Non significant change. In extremely rare cases when data part is lost on every replica, after merging of some data parts, the subsequent queries may skip less amount of partitions during partition pruning. This hardly affects anything. [#32220](https://github.com/ClickHouse/ClickHouse/pull/32220) ([Azat Khuzhin](https://github.com/azat)). -* Improve `clickhouse-keeper` writing performance by optimization the size calculation logic. [#32366](https://github.com/ClickHouse/ClickHouse/pull/32366) ([zhanglistar](https://github.com/zhanglistar)). -* Optimize single part projection materialization. This closes [#31669](https://github.com/ClickHouse/ClickHouse/issues/31669). [#31885](https://github.com/ClickHouse/ClickHouse/pull/31885) ([Amos Bird](https://github.com/amosbird)). -* Improve query performance of system tables. [#33312](https://github.com/ClickHouse/ClickHouse/pull/33312) ([OnePiece](https://github.com/zhongyuankai)). -* Optimize selecting of MergeTree parts that can be moved between volumes. [#33225](https://github.com/ClickHouse/ClickHouse/pull/33225) ([OnePiece](https://github.com/zhongyuankai)). -* Fix `sparse_hashed` dict performance with sequential keys (wrong hash function). [#32536](https://github.com/ClickHouse/ClickHouse/pull/32536) ([Azat Khuzhin](https://github.com/azat)). - -#### Experimental Feature - -* Parallel reading from multiple replicas within a shard during distributed query without using sample key. To enable this, set `allow_experimental_parallel_reading_from_replicas = 1` and `max_parallel_replicas` to any number. This closes [#26748](https://github.com/ClickHouse/ClickHouse/issues/26748). [#29279](https://github.com/ClickHouse/ClickHouse/pull/29279) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). -* Implemented sparse serialization. It can reduce usage of disk space and improve performance of some queries for columns, which contain a lot of default (zero) values. It can be enabled by setting `ratio_for_sparse_serialization`. Sparse serialization will be chosen dynamically for column, if it has ratio of number of default values to number of all values above that threshold. Serialization (default or sparse) will be fixed for every column in part, but may varies between parts. [#22535](https://github.com/ClickHouse/ClickHouse/pull/22535) ([Anton Popov](https://github.com/CurtizJ)). -* Add "TABLE OVERRIDE" feature for customizing MaterializedMySQL table schemas. 
[#32325](https://github.com/ClickHouse/ClickHouse/pull/32325) ([Stig Bakken](https://github.com/stigsb)). -* Add `EXPLAIN TABLE OVERRIDE` query. [#32836](https://github.com/ClickHouse/ClickHouse/pull/32836) ([Stig Bakken](https://github.com/stigsb)). -* Support TABLE OVERRIDE clause for MaterializedPostgreSQL. RFC: [#31480](https://github.com/ClickHouse/ClickHouse/issues/31480). [#32749](https://github.com/ClickHouse/ClickHouse/pull/32749) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Change ZooKeeper path for zero-copy marks for shared data. Note that "zero-copy replication" is non-production feature (in early stages of development) that you shouldn't use anyway. But in case if you have used it, let you keep in mind this change. [#32061](https://github.com/ClickHouse/ClickHouse/pull/32061) ([ianton-ru](https://github.com/ianton-ru)). -* Events clause support for WINDOW VIEW watch query. [#32607](https://github.com/ClickHouse/ClickHouse/pull/32607) ([vxider](https://github.com/Vxider)). -* Fix ACL with explicit digit hash in `clickhouse-keeper`: now the behavior consistent with ZooKeeper and generated digest is always accepted. [#33249](https://github.com/ClickHouse/ClickHouse/pull/33249) ([小路](https://github.com/nicelulu)). [#33246](https://github.com/ClickHouse/ClickHouse/pull/33246). -* Fix unexpected projection removal when detaching parts. [#32067](https://github.com/ClickHouse/ClickHouse/pull/32067) ([Amos Bird](https://github.com/amosbird)). - -#### Improvement - -* Now date time conversion functions that generates time before `1970-01-01 00:00:00` will be saturated to zero instead of overflow. [#29953](https://github.com/ClickHouse/ClickHouse/pull/29953) ([Amos Bird](https://github.com/amosbird)). It also fixes a bug in index analysis if date truncation function would yield result before the Unix epoch. -* Always display resource usage (total CPU usage, total RAM usage and max RAM usage per host) in client. [#33271](https://github.com/ClickHouse/ClickHouse/pull/33271) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Improve `Bool` type serialization and deserialization, check the range of values. [#32984](https://github.com/ClickHouse/ClickHouse/pull/32984) ([Kruglov Pavel](https://github.com/Avogar)). -* If an invalid setting is defined using the `SET` query or using the query parameters in the HTTP request, error message will contain suggestions that are similar to the invalid setting string (if any exists). [#32946](https://github.com/ClickHouse/ClickHouse/pull/32946) ([Antonio Andelic](https://github.com/antonio2368)). -* Support hints for mistyped setting names for clickhouse-client and clickhouse-local. Closes [#32237](https://github.com/ClickHouse/ClickHouse/issues/32237). [#32841](https://github.com/ClickHouse/ClickHouse/pull/32841) ([凌涛](https://github.com/lingtaolf)). -* Allow to use virtual columns in Materialized Views. Close [#11210](https://github.com/ClickHouse/ClickHouse/issues/11210). [#33482](https://github.com/ClickHouse/ClickHouse/pull/33482) ([OnePiece](https://github.com/zhongyuankai)). -* Add config to disable IPv6 in clickhouse-keeper if needed. This close [#33381](https://github.com/ClickHouse/ClickHouse/issues/33381). [#33450](https://github.com/ClickHouse/ClickHouse/pull/33450) ([Wu Xueyang](https://github.com/wuxueyang96)). -* Add more info to `system.build_options` about current git revision. [#33431](https://github.com/ClickHouse/ClickHouse/pull/33431) ([taiyang-li](https://github.com/taiyang-li)). 
-* `clickhouse-local`: track memory under `--max_memory_usage_in_client` option. [#33341](https://github.com/ClickHouse/ClickHouse/pull/33341) ([Azat Khuzhin](https://github.com/azat)). -* Allow negative intervals in function `intervalLengthSum`. Their length will be added as well. This closes [#33323](https://github.com/ClickHouse/ClickHouse/issues/33323). [#33335](https://github.com/ClickHouse/ClickHouse/pull/33335) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* `LineAsString` can be used as output format. This closes [#30919](https://github.com/ClickHouse/ClickHouse/issues/30919). [#33331](https://github.com/ClickHouse/ClickHouse/pull/33331) ([Sergei Trifonov](https://github.com/serxa)). -* Support `` in cluster configuration, as an alternative form of `1`. Close [#33270](https://github.com/ClickHouse/ClickHouse/issues/33270). [#33330](https://github.com/ClickHouse/ClickHouse/pull/33330) ([SuperDJY](https://github.com/cmsxbc)). -* Pressing Ctrl+C twice will terminate `clickhouse-benchmark` immediately without waiting for in-flight queries. This closes [#32586](https://github.com/ClickHouse/ClickHouse/issues/32586). [#33303](https://github.com/ClickHouse/ClickHouse/pull/33303) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Support Unix timestamp with milliseconds in `parseDateTimeBestEffort` function. [#33276](https://github.com/ClickHouse/ClickHouse/pull/33276) ([Ben](https://github.com/benbiti)). -* Allow to cancel query while reading data from external table in the formats: `Arrow` / `Parquet` / `ORC` - it failed to be cancelled it case of big files and setting input_format_allow_seeks as false. Closes [#29678](https://github.com/ClickHouse/ClickHouse/issues/29678). [#33238](https://github.com/ClickHouse/ClickHouse/pull/33238) ([Kseniia Sumarokova](https://github.com/kssenii)). -* If table engine supports `SETTINGS` clause, allow to pass the settings as key-value or via config. Add this support for MySQL. [#33231](https://github.com/ClickHouse/ClickHouse/pull/33231) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Correctly prevent Nullable primary keys if necessary. This is for [#32780](https://github.com/ClickHouse/ClickHouse/issues/32780). [#33218](https://github.com/ClickHouse/ClickHouse/pull/33218) ([Amos Bird](https://github.com/amosbird)). -* Add retry for `PostgreSQL` connections in case nothing has been fetched yet. Closes [#33199](https://github.com/ClickHouse/ClickHouse/issues/33199). [#33209](https://github.com/ClickHouse/ClickHouse/pull/33209) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Validate config keys for external dictionaries. [#33095](https://github.com/ClickHouse/ClickHouse/issues/33095#issuecomment-1000577517). [#33130](https://github.com/ClickHouse/ClickHouse/pull/33130) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Send profile info inside `clickhouse-local`. Closes [#33093](https://github.com/ClickHouse/ClickHouse/issues/33093). [#33097](https://github.com/ClickHouse/ClickHouse/pull/33097) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Short circuit evaluation: support for function `throwIf`. Closes [#32969](https://github.com/ClickHouse/ClickHouse/issues/32969). [#32973](https://github.com/ClickHouse/ClickHouse/pull/32973) ([Maksim Kita](https://github.com/kitaisreal)). -* (This only happens in unofficial builds). Fixed segfault when inserting data into compressed Decimal, String, FixedString and Array columns. This closes [#32939](https://github.com/ClickHouse/ClickHouse/issues/32939). 
[#32940](https://github.com/ClickHouse/ClickHouse/pull/32940) ([N. Kolotov](https://github.com/nkolotov)). -* Added support for specifying subquery as SQL user defined function. Example: `CREATE FUNCTION test AS () -> (SELECT 1)`. Closes [#30755](https://github.com/ClickHouse/ClickHouse/issues/30755). [#32758](https://github.com/ClickHouse/ClickHouse/pull/32758) ([Maksim Kita](https://github.com/kitaisreal)). -* Improve gRPC compression support for [#28671](https://github.com/ClickHouse/ClickHouse/issues/28671). [#32747](https://github.com/ClickHouse/ClickHouse/pull/32747) ([Vitaly Baranov](https://github.com/vitlibar)). -* Flush all In-Memory data parts when WAL is not enabled while shutdown server or detaching table. [#32742](https://github.com/ClickHouse/ClickHouse/pull/32742) ([nauta](https://github.com/nautaa)). -* Allow to control connection timeouts for MySQL (previously was supported only for dictionary source). Closes [#16669](https://github.com/ClickHouse/ClickHouse/issues/16669). Previously default connect_timeout was rather small, now it is configurable. [#32734](https://github.com/ClickHouse/ClickHouse/pull/32734) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Support `authSource` option for storage `MongoDB`. Closes [#32594](https://github.com/ClickHouse/ClickHouse/issues/32594). [#32702](https://github.com/ClickHouse/ClickHouse/pull/32702) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Support `Date32` type in `genarateRandom` table function. [#32643](https://github.com/ClickHouse/ClickHouse/pull/32643) ([nauta](https://github.com/nautaa)). -* Add settings `max_concurrent_select_queries` and `max_concurrent_insert_queries` for control concurrent queries by query kind. Close [#3575](https://github.com/ClickHouse/ClickHouse/issues/3575). [#32609](https://github.com/ClickHouse/ClickHouse/pull/32609) ([SuperDJY](https://github.com/cmsxbc)). -* Improve handling nested structures with missing columns while reading data in `Protobuf` format. Follow-up to https://github.com/ClickHouse/ClickHouse/pull/31988. [#32531](https://github.com/ClickHouse/ClickHouse/pull/32531) ([Vitaly Baranov](https://github.com/vitlibar)). -* Allow empty credentials for `MongoDB` engine. Closes [#26267](https://github.com/ClickHouse/ClickHouse/issues/26267). [#32460](https://github.com/ClickHouse/ClickHouse/pull/32460) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Disable some optimizations for window functions that may lead to exceptions. Closes [#31535](https://github.com/ClickHouse/ClickHouse/issues/31535). Closes [#31620](https://github.com/ClickHouse/ClickHouse/issues/31620). [#32453](https://github.com/ClickHouse/ClickHouse/pull/32453) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Allows to connect to MongoDB 5.0. Closes [#31483](https://github.com/ClickHouse/ClickHouse/issues/31483),. [#32416](https://github.com/ClickHouse/ClickHouse/pull/32416) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Enable comparison between `Decimal` and `Float`. Closes [#22626](https://github.com/ClickHouse/ClickHouse/issues/22626). [#31966](https://github.com/ClickHouse/ClickHouse/pull/31966) ([flynn](https://github.com/ucasFL)). -* Added settings `command_read_timeout`, `command_write_timeout` for `StorageExecutable`, `StorageExecutablePool`, `ExecutableDictionary`, `ExecutablePoolDictionary`, `ExecutableUserDefinedFunctions`. Setting `command_read_timeout` controls timeout for reading data from command stdout in milliseconds. 
Setting `command_write_timeout` timeout for writing data to command stdin in milliseconds. Added settings `command_termination_timeout` for `ExecutableUserDefinedFunction`, `ExecutableDictionary`, `StorageExecutable`. Added setting `execute_direct` for `ExecutableUserDefinedFunction`, by default true. Added setting `execute_direct` for `ExecutableDictionary`, `ExecutablePoolDictionary`, by default false. [#30957](https://github.com/ClickHouse/ClickHouse/pull/30957) ([Maksim Kita](https://github.com/kitaisreal)). -* Bitmap aggregate functions will give correct result for out of range argument instead of wraparound. [#33127](https://github.com/ClickHouse/ClickHouse/pull/33127) ([DR](https://github.com/freedomDR)). -* Fix parsing incorrect queries with `FROM INFILE` statement. [#33521](https://github.com/ClickHouse/ClickHouse/pull/33521) ([Kruglov Pavel](https://github.com/Avogar)). -* Don't allow to write into `S3` if path contains globs. [#33142](https://github.com/ClickHouse/ClickHouse/pull/33142) ([Kruglov Pavel](https://github.com/Avogar)). -* `--echo` option was not used by `clickhouse-client` in batch mode with single query. [#32843](https://github.com/ClickHouse/ClickHouse/pull/32843) ([N. Kolotov](https://github.com/nkolotov)). -* Use `--database` option for clickhouse-local. [#32797](https://github.com/ClickHouse/ClickHouse/pull/32797) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix surprisingly bad code in SQL ordinary function `file`. Now it supports symlinks. [#32640](https://github.com/ClickHouse/ClickHouse/pull/32640) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Updating `modification_time` for data part in `system.parts` after part movement [#32964](https://github.com/ClickHouse/ClickHouse/issues/32964). [#32965](https://github.com/ClickHouse/ClickHouse/pull/32965) ([save-my-heart](https://github.com/save-my-heart)). -* Potential issue, cannot be exploited: integer overflow may happen in array resize. [#33024](https://github.com/ClickHouse/ClickHouse/pull/33024) ([varadarajkumar](https://github.com/varadarajkumar)). - -#### Build/Testing/Packaging Improvement - -* Add packages, functional tests and Docker builds for AArch64 (ARM) version of ClickHouse. [#32911](https://github.com/ClickHouse/ClickHouse/pull/32911) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). [#32415](https://github.com/ClickHouse/ClickHouse/pull/32415) -* Prepare ClickHouse to be built with musl-libc. It is not enabled by default. [#33134](https://github.com/ClickHouse/ClickHouse/pull/33134) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Make installation script working on FreeBSD. This closes [#33384](https://github.com/ClickHouse/ClickHouse/issues/33384). [#33418](https://github.com/ClickHouse/ClickHouse/pull/33418) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Add `actionlint` for GitHub Actions workflows and verify workflow files via `act --list` to check the correct workflow syntax. [#33612](https://github.com/ClickHouse/ClickHouse/pull/33612) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Add more tests for the nullable primary key feature. Add more tests with different types and merge tree kinds, plus randomly generated data. [#33228](https://github.com/ClickHouse/ClickHouse/pull/33228) ([Amos Bird](https://github.com/amosbird)). -* Add a simple tool to visualize flaky tests in web browser. [#33185](https://github.com/ClickHouse/ClickHouse/pull/33185) ([alexey-milovidov](https://github.com/alexey-milovidov)). 
-* Enable hermetic build for shared builds. This is mainly for developers. [#32968](https://github.com/ClickHouse/ClickHouse/pull/32968) ([Amos Bird](https://github.com/amosbird)). -* Update `libc++` and `libc++abi` to the latest. [#32484](https://github.com/ClickHouse/ClickHouse/pull/32484) ([Raúl Marín](https://github.com/Algunenano)). -* Added integration test for external .NET client ([ClickHouse.Client](https://github.com/DarkWanderer/ClickHouse.Client)). [#23230](https://github.com/ClickHouse/ClickHouse/pull/23230) ([Oleg V. Kozlyuk](https://github.com/DarkWanderer)). -* Inject git information into clickhouse binary file. So we can get source code revision easily from clickhouse binary file. [#33124](https://github.com/ClickHouse/ClickHouse/pull/33124) ([taiyang-li](https://github.com/taiyang-li)). -* Remove obsolete code from ConfigProcessor. Yandex specific code is not used anymore. The code contained one minor defect. This defect was reported by [Mallik Hassan](https://github.com/SadiHassan) in [#33032](https://github.com/ClickHouse/ClickHouse/issues/33032). This closes [#33032](https://github.com/ClickHouse/ClickHouse/issues/33032). [#33026](https://github.com/ClickHouse/ClickHouse/pull/33026) ([alexey-milovidov](https://github.com/alexey-milovidov)). - -#### Bug Fix (user-visible misbehavior in official stable or prestable release) - -* Several fixes for format parsing. This is relevant if `clickhouse-server` is open for write access to adversary. Specifically crafted input data for `Native` format may lead to reading uninitialized memory or crash. This is relevant if `clickhouse-server` is open for write access to adversary. [#33050](https://github.com/ClickHouse/ClickHouse/pull/33050) ([Heena Bansal](https://github.com/HeenaBansal2009)). Fixed Apache Avro Union type index out of boundary issue in Apache Avro binary format. [#33022](https://github.com/ClickHouse/ClickHouse/pull/33022) ([Harry Lee](https://github.com/HarryLeeIBM)). Fix null pointer dereference in `LowCardinality` data when deserializing `LowCardinality` data in the Native format. [#33021](https://github.com/ClickHouse/ClickHouse/pull/33021) ([Harry Lee](https://github.com/HarryLeeIBM)). -* ClickHouse Keeper handler will correctly remove operation when response sent. [#32988](https://github.com/ClickHouse/ClickHouse/pull/32988) ([JackyWoo](https://github.com/JackyWoo)). -* Potential off-by-one miscalculation of quotas: quota limit was not reached, but the limit was exceeded. This fixes [#31174](https://github.com/ClickHouse/ClickHouse/issues/31174). [#31656](https://github.com/ClickHouse/ClickHouse/pull/31656) ([sunny](https://github.com/sunny19930321)). -* Fixed CASTing from String to IPv4 or IPv6 and back. Fixed error message in case of failed conversion. [#29224](https://github.com/ClickHouse/ClickHouse/pull/29224) ([Dmitry Novik](https://github.com/novikd)) [#27914](https://github.com/ClickHouse/ClickHouse/pull/27914) ([Vasily Nemkov](https://github.com/Enmk)). -* Fixed an exception like `Unknown aggregate function nothing` during an execution on a remote server. This fixes [#16689](https://github.com/ClickHouse/ClickHouse/issues/16689). [#26074](https://github.com/ClickHouse/ClickHouse/pull/26074) ([hexiaoting](https://github.com/hexiaoting)). -* Fix wrong database for JOIN without explicit database in distributed queries (Fixes: [#10471](https://github.com/ClickHouse/ClickHouse/issues/10471)). [#33611](https://github.com/ClickHouse/ClickHouse/pull/33611) ([Azat Khuzhin](https://github.com/azat)). 
-* Fix segfault in Apache `Avro` format that appears after the second insert into file. [#33566](https://github.com/ClickHouse/ClickHouse/pull/33566) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix segfault in Apache `Arrow` format if schema contains `Dictionary` type. Closes [#33507](https://github.com/ClickHouse/ClickHouse/issues/33507). [#33529](https://github.com/ClickHouse/ClickHouse/pull/33529) ([Kruglov Pavel](https://github.com/Avogar)). -* Out of band `offset` and `limit` settings may be applied incorrectly for views. Close [#33289](https://github.com/ClickHouse/ClickHouse/issues/33289) [#33518](https://github.com/ClickHouse/ClickHouse/pull/33518) ([hexiaoting](https://github.com/hexiaoting)). -* Fix an exception `Block structure mismatch` which may happen during insertion into table with default nested `LowCardinality` column. Fixes [#33028](https://github.com/ClickHouse/ClickHouse/issues/33028). [#33504](https://github.com/ClickHouse/ClickHouse/pull/33504) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix dictionary expressions for `range_hashed` range min and range max attributes when created using DDL. Closes [#30809](https://github.com/ClickHouse/ClickHouse/issues/30809). [#33478](https://github.com/ClickHouse/ClickHouse/pull/33478) ([Maksim Kita](https://github.com/kitaisreal)). -* Fix possible use-after-free for INSERT into Materialized View with concurrent DROP ([Azat Khuzhin](https://github.com/azat)). -* Do not try to read pass EOF (to workaround for a bug in the Linux kernel), this bug can be reproduced on kernels (3.14..5.9), and requires `index_granularity_bytes=0` (i.e. turn off adaptive index granularity). [#33372](https://github.com/ClickHouse/ClickHouse/pull/33372) ([Azat Khuzhin](https://github.com/azat)). -* The commands `SYSTEM SUSPEND` and `SYSTEM ... THREAD FUZZER` missed access control. It is fixed. Author: Kevin Michel. [#33333](https://github.com/ClickHouse/ClickHouse/pull/33333) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fix when `COMMENT` for dictionaries does not appear in `system.tables`, `system.dictionaries`. Allow to modify the comment for `Dictionary` engine. Closes [#33251](https://github.com/ClickHouse/ClickHouse/issues/33251). [#33261](https://github.com/ClickHouse/ClickHouse/pull/33261) ([Maksim Kita](https://github.com/kitaisreal)). -* Add asynchronous inserts (with enabled setting `async_insert`) to query log. Previously such queries didn't appear in the query log. [#33239](https://github.com/ClickHouse/ClickHouse/pull/33239) ([Anton Popov](https://github.com/CurtizJ)). -* Fix sending `WHERE 1 = 0` expressions for external databases query. Closes [#33152](https://github.com/ClickHouse/ClickHouse/issues/33152). [#33214](https://github.com/ClickHouse/ClickHouse/pull/33214) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix DDL validation for MaterializedPostgreSQL. Fix setting `materialized_postgresql_allow_automatic_update`. Closes [#29535](https://github.com/ClickHouse/ClickHouse/issues/29535). [#33200](https://github.com/ClickHouse/ClickHouse/pull/33200) ([Kseniia Sumarokova](https://github.com/kssenii)). Make sure unused replication slots are always removed. Found in [#26952](https://github.com/ClickHouse/ClickHouse/issues/26952). [#33187](https://github.com/ClickHouse/ClickHouse/pull/33187) ([Kseniia Sumarokova](https://github.com/kssenii)). Fix MaterializedPostreSQL detach/attach (removing / adding to replication) tables with non-default schema. 
Found in [#29535](https://github.com/ClickHouse/ClickHouse/issues/29535). [#33179](https://github.com/ClickHouse/ClickHouse/pull/33179) ([Kseniia Sumarokova](https://github.com/kssenii)). Fix DROP MaterializedPostgreSQL database. [#33468](https://github.com/ClickHouse/ClickHouse/pull/33468) ([Kseniia Sumarokova](https://github.com/kssenii)). -* The metric `StorageBufferBytes` sometimes was miscalculated. [#33159](https://github.com/ClickHouse/ClickHouse/pull/33159) ([xuyatian](https://github.com/xuyatian)). -* Fix error `Invalid version for SerializationLowCardinality key column` in case of reading from `LowCardinality` column with `local_filesystem_read_prefetch` or `remote_filesystem_read_prefetch` enabled. [#33046](https://github.com/ClickHouse/ClickHouse/pull/33046) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix `s3` table function reading empty file. Closes [#33008](https://github.com/ClickHouse/ClickHouse/issues/33008). [#33037](https://github.com/ClickHouse/ClickHouse/pull/33037) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix Context leak in case of cancel_http_readonly_queries_on_client_close (i.e. leaking of external tables that had been uploaded the the server and other resources). [#32982](https://github.com/ClickHouse/ClickHouse/pull/32982) ([Azat Khuzhin](https://github.com/azat)). -* Fix wrong tuple output in `CSV` format in case of custom csv delimiter. [#32981](https://github.com/ClickHouse/ClickHouse/pull/32981) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix HDFS URL check that didn't allow using HA namenode address. Bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/31042. [#32976](https://github.com/ClickHouse/ClickHouse/pull/32976) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix throwing exception like positional argument out of bounds for non-positional arguments. Closes [#31173](https://github.com/ClickHouse/ClickHouse/issues/31173)#event-5789668239. [#32961](https://github.com/ClickHouse/ClickHouse/pull/32961) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix UB in case of unexpected EOF during filling a set from HTTP query (i.e. if the client interrupted in the middle, i.e. `timeout 0.15s curl -Ss -F 's=@t.csv;' 'http://127.0.0.1:8123/?s_structure=key+Int&query=SELECT+dummy+IN+s'` and with large enough `t.csv`). [#32955](https://github.com/ClickHouse/ClickHouse/pull/32955) ([Azat Khuzhin](https://github.com/azat)). -* Fix a regression in `replaceRegexpAll` function. The function worked incorrectly when matched substring was empty. This closes [#32777](https://github.com/ClickHouse/ClickHouse/issues/32777). This closes [#30245](https://github.com/ClickHouse/ClickHouse/issues/30245). [#32945](https://github.com/ClickHouse/ClickHouse/pull/32945) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fix `ORC` format stripe reading. [#32929](https://github.com/ClickHouse/ClickHouse/pull/32929) ([kreuzerkrieg](https://github.com/kreuzerkrieg)). -* `topKWeightedState` failed for some input types. [#32487](https://github.com/ClickHouse/ClickHouse/issues/32487). [#32914](https://github.com/ClickHouse/ClickHouse/pull/32914) ([vdimir](https://github.com/vdimir)). -* Fix exception `Single chunk is expected from view inner query (LOGICAL_ERROR)` in materialized view. Fixes [#31419](https://github.com/ClickHouse/ClickHouse/issues/31419). [#32862](https://github.com/ClickHouse/ClickHouse/pull/32862) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
-* Fix optimization with lazy seek for async reads from remote filesystems. Closes [#32803](https://github.com/ClickHouse/ClickHouse/issues/32803). [#32835](https://github.com/ClickHouse/ClickHouse/pull/32835) ([Kseniia Sumarokova](https://github.com/kssenii)). -* `MergeTree` table engine might silently skip some mutations if there are too many running mutations or in case of high memory consumption, it's fixed. Fixes [#17882](https://github.com/ClickHouse/ClickHouse/issues/17882). [#32814](https://github.com/ClickHouse/ClickHouse/pull/32814) ([tavplubix](https://github.com/tavplubix)). -* Avoid reusing the scalar subquery cache when processing MV blocks. This fixes a bug when the scalar query reference the source table but it means that all subscalar queries in the MV definition will be calculated for each block. [#32811](https://github.com/ClickHouse/ClickHouse/pull/32811) ([Raúl Marín](https://github.com/Algunenano)). -* Server might fail to start if database with `MySQL` engine cannot connect to MySQL server, it's fixed. Fixes [#14441](https://github.com/ClickHouse/ClickHouse/issues/14441). [#32802](https://github.com/ClickHouse/ClickHouse/pull/32802) ([tavplubix](https://github.com/tavplubix)). -* Fix crash when used `fuzzBits` function, close [#32737](https://github.com/ClickHouse/ClickHouse/issues/32737). [#32755](https://github.com/ClickHouse/ClickHouse/pull/32755) ([SuperDJY](https://github.com/cmsxbc)). -* Fix error `Column is not under aggregate function` in case of MV with `GROUP BY (list of columns)` (which is pared as `GROUP BY tuple(...)`) over `Kafka`/`RabbitMQ`. Fixes [#32668](https://github.com/ClickHouse/ClickHouse/issues/32668) and [#32744](https://github.com/ClickHouse/ClickHouse/issues/32744). [#32751](https://github.com/ClickHouse/ClickHouse/pull/32751) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix `ALTER TABLE ... MATERIALIZE TTL` query with `TTL ... DELETE WHERE ...` and `TTL ... GROUP BY ...` modes. [#32695](https://github.com/ClickHouse/ClickHouse/pull/32695) ([Anton Popov](https://github.com/CurtizJ)). -* Fix `optimize_read_in_order` optimization in case when table engine is `Distributed` or `Merge` and its underlying `MergeTree` tables have monotonous function in prefix of sorting key. [#32670](https://github.com/ClickHouse/ClickHouse/pull/32670) ([Anton Popov](https://github.com/CurtizJ)). -* Fix LOGICAL_ERROR exception when the target of a materialized view is a JOIN or a SET table. [#32669](https://github.com/ClickHouse/ClickHouse/pull/32669) ([Raúl Marín](https://github.com/Algunenano)). -* Inserting into S3 with multipart upload to Google Cloud Storage may trigger abort. [#32504](https://github.com/ClickHouse/ClickHouse/issues/32504). [#32649](https://github.com/ClickHouse/ClickHouse/pull/32649) ([vdimir](https://github.com/vdimir)). -* Fix possible exception at `RabbitMQ` storage startup by delaying channel creation. [#32584](https://github.com/ClickHouse/ClickHouse/pull/32584) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix table lifetime (i.e. possible use-after-free) in case of parallel DROP TABLE and INSERT. [#32572](https://github.com/ClickHouse/ClickHouse/pull/32572) ([Azat Khuzhin](https://github.com/azat)). -* Fix async inserts with formats `CustomSeparated`, `Template`, `Regexp`, `MsgPack` and `JSONAsString`. Previousely the async inserts with these formats didn't read any data. [#32530](https://github.com/ClickHouse/ClickHouse/pull/32530) ([Kruglov Pavel](https://github.com/Avogar)). 
-* Fix `groupBitmapAnd` function on distributed table. [#32529](https://github.com/ClickHouse/ClickHouse/pull/32529) ([minhthucdao](https://github.com/dmthuc)). -* Fix crash in JOIN found by fuzzer, close [#32458](https://github.com/ClickHouse/ClickHouse/issues/32458). [#32508](https://github.com/ClickHouse/ClickHouse/pull/32508) ([vdimir](https://github.com/vdimir)). -* Proper handling of the case with Apache Arrow column duplication. [#32507](https://github.com/ClickHouse/ClickHouse/pull/32507) ([Dmitriy Mokhnatkin](https://github.com/DMokhnatkin)). -* Fix issue with ambiguous query formatting in distributed queries that led to errors when some table columns were named `ALL` or `DISTINCT`. This closes [#32391](https://github.com/ClickHouse/ClickHouse/issues/32391). [#32490](https://github.com/ClickHouse/ClickHouse/pull/32490) ([alexey-milovidov](https://github.com/alexey-milovidov)). -* Fix failures in queries that are trying to use skipping indices, which are not materialized yet. Fixes [#32292](https://github.com/ClickHouse/ClickHouse/issues/32292) and [#30343](https://github.com/ClickHouse/ClickHouse/issues/30343). [#32359](https://github.com/ClickHouse/ClickHouse/pull/32359) ([Anton Popov](https://github.com/CurtizJ)). -* Fix broken select query when there are more than 2 row policies on same column, begin at second queries on the same session. [#31606](https://github.com/ClickHouse/ClickHouse/issues/31606). [#32291](https://github.com/ClickHouse/ClickHouse/pull/32291) ([SuperDJY](https://github.com/cmsxbc)). -* Fix fractional unix timestamp conversion to `DateTime64`, fractional part was reversed for negative unix timestamps (before 1970-01-01). [#32240](https://github.com/ClickHouse/ClickHouse/pull/32240) ([Ben](https://github.com/benbiti)). -* Some entries of replication queue might hang for `temporary_directories_lifetime` (1 day by default) with `Directory tmp_merge_` or `Part ... (state Deleting) already exists, but it will be deleted soon` or similar error. It's fixed. Fixes [#29616](https://github.com/ClickHouse/ClickHouse/issues/29616). [#32201](https://github.com/ClickHouse/ClickHouse/pull/32201) ([tavplubix](https://github.com/tavplubix)). -* Fix parsing of `APPLY lambda` column transformer which could lead to client/server crash. [#32138](https://github.com/ClickHouse/ClickHouse/pull/32138) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix `base64Encode` adding trailing bytes on small strings. [#31797](https://github.com/ClickHouse/ClickHouse/pull/31797) ([Kevin Michel](https://github.com/kmichel-aiven)). -* Fix possible crash (or incorrect result) in case of `LowCardinality` arguments of window function. Fixes [#31114](https://github.com/ClickHouse/ClickHouse/issues/31114). [#31888](https://github.com/ClickHouse/ClickHouse/pull/31888) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix hang up with command `DROP TABLE system.query_log sync`. [#33293](https://github.com/ClickHouse/ClickHouse/pull/33293) ([zhanghuajie](https://github.com/zhanghuajieHIT)). - -## [Changelog for 2021](https://clickhouse.com/docs/en/whats-new/changelog/2021) +* Replace domain IP types (IPv4, IPv6) with native. [#43221](https://github.com/ClickHouse/ClickHouse/pull/43221) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). It automatically fixes some missing implementations in the code. +* Fix the backup process if mutations get killed during the backup process. [#45351](https://github.com/ClickHouse/ClickHouse/pull/45351) ([Vitaly Baranov](https://github.com/vitlibar)). 
+* Fix the `Invalid number of rows in Chunk` exception message. [#41404](https://github.com/ClickHouse/ClickHouse/issues/41404). [#42126](https://github.com/ClickHouse/ClickHouse/pull/42126) ([Alexander Gololobov](https://github.com/davenger)). +* Fix possible use of an uninitialized value after executing expressions after sorting. Closes [#43386](https://github.com/ClickHouse/ClickHouse/issues/43386) [#43635](https://github.com/ClickHouse/ClickHouse/pull/43635) ([Kruglov Pavel](https://github.com/Avogar)). +* Better handling of NULL in aggregate combinators, fix possible segfault/logical error while using an obscure optimization `optimize_rewrite_sum_if_to_count_if`. Closes [#43758](https://github.com/ClickHouse/ClickHouse/issues/43758). [#43813](https://github.com/ClickHouse/ClickHouse/pull/43813) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix CREATE USER/ROLE query settings constraints. [#43993](https://github.com/ClickHouse/ClickHouse/pull/43993) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fixed bug with non-parsable default value for `EPHEMERAL` column in table metadata. [#44026](https://github.com/ClickHouse/ClickHouse/pull/44026) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix parsing of bad version from compatibility setting. [#44224](https://github.com/ClickHouse/ClickHouse/pull/44224) ([Kruglov Pavel](https://github.com/Avogar)). +* Bring interval subtraction from datetime in line with addition. [#44241](https://github.com/ClickHouse/ClickHouse/pull/44241) ([ltrk2](https://github.com/ltrk2)). +* Remove limits on the maximum size of the result for view. [#44261](https://github.com/ClickHouse/ClickHouse/pull/44261) ([lizhuoyu5](https://github.com/lzydmxy)). +* Fix possible logical error in cache if `do_not_evict_index_and_mrk_files=1`. Closes [#42142](https://github.com/ClickHouse/ClickHouse/issues/42142). [#44268](https://github.com/ClickHouse/ClickHouse/pull/44268) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix possible too early cache write interruption in write-through cache (caching could be stopped due to false assumption when it shouldn't have). [#44289](https://github.com/ClickHouse/ClickHouse/pull/44289) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix possible crash in the case function `IN` with constant arguments was used as a constant argument together with `LowCardinality`. Fixes [#44221](https://github.com/ClickHouse/ClickHouse/issues/44221). [#44346](https://github.com/ClickHouse/ClickHouse/pull/44346) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix support for complex parameters (like arrays) of parametric aggregate functions. This closes [#30975](https://github.com/ClickHouse/ClickHouse/issues/30975). The aggregate function `sumMapFiltered` was unusable in distributed queries before this change. [#44358](https://github.com/ClickHouse/ClickHouse/pull/44358) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix reading ObjectId in BSON schema inference. [#44382](https://github.com/ClickHouse/ClickHouse/pull/44382) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix race which can lead to premature temp parts removal before merge finishes in ReplicatedMergeTree. This issue could lead to errors like `No such file or directory: xxx`. Fixes [#43983](https://github.com/ClickHouse/ClickHouse/issues/43983). [#44383](https://github.com/ClickHouse/ClickHouse/pull/44383) ([alesapin](https://github.com/alesapin)). +* Some invalid `SYSTEM ... 
ON CLUSTER` queries worked in an unexpected way if a cluster name was not specified. It's fixed, now invalid queries throw `SYNTAX_ERROR` as they should. Fixes [#44264](https://github.com/ClickHouse/ClickHouse/issues/44264). [#44387](https://github.com/ClickHouse/ClickHouse/pull/44387) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix reading Map type in ORC format. [#44400](https://github.com/ClickHouse/ClickHouse/pull/44400) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix reading columns that are not presented in input data in Parquet/ORC formats. Previously it could lead to error `INCORRECT_NUMBER_OF_COLUMNS`. Closes [#44333](https://github.com/ClickHouse/ClickHouse/issues/44333). [#44405](https://github.com/ClickHouse/ClickHouse/pull/44405) ([Kruglov Pavel](https://github.com/Avogar)). +* Previously the `bar` function used the same '▋' (U+258B "Left five eighths block") character to display both 5/8 and 6/8 bars. This change corrects this behavior by using '▊' (U+258A "Left three quarters block") for displaying 6/8 bar. [#44410](https://github.com/ClickHouse/ClickHouse/pull/44410) ([Alexander Gololobov](https://github.com/davenger)). +* Placing profile settings after profile settings constraints in the configuration file made constraints ineffective. [#44411](https://github.com/ClickHouse/ClickHouse/pull/44411) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Fix `SYNTAX_ERROR` while running `EXPLAIN AST INSERT` queries with data. Closes [#44207](https://github.com/ClickHouse/ClickHouse/issues/44207). [#44413](https://github.com/ClickHouse/ClickHouse/pull/44413) ([save-my-heart](https://github.com/save-my-heart)). +* Fix reading bool value with CRLF in CSV format. Closes [#44401](https://github.com/ClickHouse/ClickHouse/issues/44401). [#44442](https://github.com/ClickHouse/ClickHouse/pull/44442) ([Kruglov Pavel](https://github.com/Avogar)). +* Don't execute and/or/if/multiIf on a LowCardinality dictionary, so the result type cannot be LowCardinality. It could lead to the error `Illegal column ColumnLowCardinality` in some cases. Fixes [#43603](https://github.com/ClickHouse/ClickHouse/issues/43603). [#44469](https://github.com/ClickHouse/ClickHouse/pull/44469) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix mutations with the setting `max_streams_for_merge_tree_reading`. [#44472](https://github.com/ClickHouse/ClickHouse/pull/44472) ([Anton Popov](https://github.com/CurtizJ)). +* Fix potential null pointer dereference with GROUPING SETS in ASTSelectQuery::formatImpl ([#43049](https://github.com/ClickHouse/ClickHouse/issues/43049)). [#44479](https://github.com/ClickHouse/ClickHouse/pull/44479) ([Robert Schulze](https://github.com/rschu1ze)). +* Validate types in table function arguments, CAST function arguments, JSONAsObject schema inference according to settings. [#44501](https://github.com/ClickHouse/ClickHouse/pull/44501) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix IN function with LowCardinality and const column, close [#44503](https://github.com/ClickHouse/ClickHouse/issues/44503). [#44506](https://github.com/ClickHouse/ClickHouse/pull/44506) ([Duc Canh Le](https://github.com/canhld94)). +* Fixed a bug in the normalization of a `DEFAULT` expression in `CREATE TABLE` statement. The second argument of the function `in` (or the right argument of operator `IN`) might be replaced with the result of its evaluation during CREATE query execution. Fixes [#44496](https://github.com/ClickHouse/ClickHouse/issues/44496). 
[#44547](https://github.com/ClickHouse/ClickHouse/pull/44547) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Projections do not work in presence of WITH ROLLUP, WITH CUBE and WITH TOTALS. In previous versions, a query produced an exception instead of skipping the usage of projections. This closes [#44614](https://github.com/ClickHouse/ClickHouse/issues/44614). This closes [#42772](https://github.com/ClickHouse/ClickHouse/issues/42772). [#44615](https://github.com/ClickHouse/ClickHouse/pull/44615) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Async blocks were not cleaned because the function `get all blocks sorted by time` didn't get async blocks. [#44651](https://github.com/ClickHouse/ClickHouse/pull/44651) ([Han Fei](https://github.com/hanfei1991)). +* Fix `LOGICAL_ERROR` `The top step of the right pipeline should be ExpressionStep` for JOIN with subquery, UNION, and TOTALS. Fixes [#43687](https://github.com/ClickHouse/ClickHouse/issues/43687). [#44673](https://github.com/ClickHouse/ClickHouse/pull/44673) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Avoid `std::out_of_range` exception in the Executable table engine. [#44681](https://github.com/ClickHouse/ClickHouse/pull/44681) ([Kruglov Pavel](https://github.com/Avogar)). +* Do not apply `optimize_syntax_fuse_functions` to quantiles on AST, close [#44712](https://github.com/ClickHouse/ClickHouse/issues/44712). [#44713](https://github.com/ClickHouse/ClickHouse/pull/44713) ([Vladimir C](https://github.com/vdimir)). +* Fix bug with wrong type in Merge table and PREWHERE, close [#43324](https://github.com/ClickHouse/ClickHouse/issues/43324). [#44716](https://github.com/ClickHouse/ClickHouse/pull/44716) ([Vladimir C](https://github.com/vdimir)). +* Fix a possible crash during shutdown (while destroying TraceCollector). Fixes [#44757](https://github.com/ClickHouse/ClickHouse/issues/44757). [#44758](https://github.com/ClickHouse/ClickHouse/pull/44758) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix a possible crash in distributed query processing. The crash could happen if a query with totals or extremes returned an empty result and there are mismatched types in the Distributed and the local tables. Fixes [#44738](https://github.com/ClickHouse/ClickHouse/issues/44738). [#44760](https://github.com/ClickHouse/ClickHouse/pull/44760) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix fsync for fetches (`min_compressed_bytes_to_fsync_after_fetch`)/small files (ttl.txt, columns.txt) in mutations (`min_rows_to_fsync_after_merge`/`min_compressed_bytes_to_fsync_after_merge`). [#44781](https://github.com/ClickHouse/ClickHouse/pull/44781) ([Azat Khuzhin](https://github.com/azat)). +* A rare race condition was possible when querying the `system.parts` or `system.parts_columns` tables in the presence of parts being moved between disks. Introduced in [#41145](https://github.com/ClickHouse/ClickHouse/issues/41145). [#44809](https://github.com/ClickHouse/ClickHouse/pull/44809) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix the error `Context has expired` which could appear with enabled projections optimization. Can be reproduced for queries with specific functions, like `dictHas/dictGet` which use context in runtime. Fixes [#44844](https://github.com/ClickHouse/ClickHouse/issues/44844). [#44850](https://github.com/ClickHouse/ClickHouse/pull/44850) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+* A fix for `Cannot read all data` error which could happen while reading `LowCardinality` dictionary from remote fs. Fixes [#44709](https://github.com/ClickHouse/ClickHouse/issues/44709). [#44875](https://github.com/ClickHouse/ClickHouse/pull/44875) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Ignore cases when hardware monitor sensors cannot be read instead of showing a full exception message in logs. [#44895](https://github.com/ClickHouse/ClickHouse/pull/44895) ([Raúl Marín](https://github.com/Algunenano)).
+* Use `max_delay_to_insert` value in case the calculated time to delay INSERT exceeds the setting value. Related to [#44902](https://github.com/ClickHouse/ClickHouse/issues/44902). [#44916](https://github.com/ClickHouse/ClickHouse/pull/44916) ([Igor Nikonov](https://github.com/devcrafter)).
+* Fix error `Different order of columns in UNION subquery` for queries with `UNION`. Fixes [#44866](https://github.com/ClickHouse/ClickHouse/issues/44866). [#44920](https://github.com/ClickHouse/ClickHouse/pull/44920) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* The delay for INSERT could be calculated incorrectly, which could lead to always using the `max_delay_to_insert` setting as the delay instead of a correct value. Now a simple formula `max_delay_to_insert * (parts_over_threshold/max_allowed_parts_over_threshold)` is used, i.e. the delay grows proportionally to the number of parts over the threshold. Closes [#44902](https://github.com/ClickHouse/ClickHouse/issues/44902). [#44954](https://github.com/ClickHouse/ClickHouse/pull/44954) ([Igor Nikonov](https://github.com/devcrafter)).
+* Fix alter table TTL error when a wide part has the lightweight delete mask. [#44959](https://github.com/ClickHouse/ClickHouse/pull/44959) ([Mingliang Pan](https://github.com/liangliangpan)).
+* Follow-up fix for Replace domain IP types (IPv4, IPv6) with native [#43221](https://github.com/ClickHouse/ClickHouse/issues/43221). [#45024](https://github.com/ClickHouse/ClickHouse/pull/45024) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Follow-up fix for Replace domain IP types (IPv4, IPv6) with native https://github.com/ClickHouse/ClickHouse/pull/43221. [#45043](https://github.com/ClickHouse/ClickHouse/pull/45043) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* A buffer overflow was possible in the parser. Found by fuzzer. [#45047](https://github.com/ClickHouse/ClickHouse/pull/45047) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix possible cannot-read-all-data error in storage FileLog. Closes [#45051](https://github.com/ClickHouse/ClickHouse/issues/45051), [#38257](https://github.com/ClickHouse/ClickHouse/issues/38257). [#45057](https://github.com/ClickHouse/ClickHouse/pull/45057) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Memory efficient aggregation (setting `distributed_aggregation_memory_efficient`) is disabled when grouping sets are present in the query. [#45058](https://github.com/ClickHouse/ClickHouse/pull/45058) ([Nikita Taranov](https://github.com/nickitat)).
+* Fix `RANGE_HASHED` dictionary to count range columns as part of the primary key during updates when `update_field` is specified. Closes [#44588](https://github.com/ClickHouse/ClickHouse/issues/44588). [#45061](https://github.com/ClickHouse/ClickHouse/pull/45061) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fix error `Cannot capture column` for `LowCardinality` captured argument of nested lambda. Fixes [#45028](https://github.com/ClickHouse/ClickHouse/issues/45028). [#45065](https://github.com/ClickHouse/ClickHouse/pull/45065) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix the wrong query result of `additional_table_filters` (additional filter was not applied) in case the minmax/count projection is used. [#45133](https://github.com/ClickHouse/ClickHouse/pull/45133) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fixed bug in `histogram` function accepting negative values. [#45147](https://github.com/ClickHouse/ClickHouse/pull/45147) ([simpleton](https://github.com/rgzntrade)).
+* Fix wrong column nullability in StorageJoin, close [#44940](https://github.com/ClickHouse/ClickHouse/issues/44940). [#45184](https://github.com/ClickHouse/ClickHouse/pull/45184) ([Vladimir C](https://github.com/vdimir)).
+* Fix `background_fetches_pool_size` setting reload (increase at runtime). [#45189](https://github.com/ClickHouse/ClickHouse/pull/45189) ([Raúl Marín](https://github.com/Algunenano)).
+* Correctly process `SELECT` queries on KV engines (e.g. KeeperMap, EmbeddedRocksDB) using `IN` on the key with a subquery producing a different type. [#45215](https://github.com/ClickHouse/ClickHouse/pull/45215) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix logical error in SEMI JOIN & join_use_nulls in some cases, close [#45163](https://github.com/ClickHouse/ClickHouse/issues/45163), close [#45209](https://github.com/ClickHouse/ClickHouse/issues/45209). [#45230](https://github.com/ClickHouse/ClickHouse/pull/45230) ([Vladimir C](https://github.com/vdimir)).
+* Fix heap-use-after-free in reading from s3. [#45253](https://github.com/ClickHouse/ClickHouse/pull/45253) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix a bug when the Avro Union type is ['null', Nested type], closes [#45275](https://github.com/ClickHouse/ClickHouse/issues/45275). Also fix a bug that incorrectly inferred the `bytes` type as `Float`. [#45276](https://github.com/ClickHouse/ClickHouse/pull/45276) ([flynn](https://github.com/ucasfl)).
+* Throw a correct exception when explicit PREWHERE cannot be used with a table using the storage engine `Merge`. [#45319](https://github.com/ClickHouse/ClickHouse/pull/45319) ([Antonio Andelic](https://github.com/antonio2368)).
+* Under WSL1 Ubuntu, self-extracting ClickHouse failed to decompress due to an inconsistency: /proc/self/maps reports a 32-bit inode for the file, while stat reports a 64-bit one. [#45339](https://github.com/ClickHouse/ClickHouse/pull/45339) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Fix race in Distributed table startup (that could lead to processing file of async INSERT multiple times). [#45360](https://github.com/ClickHouse/ClickHouse/pull/45360) ([Azat Khuzhin](https://github.com/azat)).
+* Fix a possible crash while reading from storage `S3` and table function `s3` in the case when `ListObject` request has failed. [#45371](https://github.com/ClickHouse/ClickHouse/pull/45371) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix `SELECT ... FROM system.dictionaries` exception when there is a dictionary with a bad structure (e.g. incorrect type in XML config). [#45399](https://github.com/ClickHouse/ClickHouse/pull/45399) ([Aleksei Filatov](https://github.com/aalexfvk)).
+* Fix s3Cluster schema inference when structure from insertion table is used in `INSERT INTO ... SELECT * FROM s3Cluster` queries. [#45422](https://github.com/ClickHouse/ClickHouse/pull/45422) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix bug in JSON/BSONEachRow parsing with HTTP that could lead to using default values for some columns instead of values from data. [#45424](https://github.com/ClickHouse/ClickHouse/pull/45424) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fixed a bug (Code: 632. DB::Exception: Unexpected data ... after parsed IPv6 value ...) with typed parsing of IP types from a text source. [#45425](https://github.com/ClickHouse/ClickHouse/pull/45425) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Add a check for empty regular expressions. Closes [#45297](https://github.com/ClickHouse/ClickHouse/issues/45297). [#45428](https://github.com/ClickHouse/ClickHouse/pull/45428) ([Han Fei](https://github.com/hanfei1991)).
+* Fix a possible (likely distributed) query hang. [#45448](https://github.com/ClickHouse/ClickHouse/pull/45448) ([Azat Khuzhin](https://github.com/azat)).
+* Fix possible deadlock with `allow_asynchronous_read_from_io_pool_for_merge_tree` enabled in case of exception from `ThreadPool::schedule`. [#45481](https://github.com/ClickHouse/ClickHouse/pull/45481) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix possible in-use table after DETACH. [#45493](https://github.com/ClickHouse/ClickHouse/pull/45493) ([Azat Khuzhin](https://github.com/azat)).
+* Fix rare abort in the case when a query is canceled and parallel parsing was used during its execution. [#45498](https://github.com/ClickHouse/ClickHouse/pull/45498) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix a race between Distributed table creation and INSERT into it (could lead to CANNOT_LINK during INSERT into the table). [#45502](https://github.com/ClickHouse/ClickHouse/pull/45502) ([Azat Khuzhin](https://github.com/azat)).
+* Add proper default (SLRU) to cache policy getter. Closes [#45514](https://github.com/ClickHouse/ClickHouse/issues/45514). [#45524](https://github.com/ClickHouse/ClickHouse/pull/45524) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Disallow array join in mutations. Closes [#42637](https://github.com/ClickHouse/ClickHouse/issues/42637). [#44447](https://github.com/ClickHouse/ClickHouse/pull/44447) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
+* Fix for qualified asterisks with alias table name and column transformer. Resolves [#44736](https://github.com/ClickHouse/ClickHouse/issues/44736). [#44755](https://github.com/ClickHouse/ClickHouse/pull/44755) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
+
+## [Changelog for 2022](https://clickhouse.com/docs/en/whats-new/changelog/2022)
diff --git a/README.md b/README.md
index 951dbf67160..bcf2643c33d 100644
--- a/README.md
+++ b/README.md
@@ -9,13 +9,12 @@ ClickHouse® is an open-source column-oriented database management system that a
 * [Tutorial](https://clickhouse.com/docs/en/getting_started/tutorial/) shows how to set up and query a small ClickHouse cluster.
 * [Documentation](https://clickhouse.com/docs/en/) provides more in-depth information.
 * [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format.
-* [Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-rxm3rdrk-lIUmhLC3V8WTaL0TGxsOmg) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time.
+* [Slack](https://clickhousedb.slack.com/) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time.
* [Blog](https://clickhouse.com/blog/) contains various ClickHouse-related articles, as well as announcements and reports about events. * [Code Browser (Woboq)](https://clickhouse.com/codebrowser/ClickHouse/index.html) with syntax highlight and navigation. * [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlight, powered by github.dev. * [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any. ## Upcoming events -* **Recording available**: [**v22.12 Release Webinar**](https://www.youtube.com/watch?v=sREupr6uc2k) 22.12 is the ClickHouse Christmas release. There are plenty of gifts (a new JOIN algorithm among them) and we adopted something from MongoDB. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release. -* [**ClickHouse Meetup at the CHEQ office in Tel Aviv**](https://www.meetup.com/clickhouse-tel-aviv-user-group/events/289599423/) - Jan 16 - We are very excited to be holding our next in-person ClickHouse meetup at the CHEQ office in Tel Aviv! Hear from CHEQ, ServiceNow and Contentsquare, as well as a deep dive presentation from ClickHouse CTO Alexey Milovidov. Join us for a fun evening of talks, food and discussion! -* [**ClickHouse Meetup at Microsoft Office in Seattle**](https://www.meetup.com/clickhouse-seattle-user-group/events/290310025/) - Jan 18 - Keep an eye on this space as we will be announcing speakers soon! +* **Recording available**: [**v23.1 Release Webinar**](https://www.youtube.com/watch?v=zYSZXBnTMSE) 23.1 is the ClickHouse New Year release. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release. Inverted indices, query cache, and so -- very -- much more. +* **Recording available**: [**ClickHouse Meetup at the CHEQ office in Tel Aviv**](https://www.meetup.com/clickhouse-tel-aviv-user-group/events/289599423/) - We are very excited to be holding our next in-person ClickHouse meetup at the CHEQ office in Tel Aviv! Hear from CHEQ, ServiceNow and Contentsquare, as well as a deep dive presentation from ClickHouse CTO Alexey Milovidov. Join us for a fun evening of talks, food and discussion! diff --git a/SECURITY.md b/SECURITY.md index 3dcdc5db009..0fd72971d30 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -13,9 +13,10 @@ The following versions of ClickHouse server are currently being supported with s | Version | Supported | |:-|:-| +| 23.1 | ✔️ | | 22.12 | ✔️ | | 22.11 | ✔️ | -| 22.10 | ✔️ | +| 22.10 | ❌ | | 22.9 | ❌ | | 22.8 | ✔️ | | 22.7 | ❌ | @@ -25,18 +26,7 @@ The following versions of ClickHouse server are currently being supported with s | 22.3 | ✔️ | | 22.2 | ❌ | | 22.1 | ❌ | -| 21.12 | ❌ | -| 21.11 | ❌ | -| 21.10 | ❌ | -| 21.9 | ❌ | -| 21.8 | ❌ | -| 21.7 | ❌ | -| 21.6 | ❌ | -| 21.5 | ❌ | -| 21.4 | ❌ | -| 21.3 | ❌ | -| 21.2 | ❌ | -| 21.1 | ❌ | +| 21.* | ❌ | | 20.* | ❌ | | 19.* | ❌ | | 18.* | ❌ | diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 87b11c46f45..812a0d9e64b 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. 
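# The block below is presumably rewritten by the release automation: it moves the tree from the
# pre-rename 22.13 development version to 23.2.1.1-testing, the next development version after the
# 23.1 release (VERSION_REVISION is bumped by one as well).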
-SET(VERSION_REVISION 54470) -SET(VERSION_MAJOR 22) -SET(VERSION_MINOR 13) +SET(VERSION_REVISION 54471) +SET(VERSION_MAJOR 23) +SET(VERSION_MINOR 2) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 688e488e930c83eefeac4f87c4cc029cc5b231e3) -SET(VERSION_DESCRIBE v22.13.1.1-testing) -SET(VERSION_STRING 22.13.1.1) +SET(VERSION_GITHASH dcaac47702510cc87ddf266bc524f6b7ce0a8e6e) +SET(VERSION_DESCRIBE v23.2.1.1-testing) +SET(VERSION_STRING 23.2.1.1) # end of autochange diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index f5d1315cc02..5fc8d960f56 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -140,6 +140,7 @@ add_contrib (simdjson-cmake simdjson) add_contrib (rapidjson-cmake rapidjson) add_contrib (fastops-cmake fastops) add_contrib (libuv-cmake libuv) +add_contrib (liburing-cmake liburing) add_contrib (amqpcpp-cmake AMQP-CPP) # requires: libuv add_contrib (cassandra-cmake cassandra) # requires: libuv diff --git a/contrib/NuRaft b/contrib/NuRaft index afc36dfa9b0..b56784be1ae 160000 --- a/contrib/NuRaft +++ b/contrib/NuRaft @@ -1 +1 @@ -Subproject commit afc36dfa9b0beb45bc4cd935060631cc80ba04a5 +Subproject commit b56784be1aec568fb72aff47f281097c017623cb diff --git a/contrib/arrow b/contrib/arrow index 450a5638704..d03245f801f 160000 --- a/contrib/arrow +++ b/contrib/arrow @@ -1 +1 @@ -Subproject commit 450a5638704386356f8e520080468fc9bc8bcaf8 +Subproject commit d03245f801f798c63ee9a7d2b8914a9e5c5cd666 diff --git a/contrib/icu-cmake/CMakeLists.txt b/contrib/icu-cmake/CMakeLists.txt index ce82155218c..a54bd8c1de2 100644 --- a/contrib/icu-cmake/CMakeLists.txt +++ b/contrib/icu-cmake/CMakeLists.txt @@ -460,8 +460,15 @@ set(ICUI18N_SOURCES file(GENERATE OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/empty.cpp" CONTENT " ") enable_language(ASM) + +if (ARCH_S390X) + set(ICUDATA_SOURCE_FILE "${ICUDATA_SOURCE_DIR}/icudt70b_dat.S" ) +else() + set(ICUDATA_SOURCE_FILE "${ICUDATA_SOURCE_DIR}/icudt70l_dat.S" ) +endif() + set(ICUDATA_SOURCES - "${ICUDATA_SOURCE_DIR}/icudt70l_dat.S" + "${ICUDATA_SOURCE_FILE}" "${CMAKE_CURRENT_BINARY_DIR}/empty.cpp" # Without this cmake can incorrectly detects library type (OBJECT) instead of SHARED/STATIC ) diff --git a/contrib/icudata b/contrib/icudata index 72d9a4a7feb..c8e717892a5 160000 --- a/contrib/icudata +++ b/contrib/icudata @@ -1 +1 @@ -Subproject commit 72d9a4a7febc904e2b0a534ccb25ae40fac5f1e5 +Subproject commit c8e717892a557b4d2852317c7d628aacc0a0e5ab diff --git a/contrib/krb5 b/contrib/krb5 index b89e20367b0..f8262a1b548 160000 --- a/contrib/krb5 +++ b/contrib/krb5 @@ -1 +1 @@ -Subproject commit b89e20367b074bd02dd118a6534099b21e88b3c3 +Subproject commit f8262a1b548eb29d97e059260042036255d07f8d diff --git a/contrib/krb5-cmake/CMakeLists.txt b/contrib/krb5-cmake/CMakeLists.txt index 7e184d424aa..ceaa270ad85 100644 --- a/contrib/krb5-cmake/CMakeLists.txt +++ b/contrib/krb5-cmake/CMakeLists.txt @@ -15,6 +15,10 @@ if(NOT AWK_PROGRAM) message(FATAL_ERROR "You need the awk program to build ClickHouse with krb5 enabled.") endif() +if (NOT (ENABLE_OPENSSL OR ENABLE_OPENSSL_DYNAMIC)) + add_compile_definitions(USE_BORINGSSL=1) +endif () + set(KRB5_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/krb5/src") set(KRB5_ET_BIN_DIR "${CMAKE_CURRENT_BINARY_DIR}/include_private") @@ -578,12 +582,6 @@ if(CMAKE_SYSTEM_NAME MATCHES "Darwin") list(APPEND ALL_SRCS "${CMAKE_CURRENT_BINARY_DIR}/include_private/kcmrpc.c") endif() -if (ENABLE_OPENSSL OR ENABLE_OPENSSL_DYNAMIC) - list(REMOVE_ITEM ALL_SRCS "${KRB5_SOURCE_DIR}/lib/crypto/openssl/enc_provider/aes.c") - list(APPEND 
ALL_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/aes.c") -endif () - - target_sources(_krb5 PRIVATE ${ALL_SRCS} ) diff --git a/contrib/krb5-cmake/aes.c b/contrib/krb5-cmake/aes.c deleted file mode 100644 index c0c8c728bff..00000000000 --- a/contrib/krb5-cmake/aes.c +++ /dev/null @@ -1,302 +0,0 @@ -/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil -*- */ -/* lib/crypto/openssl/enc_provider/aes.c */ -/* - * Copyright (C) 2003, 2007, 2008, 2009 by the Massachusetts Institute of Technology. - * All rights reserved. - * - * Export of this software from the United States of America may - * require a specific license from the United States Government. - * It is the responsibility of any person or organization contemplating - * export to obtain such a license before exporting. - * - * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and - * distribute this software and its documentation for any purpose and - * without fee is hereby granted, provided that the above copyright - * notice appear in all copies and that both that copyright notice and - * this permission notice appear in supporting documentation, and that - * the name of M.I.T. not be used in advertising or publicity pertaining - * to distribution of the software without specific, written prior - * permission. Furthermore if you modify this software you must label - * your software as modified software and not distribute it in such a - * fashion that it might be confused with the original M.I.T. software. - * M.I.T. makes no representations about the suitability of - * this software for any purpose. It is provided "as is" without express - * or implied warranty. - */ - -#include "crypto_int.h" -#include -#include - -/* proto's */ -static krb5_error_code -cbc_enc(krb5_key key, const krb5_data *ivec, krb5_crypto_iov *data, - size_t num_data); -static krb5_error_code -cbc_decr(krb5_key key, const krb5_data *ivec, krb5_crypto_iov *data, - size_t num_data); -static krb5_error_code -cts_encr(krb5_key key, const krb5_data *ivec, krb5_crypto_iov *data, - size_t num_data, size_t dlen); -static krb5_error_code -cts_decr(krb5_key key, const krb5_data *ivec, krb5_crypto_iov *data, - size_t num_data, size_t dlen); - -#define BLOCK_SIZE 16 -#define NUM_BITS 8 -#define IV_CTS_BUF_SIZE 16 /* 16 - hardcoded in CRYPTO_cts128_en/decrypt */ - -static const EVP_CIPHER * -map_mode(unsigned int len) -{ - if (len==16) - return EVP_aes_128_cbc(); - if (len==32) - return EVP_aes_256_cbc(); - else - return NULL; -} - -/* Encrypt one block using CBC. */ -static krb5_error_code -cbc_enc(krb5_key key, const krb5_data *ivec, krb5_crypto_iov *data, - size_t num_data) -{ - int ret, olen = BLOCK_SIZE; - unsigned char iblock[BLOCK_SIZE], oblock[BLOCK_SIZE]; - EVP_CIPHER_CTX *ctx; - struct iov_cursor cursor; - - ctx = EVP_CIPHER_CTX_new(); - if (ctx == NULL) - return ENOMEM; - - ret = EVP_EncryptInit_ex(ctx, map_mode(key->keyblock.length), - NULL, key->keyblock.contents, (ivec) ? (unsigned char*)ivec->data : NULL); - if (ret == 0) { - EVP_CIPHER_CTX_free(ctx); - return KRB5_CRYPTO_INTERNAL; - } - - k5_iov_cursor_init(&cursor, data, num_data, BLOCK_SIZE, FALSE); - k5_iov_cursor_get(&cursor, iblock); - EVP_CIPHER_CTX_set_padding(ctx,0); - ret = EVP_EncryptUpdate(ctx, oblock, &olen, iblock, BLOCK_SIZE); - if (ret == 1) - k5_iov_cursor_put(&cursor, oblock); - EVP_CIPHER_CTX_free(ctx); - - zap(iblock, BLOCK_SIZE); - zap(oblock, BLOCK_SIZE); - return (ret == 1) ? 0 : KRB5_CRYPTO_INTERNAL; -} - -/* Decrypt one block using CBC. 
*/ -static krb5_error_code -cbc_decr(krb5_key key, const krb5_data *ivec, krb5_crypto_iov *data, - size_t num_data) -{ - int ret = 0, olen = BLOCK_SIZE; - unsigned char iblock[BLOCK_SIZE], oblock[BLOCK_SIZE]; - EVP_CIPHER_CTX *ctx; - struct iov_cursor cursor; - - ctx = EVP_CIPHER_CTX_new(); - if (ctx == NULL) - return ENOMEM; - - ret = EVP_DecryptInit_ex(ctx, map_mode(key->keyblock.length), - NULL, key->keyblock.contents, (ivec) ? (unsigned char*)ivec->data : NULL); - if (ret == 0) { - EVP_CIPHER_CTX_free(ctx); - return KRB5_CRYPTO_INTERNAL; - } - - k5_iov_cursor_init(&cursor, data, num_data, BLOCK_SIZE, FALSE); - k5_iov_cursor_get(&cursor, iblock); - EVP_CIPHER_CTX_set_padding(ctx,0); - ret = EVP_DecryptUpdate(ctx, oblock, &olen, iblock, BLOCK_SIZE); - if (ret == 1) - k5_iov_cursor_put(&cursor, oblock); - EVP_CIPHER_CTX_free(ctx); - - zap(iblock, BLOCK_SIZE); - zap(oblock, BLOCK_SIZE); - return (ret == 1) ? 0 : KRB5_CRYPTO_INTERNAL; -} - -static krb5_error_code -cts_encr(krb5_key key, const krb5_data *ivec, krb5_crypto_iov *data, - size_t num_data, size_t dlen) -{ - int ret = 0; - size_t size = 0; - unsigned char *oblock = NULL, *dbuf = NULL; - unsigned char iv_cts[IV_CTS_BUF_SIZE]; - struct iov_cursor cursor; - AES_KEY enck; - - memset(iv_cts,0,sizeof(iv_cts)); - if (ivec && ivec->data){ - if (ivec->length != sizeof(iv_cts)) - return KRB5_CRYPTO_INTERNAL; - memcpy(iv_cts, ivec->data,ivec->length); - } - - oblock = OPENSSL_malloc(dlen); - if (!oblock){ - return ENOMEM; - } - dbuf = OPENSSL_malloc(dlen); - if (!dbuf){ - OPENSSL_free(oblock); - return ENOMEM; - } - - k5_iov_cursor_init(&cursor, data, num_data, dlen, FALSE); - k5_iov_cursor_get(&cursor, dbuf); - - AES_set_encrypt_key(key->keyblock.contents, - NUM_BITS * key->keyblock.length, &enck); - - size = CRYPTO_cts128_encrypt((unsigned char *)dbuf, oblock, dlen, &enck, - iv_cts, AES_cbc_encrypt); - if (size <= 0) - ret = KRB5_CRYPTO_INTERNAL; - else - k5_iov_cursor_put(&cursor, oblock); - - if (!ret && ivec && ivec->data) - memcpy(ivec->data, iv_cts, sizeof(iv_cts)); - - zap(oblock, dlen); - zap(dbuf, dlen); - OPENSSL_free(oblock); - OPENSSL_free(dbuf); - - return ret; -} - -static krb5_error_code -cts_decr(krb5_key key, const krb5_data *ivec, krb5_crypto_iov *data, - size_t num_data, size_t dlen) -{ - int ret = 0; - size_t size = 0; - unsigned char *oblock = NULL; - unsigned char *dbuf = NULL; - unsigned char iv_cts[IV_CTS_BUF_SIZE]; - struct iov_cursor cursor; - AES_KEY deck; - - memset(iv_cts,0,sizeof(iv_cts)); - if (ivec && ivec->data){ - if (ivec->length != sizeof(iv_cts)) - return KRB5_CRYPTO_INTERNAL; - memcpy(iv_cts, ivec->data,ivec->length); - } - - oblock = OPENSSL_malloc(dlen); - if (!oblock) - return ENOMEM; - dbuf = OPENSSL_malloc(dlen); - if (!dbuf){ - OPENSSL_free(oblock); - return ENOMEM; - } - - AES_set_decrypt_key(key->keyblock.contents, - NUM_BITS * key->keyblock.length, &deck); - - k5_iov_cursor_init(&cursor, data, num_data, dlen, FALSE); - k5_iov_cursor_get(&cursor, dbuf); - - size = CRYPTO_cts128_decrypt((unsigned char *)dbuf, oblock, - dlen, &deck, - iv_cts, AES_cbc_encrypt); - if (size <= 0) - ret = KRB5_CRYPTO_INTERNAL; - else - k5_iov_cursor_put(&cursor, oblock); - - if (!ret && ivec && ivec->data) - memcpy(ivec->data, iv_cts, sizeof(iv_cts)); - - zap(oblock, dlen); - zap(dbuf, dlen); - OPENSSL_free(oblock); - OPENSSL_free(dbuf); - - return ret; -} - -krb5_error_code -krb5int_aes_encrypt(krb5_key key, const krb5_data *ivec, - krb5_crypto_iov *data, size_t num_data) -{ - int ret = 0; - size_t input_length, 
nblocks; - - input_length = iov_total_length(data, num_data, FALSE); - nblocks = (input_length + BLOCK_SIZE - 1) / BLOCK_SIZE; - if (nblocks == 1) { - if (input_length != BLOCK_SIZE) - return KRB5_BAD_MSIZE; - ret = cbc_enc(key, ivec, data, num_data); - } else if (nblocks > 1) { - ret = cts_encr(key, ivec, data, num_data, input_length); - } - - return ret; -} - -krb5_error_code -krb5int_aes_decrypt(krb5_key key, const krb5_data *ivec, - krb5_crypto_iov *data, size_t num_data) -{ - int ret = 0; - size_t input_length, nblocks; - - input_length = iov_total_length(data, num_data, FALSE); - nblocks = (input_length + BLOCK_SIZE - 1) / BLOCK_SIZE; - if (nblocks == 1) { - if (input_length != BLOCK_SIZE) - return KRB5_BAD_MSIZE; - ret = cbc_decr(key, ivec, data, num_data); - } else if (nblocks > 1) { - ret = cts_decr(key, ivec, data, num_data, input_length); - } - - return ret; -} - -static krb5_error_code -krb5int_aes_init_state (const krb5_keyblock *key, krb5_keyusage usage, - krb5_data *state) -{ - state->length = 16; - state->data = (void *) malloc(16); - if (state->data == NULL) - return ENOMEM; - memset(state->data, 0, state->length); - return 0; -} -const struct krb5_enc_provider krb5int_enc_aes128 = { - 16, - 16, 16, - krb5int_aes_encrypt, - krb5int_aes_decrypt, - NULL, - krb5int_aes_init_state, - krb5int_default_free_state -}; - -const struct krb5_enc_provider krb5int_enc_aes256 = { - 16, - 32, 32, - krb5int_aes_encrypt, - krb5int_aes_decrypt, - NULL, - krb5int_aes_init_state, - krb5int_default_free_state -}; diff --git a/contrib/liburing b/contrib/liburing new file mode 160000 index 00000000000..f5a48392c4e --- /dev/null +++ b/contrib/liburing @@ -0,0 +1 @@ +Subproject commit f5a48392c4ea33f222cbebeb2e2fc31620162949 diff --git a/contrib/liburing-cmake/CMakeLists.txt b/contrib/liburing-cmake/CMakeLists.txt new file mode 100644 index 00000000000..02bc116c660 --- /dev/null +++ b/contrib/liburing-cmake/CMakeLists.txt @@ -0,0 +1,53 @@ +set (ENABLE_LIBURING_DEFAULT ${ENABLE_LIBRARIES}) + +if (NOT OS_LINUX) + set (ENABLE_LIBURING_DEFAULT OFF) +endif () + +option (ENABLE_LIBURING "Enable liburing" ${ENABLE_LIBURING_DEFAULT}) + +if (NOT ENABLE_LIBURING) + message (STATUS "Not using liburing") + return () +endif () + +set (LIBURING_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/liburing/src/include") +set (LIBURING_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/liburing/src") + +set (SRCS + "${LIBURING_SOURCE_DIR}/queue.c" + "${LIBURING_SOURCE_DIR}/register.c" + "${LIBURING_SOURCE_DIR}/setup.c" + "${LIBURING_SOURCE_DIR}/syscall.c" + "${LIBURING_SOURCE_DIR}/version.c" +) + +add_compile_definitions (_GNU_SOURCE) +add_compile_definitions (LIBURING_INTERNAL) + +set (LIBURING_COMPAT_INCLUDE_DIR "${ClickHouse_BINARY_DIR}/contrib/liburing/src/include-compat") +set (LIBURING_COMPAT_HEADER "${LIBURING_COMPAT_INCLUDE_DIR}/liburing/compat.h") + +set (LIBURING_CONFIG_HAS_KERNEL_RWF_T FALSE) +set (LIBURING_CONFIG_HAS_KERNEL_TIMESPEC FALSE) +set (LIBURING_CONFIG_HAS_OPEN_HOW FALSE) +set (LIBURING_CONFIG_HAS_STATX FALSE) +set (LIBURING_CONFIG_HAS_GLIBC_STATX FALSE) + +configure_file (compat.h.in ${LIBURING_COMPAT_HEADER}) + +set (LIBURING_GENERATED_INCLUDE_DIR "${ClickHouse_BINARY_DIR}/contrib/liburing/src/include") +set (LIBURING_VERSION_HEADER "${LIBURING_GENERATED_INCLUDE_DIR}/liburing/io_uring_version.h") + +file (READ "${LIBURING_SOURCE_DIR}/../liburing.spec" LIBURING_SPEC) + +string (REGEX MATCH "Version: ([0-9]+)\.([0-9]+)" _ ${LIBURING_SPEC}) +set (LIBURING_VERSION_MAJOR ${CMAKE_MATCH_1}) +set 
(LIBURING_VERSION_MINOR ${CMAKE_MATCH_2})
+
+configure_file (io_uring_version.h.in ${LIBURING_VERSION_HEADER})
+
+add_library (_liburing ${SRCS})
+add_library (ch_contrib::liburing ALIAS _liburing)
+
+target_include_directories (_liburing SYSTEM PUBLIC ${LIBURING_COMPAT_INCLUDE_DIR} ${LIBURING_GENERATED_INCLUDE_DIR} "${LIBURING_SOURCE_DIR}/include")
diff --git a/contrib/liburing-cmake/compat.h.in b/contrib/liburing-cmake/compat.h.in
new file mode 100644
index 00000000000..468e529cd33
--- /dev/null
+++ b/contrib/liburing-cmake/compat.h.in
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: MIT */
+#ifndef LIBURING_COMPAT_H
+#define LIBURING_COMPAT_H
+
+# cmakedefine LIBURING_CONFIG_HAS_KERNEL_RWF_T
+# cmakedefine LIBURING_CONFIG_HAS_KERNEL_TIMESPEC
+# cmakedefine LIBURING_CONFIG_HAS_OPEN_HOW
+# cmakedefine LIBURING_CONFIG_HAS_GLIBC_STATX
+# cmakedefine LIBURING_CONFIG_HAS_STATX
+
+#if !defined(LIBURING_CONFIG_HAS_KERNEL_RWF_T)
+typedef int __kernel_rwf_t;
+#endif
+
+#if !defined(LIBURING_CONFIG_HAS_KERNEL_TIMESPEC)
+#include <stdint.h>
+
+struct __kernel_timespec {
+    int64_t tv_sec;
+    long long tv_nsec;
+};
+
+/* <linux/time_types.h> is not available, so it can't be included */
+#define UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H 1
+
+#else
+#include <linux/time_types.h>
+
+/* <linux/time_types.h> is included above and not needed again */
+#define UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H 1
+
+#endif
+
+#if !defined(LIBURING_CONFIG_HAS_OPEN_HOW)
+#include <inttypes.h>
+
+struct open_how {
+    uint64_t flags;
+    uint64_t mode;
+    uint64_t resolve;
+};
+#else
+#include <linux/openat2.h>
+#endif
+
+#if !defined(LIBURING_CONFIG_HAS_GLIBC_STATX) && defined(LIBURING_CONFIG_HAS_STATX)
+#include <sys/stat.h>
+#endif
+
+#endif
diff --git a/contrib/liburing-cmake/io_uring_version.h.in b/contrib/liburing-cmake/io_uring_version.h.in
new file mode 100644
index 00000000000..3fc6132b224
--- /dev/null
+++ b/contrib/liburing-cmake/io_uring_version.h.in
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: MIT */
+#ifndef LIBURING_VERSION_H
+#define LIBURING_VERSION_H
+
+#define IO_URING_VERSION_MAJOR ${LIBURING_VERSION_MAJOR}
+#define IO_URING_VERSION_MINOR ${LIBURING_VERSION_MINOR}
+
+#endif
diff --git a/contrib/openssl-cmake/CMakeLists.txt b/contrib/openssl-cmake/CMakeLists.txt
index dff5dff0936..92739ff3608 100644
--- a/contrib/openssl-cmake/CMakeLists.txt
+++ b/contrib/openssl-cmake/CMakeLists.txt
@@ -1,3 +1,9 @@
+# Note: ClickHouse uses BoringSSL. The presence of OpenSSL is only due to IBM's port of ClickHouse to s390x. BoringSSL does not support
+# s390x; also, FIPS validation provided by the OS vendor (Red Hat, Ubuntu) requires (preferably dynamic) linking with OS packages, which
+# ClickHouse generally avoids.
+#
+# Furthermore, the in-source OpenSSL dump in this directory exists for development purposes only and is not FIPS-compliant.
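+#
+# (Assumed reading of the options, for orientation:) in a default build neither ENABLE_OPENSSL nor
+# ENABLE_OPENSSL_DYNAMIC is set, so the block below is skipped and the BoringSSL sources from contrib
+# are used instead; only builds that explicitly opt into OpenSSL (such as the s390x port) go through it,
+# which is also why the krb5 build above defines USE_BORINGSSL=1 only when neither option is enabled.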
+ if(ENABLE_OPENSSL_DYNAMIC OR ENABLE_OPENSSL) set(ENABLE_SSL 1 CACHE INTERNAL "") set(OPENSSL_SOURCE_DIR ${ClickHouse_SOURCE_DIR}/contrib/openssl) diff --git a/contrib/poco b/contrib/poco index 4b1c8dd9913..7fefdf30244 160000 --- a/contrib/poco +++ b/contrib/poco @@ -1 +1 @@ -Subproject commit 4b1c8dd9913d2a16db62df0e509fa598da5c8219 +Subproject commit 7fefdf30244a9bf8eb58562a9b2a51cc59a8877a diff --git a/contrib/snappy-cmake/CMakeLists.txt b/contrib/snappy-cmake/CMakeLists.txt index 0997ea207e0..50cdc8732a1 100644 --- a/contrib/snappy-cmake/CMakeLists.txt +++ b/contrib/snappy-cmake/CMakeLists.txt @@ -1,6 +1,10 @@ set (SOURCE_DIR "${CMAKE_SOURCE_DIR}/contrib/snappy") -set (SNAPPY_IS_BIG_ENDIAN 0) +if (ARCH_S390X) + set (SNAPPY_IS_BIG_ENDIAN 1) +else () + set (SNAPPY_IS_BIG_ENDIAN 0) +endif() set (HAVE_BYTESWAP_H 1) set (HAVE_SYS_MMAN_H 1) diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 633de4fdbfc..522fd354393 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="22.12.3.5" +ARG VERSION="23.1.2.9" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index b6a93c808c3..434b4c3bff0 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -21,7 +21,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="22.12.3.5" +ARG VERSION="23.1.2.9" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 2df50a7934c..cf33dffa646 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -138,6 +138,8 @@ function clone_submodules contrib/c-ares contrib/morton-nd contrib/xxHash + contrib/simdjson + contrib/liburing ) git submodule sync @@ -158,7 +160,9 @@ function run_cmake "-DENABLE_THINLTO=0" "-DUSE_UNWIND=1" "-DENABLE_NURAFT=1" + "-DENABLE_SIMDJSON=1" "-DENABLE_JEMALLOC=1" + "-DENABLE_LIBURING=1" ) export CCACHE_DIR="$FASTTEST_WORKSPACE/ccache" @@ -234,6 +238,7 @@ function run_tests --check-zookeeper-session --order random --print-time + --report-logs-stats --jobs "${NPROC}" ) time clickhouse-test "${test_opts[@]}" -- "$FASTTEST_FOCUS" 2>&1 \ diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index 3458cf905da..75f2a0af358 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -18,13 +18,25 @@ repo_dir=ch BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-15_debug_none_unsplitted_disable_False_binary"} BINARY_URL_TO_DOWNLOAD=${BINARY_URL_TO_DOWNLOAD:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"} +function git_clone_with_retry +{ + for _ in 1 2 3 4; do + if git clone --depth 1 https://github.com/ClickHouse/ClickHouse.git -- "$1" 2>&1 | ts '%Y-%m-%d %H:%M:%S';then + return 0 + else + sleep 0.5 + fi + done + return 1 +} + function clone { # For local runs, start directly from the "fuzz" stage. 
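    # git_clone_with_retry (defined above) makes up to four shallow-clone attempts, sleeping 0.5s after
    # each failure, and returns non-zero only if every attempt fails, so a transient network error does
    # not kill the whole fuzzer run.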
rm -rf "$repo_dir" ||: mkdir "$repo_dir" ||: - git clone --depth 1 https://github.com/ClickHouse/ClickHouse.git -- "$repo_dir" 2>&1 | ts '%Y-%m-%d %H:%M:%S' + git_clone_with_retry "$repo_dir" ( cd "$repo_dir" if [ "$PR_TO_TEST" != "0" ]; then diff --git a/docker/test/performance-comparison/README.md b/docker/test/performance-comparison/README.md index 719fbd82b22..fd9001e23c7 100644 --- a/docker/test/performance-comparison/README.md +++ b/docker/test/performance-comparison/README.md @@ -50,7 +50,7 @@ Action required for every item -- these are errors that must be fixed. A query is supposed to run longer than 0.1 second. If your query runs faster, increase the amount of processed data to bring the run time above this threshold. You can use a bigger table (e.g. `hits_100m` instead of `hits_10m`), increase a `LIMIT`, make a query single-threaded, and so on. Queries that are too fast suffer from poor stability and precision. -#### Partial Queries +#### Backward-incompatible Queries Action required for the cells marked in red. Shows the queries we are unable to run on an old server -- probably because they contain a new function. You should see this table when you add a new function and a performance test for it. Check that the run time and variance are acceptable (run time between 0.1 and 1 seconds, variance below 10%). If not, they will be highlighted in red. diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 4733cfd3924..338a0c02a55 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -399,7 +399,7 @@ clickhouse-local --query " create view query_runs as select * from file('analyze/query-runs.tsv', TSV, 'test text, query_index int, query_id text, version UInt8, time float'); --- Separately process 'partial' queries which we could only run on the new server +-- Separately process backward-incompatible ('partial') queries which we could only run on the new server -- because they use new functions. We can't make normal stats for them, but still -- have to show some stats so that the PR author can tweak them. create view partial_queries as select test, query_index @@ -650,7 +650,7 @@ create view partial_query_times as select * from 'test text, query_index int, time_stddev float, time_median double') ; --- Report for partial queries that we could only run on the new server (e.g. +-- Report for backward-incompatible ('partial') queries that we could only run on the new server (e.g. -- queries with new functions added in the tested PR). create table partial_queries_report engine File(TSV, 'report/partial-queries-report.tsv') settings output_format_decimal_trailing_zeros = 1 @@ -829,7 +829,7 @@ create view query_runs as select * from file('analyze/query-runs.tsv', TSV, -- Guess the number of query runs used for this test. The number is required to -- calculate and check the average query run time in the report. -- We have to be careful, because we will encounter: --- 1) partial queries which run only on one server +-- 1) backward-incompatible ('partial') queries which run only on one server -- 3) some errors that make query run for a different number of times on a -- particular server. 
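-- (Illustration with assumed numbers:) if a test performed 7 runs of every comparable query, a
-- backward-incompatible query contributes runs from the new server only, so lumping it in with the
-- rest would skew the per-query run-count estimate, hence the separate handling above.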
-- diff --git a/docker/test/performance-comparison/report.py b/docker/test/performance-comparison/report.py index 960f23be95c..782cf29863c 100755 --- a/docker/test/performance-comparison/report.py +++ b/docker/test/performance-comparison/report.py @@ -30,7 +30,7 @@ faster_queries = 0 slower_queries = 0 unstable_queries = 0 very_unstable_queries = 0 -unstable_partial_queries = 0 +unstable_backward_incompatible_queries = 0 # max seconds to run one query by itself, not counting preparation allowed_single_run_time = 2 @@ -378,13 +378,13 @@ if args.report == "main": ] ) - def add_partial(): + def add_backward_incompatible(): rows = tsvRows("report/partial-queries-report.tsv") if not rows: return - global unstable_partial_queries, slow_average_tests, tables - text = tableStart("Partial Queries") + global unstable_backward_incompatible_queries, slow_average_tests, tables + text = tableStart("Backward-incompatible queries") columns = ["Median time, s", "Relative time variance", "Test", "#", "Query"] text += tableHeader(columns) attrs = ["" for c in columns] @@ -392,7 +392,7 @@ if args.report == "main": anchor = f"{currentTableAnchor()}.{row[2]}.{row[3]}" if float(row[1]) > 0.10: attrs[1] = f'style="background: {color_bad}"' - unstable_partial_queries += 1 + unstable_backward_incompatible_queries += 1 errors_explained.append( [ f"The query no. {row[3]} of test '{row[2]}' has excessive variance of run time. Keep it below 10%" @@ -414,7 +414,7 @@ if args.report == "main": text += tableEnd() tables.append(text) - add_partial() + add_backward_incompatible() def add_changes(): rows = tsvRows("report/changed-perf.tsv") @@ -630,8 +630,8 @@ if args.report == "main": status = "failure" message_array.append(str(slower_queries) + " slower") - if unstable_partial_queries: - very_unstable_queries += unstable_partial_queries + if unstable_backward_incompatible_queries: + very_unstable_queries += unstable_backward_incompatible_queries status = "failure" # Don't show mildly unstable queries, only the very unstable ones we diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index d0d76fb0525..aa242bfa98d 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -11,6 +11,18 @@ set -x # core.COMM.PID-TID sysctl kernel.core_pattern='core.%e.%p-%P' +OK="\tOK\t\\N\t" +FAIL="\tFAIL\t\\N\t" +function escaped() +{ + # That's the simplest way I found to escape a string in bash. Yep, bash is the most convenient programming language. 
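+    # Roughly what this achieves: each input line is read as one String row (LineAsString), written back
+    # with escaping applied to tabs, newlines and backslashes, and the rows are joined with a literal \n
+    # sequence (format_custom_row_after_delimiter), so that a multi-line log excerpt fits into a single
+    # field of test_results.tsv. The pile of backslashes exists because the delimiter passes through bash
+    # and SQL string quoting before the format sees it. E.g. `head -50 some.log | escaped` (as done in
+    # head_escaped below) turns up to 50 log lines into one such chunk.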
+ clickhouse local -S 's String' --input-format=LineAsString -q "select * from table format CustomSeparated settings format_custom_row_after_delimiter='\\\\\\\\n'" +} + +function head_escaped() +{ + head -50 $1 | escaped +} function install_packages() { @@ -33,7 +45,9 @@ function configure() ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_previous_release_tag # avoid too slow startup - sudo cat /etc/clickhouse-server/config.d/keeper_port.xml | sed "s|100000|10000|" > /etc/clickhouse-server/config.d/keeper_port.xml.tmp + sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \ + | sed "s|100000|10000|" \ + > /etc/clickhouse-server/config.d/keeper_port.xml.tmp sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml sudo chown clickhouse /etc/clickhouse-server/config.d/keeper_port.xml sudo chgrp clickhouse /etc/clickhouse-server/config.d/keeper_port.xml @@ -136,6 +150,7 @@ function stop() clickhouse stop --max-tries "$max_tries" --do-not-kill && return # We failed to stop the server with SIGTERM. Maybe it hang, let's collect stacktraces. + echo -e "Possible deadlock on shutdown (see gdb.log)$FAIL" >> /test_output/test_results.tsv kill -TERM "$(pidof gdb)" ||: sleep 5 echo "thread apply all backtrace (on stop)" >> /test_output/gdb.log @@ -151,10 +166,11 @@ function start() if [ "$counter" -gt ${1:-120} ] then echo "Cannot start clickhouse-server" - echo -e "Cannot start clickhouse-server\tFAIL" >> /test_output/test_results.tsv + rg --text ".*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt ||: + echo -e "Cannot start clickhouse-server$FAIL$(head_escaped /test_output/application_errors.txt)" >> /test_output/test_results.tsv cat /var/log/clickhouse-server/stdout.log - tail -n1000 /var/log/clickhouse-server/stderr.log - tail -n100000 /var/log/clickhouse-server/clickhouse-server.log | rg -F -v -e ' RaftInstance:' -e ' RaftInstance' | tail -n1000 + tail -n100 /var/log/clickhouse-server/stderr.log + tail -n100000 /var/log/clickhouse-server/clickhouse-server.log | rg -F -v -e ' RaftInstance:' -e ' RaftInstance' | tail -n100 break fi # use root to match with current uid @@ -252,9 +268,92 @@ start clickhouse-client --query "SHOW TABLES FROM datasets" clickhouse-client --query "SHOW TABLES FROM test" -clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset 
String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" -clickhouse-client --query "CREATE TABLE test.hits (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, 
SocialAction String, HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" -clickhouse-client --query "CREATE TABLE test.visits (CounterID UInt32, StartDate Date, Sign Int8, IsNew UInt8, VisitID UInt64, UserID UInt64, StartTime DateTime, Duration UInt32, UTCStartTime DateTime, PageViews Int32, Hits Int32, IsBounce UInt8, Referer String, StartURL String, RefererDomain String, StartURLDomain String, EndURL String, LinkURL String, IsDownload UInt8, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, PlaceID Int32, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), RefererRegions Array(UInt32), IsYandex UInt8, GoalReachesDepth Int32, GoalReachesURL Int32, GoalReachesAny Int32, SocialSourceNetworkID UInt8, SocialSourcePage String, MobilePhoneModel String, ClientEventTime DateTime, RegionID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RemoteIP UInt32, RemoteIP6 FixedString(16), IPNetworkID UInt32, SilverlightVersion3 UInt32, CodeVersion UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, UserAgentMajor UInt16, UserAgentMinor UInt16, WindowClientWidth UInt16, WindowClientHeight UInt16, SilverlightVersion2 UInt8, SilverlightVersion4 UInt16, FlashVersion3 UInt16, FlashVersion4 UInt16, ClientTimeZone Int16, OS UInt8, UserAgent UInt8, ResolutionDepth UInt8, FlashMajor UInt8, FlashMinor UInt8, NetMajor UInt8, NetMinor UInt8, MobilePhone UInt8, SilverlightVersion1 UInt8, Age UInt8, Sex UInt8, Income UInt8, JavaEnable UInt8, CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, BrowserLanguage UInt16, BrowserCountry UInt16, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), Params Array(String), Goals Nested(ID UInt32, Serial UInt32, EventTime DateTime, Price Int64, OrderID String, CurrencyID UInt32), WatchIDs Array(UInt64), ParamSumPrice Int64, ParamCurrency FixedString(3), ParamCurrencyID UInt16, ClickLogID UInt64, ClickEventID Int32, ClickGoodEvent Int32, ClickEventTime DateTime, ClickPriorityID Int32, ClickPhraseID Int32, ClickPageID Int32, ClickPlaceID Int32, ClickTypeID Int32, ClickResourceID Int32, ClickCost UInt32, ClickClientIP UInt32, ClickDomainID UInt32, ClickURL String, ClickAttempt UInt8, ClickOrderID UInt32, ClickBannerID UInt32, ClickMarketCategoryID UInt32, ClickMarketPP UInt32, 
ClickMarketCategoryName String, ClickMarketPPName String, ClickAWAPSCampaignName String, ClickPageName String, ClickTargetType UInt16, ClickTargetPhraseID UInt64, ClickContextType UInt8, ClickSelectType Int8, ClickOptions String, ClickGroupBannerID Int32, OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, FirstVisit DateTime, PredLastVisit Date, LastVisit Date, TotalVisits UInt32, TraficSource Nested(ID Int8, SearchEngineID UInt16, AdvEngineID UInt8, PlaceID UInt16, SocialSourceNetworkID UInt8, Domain String, SearchPhrase String, SocialSourcePage String), Attendance FixedString(16), CLID UInt32, YCLID UInt64, NormalizedRefererHash UInt64, SearchPhraseHash UInt64, RefererDomainHash UInt64, NormalizedStartURLHash UInt64, StartURLDomainHash UInt64, NormalizedEndURLHash UInt64, TopLevelDomain UInt64, URLScheme UInt64, OpenstatServiceNameHash UInt64, OpenstatCampaignIDHash UInt64, OpenstatAdIDHash UInt64, OpenstatSourceIDHash UInt64, UTMSourceHash UInt64, UTMMediumHash UInt64, UTMCampaignHash UInt64, UTMContentHash UInt64, UTMTermHash UInt64, FromHash UInt64, WebVisorEnabled UInt8, WebVisorActivity UInt32, ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), Market Nested(Type UInt8, GoalID UInt32, OrderID String, OrderPrice Int64, PP UInt32, DirectPlaceID UInt32, DirectOrderID UInt32, DirectBannerID UInt32, GoodID String, GoodName String, GoodQuantity Int32, GoodPrice Int64), IslandID FixedString(16)) ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" +clickhouse-client --query "CREATE TABLE test.hits_s3 (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, + EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, + UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, RefererDomain String, + Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), + RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, FlashMajor UInt8, + FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, UserAgentMinor FixedString(2), + CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, MobilePhoneModel String, Params String, + IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, + WindowClientWidth UInt16, WindowClientHeight UInt16, ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, + SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, + IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, + IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, + Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), RemoteIP UInt32, + RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, HistoryLength Int16, BrowserLanguage 
FixedString(2), + BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, HTTPError UInt16, SendTiming Int32, + DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, FetchTiming Int32, + RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, + LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, + RedirectCount Int8, SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, + ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, + OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, + UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, + URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, + ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), + IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) + ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" +clickhouse-client --query "CREATE TABLE test.hits (WatchID UInt64, JavaEnable UInt8, Title String, GoodEvent Int16, + EventTime DateTime, EventDate Date, CounterID UInt32, ClientIP UInt32, ClientIP6 FixedString(16), RegionID UInt32, + UserID UInt64, CounterClass Int8, OS UInt8, UserAgent UInt8, URL String, Referer String, URLDomain String, + RefererDomain String, Refresh UInt8, IsRobot UInt8, RefererCategories Array(UInt16), URLCategories Array(UInt16), + URLRegions Array(UInt32), RefererRegions Array(UInt32), ResolutionWidth UInt16, ResolutionHeight UInt16, ResolutionDepth UInt8, + FlashMajor UInt8, FlashMinor UInt8, FlashMinor2 String, NetMajor UInt8, NetMinor UInt8, UserAgentMajor UInt16, + UserAgentMinor FixedString(2), CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, MobilePhone UInt8, + MobilePhoneModel String, Params String, IPNetworkID UInt32, TraficSourceID Int8, SearchEngineID UInt16, + SearchPhrase String, AdvEngineID UInt8, IsArtifical UInt8, WindowClientWidth UInt16, WindowClientHeight UInt16, + ClientTimeZone Int16, ClientEventTime DateTime, SilverlightVersion1 UInt8, SilverlightVersion2 UInt8, SilverlightVersion3 UInt32, + SilverlightVersion4 UInt16, PageCharset String, CodeVersion UInt32, IsLink UInt8, IsDownload UInt8, IsNotBounce UInt8, + FUniqID UInt64, HID UInt32, IsOldCounter UInt8, IsEvent UInt8, IsParameter UInt8, DontCountHits UInt8, WithHash UInt8, + HitColor FixedString(1), UTCEventTime DateTime, Age UInt8, Sex UInt8, Income UInt8, Interests UInt16, Robotness UInt8, + GeneralInterests Array(UInt16), RemoteIP UInt32, RemoteIP6 FixedString(16), WindowName Int32, OpenerName Int32, + HistoryLength Int16, BrowserLanguage FixedString(2), BrowserCountry FixedString(2), SocialNetwork String, SocialAction String, + HTTPError UInt16, SendTiming Int32, DNSTiming Int32, ConnectTiming Int32, ResponseStartTiming Int32, ResponseEndTiming Int32, + FetchTiming Int32, RedirectTiming Int32, DOMInteractiveTiming Int32, DOMContentLoadedTiming Int32, DOMCompleteTiming Int32, + LoadEventStartTiming Int32, LoadEventEndTiming Int32, NSToDOMContentLoadedTiming Int32, FirstPaintTiming Int32, + RedirectCount Int8, 
SocialSourceNetworkID UInt8, SocialSourcePage String, ParamPrice Int64, ParamOrderID String, + ParamCurrency FixedString(3), ParamCurrencyID UInt16, GoalsReached Array(UInt32), OpenstatServiceName String, + OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, UTMSource String, UTMMedium String, + UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, RefererHash UInt64, + URLHash UInt64, CLID UInt32, YCLID UInt64, ShareService String, ShareURL String, ShareTitle String, + ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), + IslandID FixedString(16), RequestNum UInt32, RequestTry UInt8) ENGINE = MergeTree() PARTITION BY toYYYYMM(EventDate) + ORDER BY (CounterID, EventDate, intHash32(UserID)) SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" +clickhouse-client --query "CREATE TABLE test.visits (CounterID UInt32, StartDate Date, Sign Int8, IsNew UInt8, + VisitID UInt64, UserID UInt64, StartTime DateTime, Duration UInt32, UTCStartTime DateTime, PageViews Int32, + Hits Int32, IsBounce UInt8, Referer String, StartURL String, RefererDomain String, StartURLDomain String, + EndURL String, LinkURL String, IsDownload UInt8, TraficSourceID Int8, SearchEngineID UInt16, SearchPhrase String, + AdvEngineID UInt8, PlaceID Int32, RefererCategories Array(UInt16), URLCategories Array(UInt16), URLRegions Array(UInt32), + RefererRegions Array(UInt32), IsYandex UInt8, GoalReachesDepth Int32, GoalReachesURL Int32, GoalReachesAny Int32, + SocialSourceNetworkID UInt8, SocialSourcePage String, MobilePhoneModel String, ClientEventTime DateTime, RegionID UInt32, + ClientIP UInt32, ClientIP6 FixedString(16), RemoteIP UInt32, RemoteIP6 FixedString(16), IPNetworkID UInt32, + SilverlightVersion3 UInt32, CodeVersion UInt32, ResolutionWidth UInt16, ResolutionHeight UInt16, UserAgentMajor UInt16, + UserAgentMinor UInt16, WindowClientWidth UInt16, WindowClientHeight UInt16, SilverlightVersion2 UInt8, SilverlightVersion4 UInt16, + FlashVersion3 UInt16, FlashVersion4 UInt16, ClientTimeZone Int16, OS UInt8, UserAgent UInt8, ResolutionDepth UInt8, + FlashMajor UInt8, FlashMinor UInt8, NetMajor UInt8, NetMinor UInt8, MobilePhone UInt8, SilverlightVersion1 UInt8, + Age UInt8, Sex UInt8, Income UInt8, JavaEnable UInt8, CookieEnable UInt8, JavascriptEnable UInt8, IsMobile UInt8, + BrowserLanguage UInt16, BrowserCountry UInt16, Interests UInt16, Robotness UInt8, GeneralInterests Array(UInt16), + Params Array(String), Goals Nested(ID UInt32, Serial UInt32, EventTime DateTime, Price Int64, OrderID String, CurrencyID UInt32), + WatchIDs Array(UInt64), ParamSumPrice Int64, ParamCurrency FixedString(3), ParamCurrencyID UInt16, ClickLogID UInt64, + ClickEventID Int32, ClickGoodEvent Int32, ClickEventTime DateTime, ClickPriorityID Int32, ClickPhraseID Int32, ClickPageID Int32, + ClickPlaceID Int32, ClickTypeID Int32, ClickResourceID Int32, ClickCost UInt32, ClickClientIP UInt32, ClickDomainID UInt32, + ClickURL String, ClickAttempt UInt8, ClickOrderID UInt32, ClickBannerID UInt32, ClickMarketCategoryID UInt32, ClickMarketPP UInt32, + ClickMarketCategoryName String, ClickMarketPPName String, ClickAWAPSCampaignName String, ClickPageName String, ClickTargetType UInt16, + ClickTargetPhraseID UInt64, ClickContextType UInt8, ClickSelectType Int8, ClickOptions String, ClickGroupBannerID Int32, + OpenstatServiceName String, OpenstatCampaignID String, OpenstatAdID String, OpenstatSourceID String, 
UTMSource String, + UTMMedium String, UTMCampaign String, UTMContent String, UTMTerm String, FromTag String, HasGCLID UInt8, FirstVisit DateTime, + PredLastVisit Date, LastVisit Date, TotalVisits UInt32, TraficSource Nested(ID Int8, SearchEngineID UInt16, AdvEngineID UInt8, + PlaceID UInt16, SocialSourceNetworkID UInt8, Domain String, SearchPhrase String, SocialSourcePage String), Attendance FixedString(16), + CLID UInt32, YCLID UInt64, NormalizedRefererHash UInt64, SearchPhraseHash UInt64, RefererDomainHash UInt64, NormalizedStartURLHash UInt64, + StartURLDomainHash UInt64, NormalizedEndURLHash UInt64, TopLevelDomain UInt64, URLScheme UInt64, OpenstatServiceNameHash UInt64, + OpenstatCampaignIDHash UInt64, OpenstatAdIDHash UInt64, OpenstatSourceIDHash UInt64, UTMSourceHash UInt64, UTMMediumHash UInt64, + UTMCampaignHash UInt64, UTMContentHash UInt64, UTMTermHash UInt64, FromHash UInt64, WebVisorEnabled UInt8, WebVisorActivity UInt32, + ParsedParams Nested(Key1 String, Key2 String, Key3 String, Key4 String, Key5 String, ValueDouble Float64), + Market Nested(Type UInt8, GoalID UInt32, OrderID String, OrderPrice Int64, PP UInt32, DirectPlaceID UInt32, DirectOrderID UInt32, + DirectBannerID UInt32, GoodID String, GoodName String, GoodQuantity Int32, GoodPrice Int64), IslandID FixedString(16)) + ENGINE = CollapsingMergeTree(Sign) PARTITION BY toYYYYMM(StartDate) ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID) + SAMPLE BY intHash32(UserID) SETTINGS index_granularity = 8192, storage_policy='s3_cache'" clickhouse-client --query "INSERT INTO test.hits_s3 SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0" clickhouse-client --query "INSERT INTO test.hits SELECT * FROM datasets.hits_v1 SETTINGS enable_filesystem_cache_on_write_operations=0" @@ -275,7 +374,9 @@ export ZOOKEEPER_FAULT_INJECTION=1 configure # But we still need default disk because some tables loaded only into it -sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml | sed "s|
s3
|
s3
default|" > /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp +sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \ + | sed "s|
s3
|
s3
default|" \ + > /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml @@ -283,8 +384,12 @@ sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_defau start ./stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \ - && echo -e 'Test script exit code\tOK' >> /test_output/test_results.tsv \ - || echo -e 'Test script failed\tFAIL' >> /test_output/test_results.tsv + && echo -e "Test script exit code$OK" >> /test_output/test_results.tsv \ + || echo -e "Test script failed$FAIL script exit code: $?" >> /test_output/test_results.tsv + +# NOTE Hung check is implemented in docker/tests/stress/stress +rg -Fa "No queries hung" /test_output/test_results.tsv | grep -Fa "OK" \ + || echo -e "Hung check failed, possible deadlock found (see hung_check.log)$FAIL$(head_escaped /test_output/hung_check.log)" stop mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.stress.log @@ -295,9 +400,10 @@ unset "${!THREAD_@}" start -clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_output/test_results.tsv \ - || (echo -e 'Server failed to start (see application_errors.txt and clickhouse-server.clean.log)\tFAIL' >> /test_output/test_results.tsv \ - && rg --text ".*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt) +clickhouse-client --query "SELECT 'Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \ + || (rg --text ".*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt \ + && echo -e "Server failed to start (see application_errors.txt and clickhouse-server.clean.log)$FAIL$(head_escaped /test_output/application_errors.txt)" \ + >> /test_output/test_results.tsv) stop @@ -310,49 +416,49 @@ stop rg -Fa "==================" /var/log/clickhouse-server/stderr.log | rg -v "in query:" >> /test_output/tmp rg -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp rg -Fav -e "ASan doesn't fully support makecontext/swapcontext functions" -e "DB::Exception" /test_output/tmp > /dev/null \ - && echo -e 'Sanitizer assert (in stderr.log)\tFAIL' >> /test_output/test_results.tsv \ - || echo -e 'No sanitizer asserts\tOK' >> /test_output/test_results.tsv + && echo -e "Sanitizer assert (in stderr.log)$FAIL$(head_escaped /test_output/tmp)" >> /test_output/test_results.tsv \ + || echo -e "No sanitizer asserts$OK" >> /test_output/test_results.tsv rm -f /test_output/tmp # OOM rg -Fa " Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server*.log > /dev/null \ - && echo -e 'OOM killer (or signal 9) in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \ - || echo -e 'No OOM messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv + && echo -e "Signal 9 in clickhouse-server.log$FAIL" >> /test_output/test_results.tsv \ + || echo -e "No OOM messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv # Logical errors -rg -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server*.log > 
/test_output/logical_errors.txt \ - && echo -e 'Logical error thrown (see clickhouse-server.log or logical_errors.txt)\tFAIL' >> /test_output/test_results.tsv \ - || echo -e 'No logical errors\tOK' >> /test_output/test_results.tsv +rg -Fa "Code: 49. DB::Exception: " /var/log/clickhouse-server/clickhouse-server*.log > /test_output/logical_errors.txt \ + && echo -e "Logical error thrown (see clickhouse-server.log or logical_errors.txt)$FAIL$(head_escaped /test_output/logical_errors.txt)" >> /test_output/test_results.tsv \ + || echo -e "No logical errors$OK" >> /test_output/test_results.tsv # Remove file logical_errors.txt if it's empty [ -s /test_output/logical_errors.txt ] || rm /test_output/logical_errors.txt # No such key errors rg --text "Code: 499.*The specified key does not exist" /var/log/clickhouse-server/clickhouse-server*.log > /test_output/no_such_key_errors.txt \ - && echo -e 'S3_ERROR No such key thrown (see clickhouse-server.log or no_such_key_errors.txt)\tFAIL' >> /test_output/test_results.tsv \ - || echo -e 'No lost s3 keys\tOK' >> /test_output/test_results.tsv + && echo -e "S3_ERROR No such key thrown (see clickhouse-server.log or no_such_key_errors.txt)$FAIL$(head_escaped /test_output/no_such_key_errors.txt)" >> /test_output/test_results.tsv \ + || echo -e "No lost s3 keys$OK" >> /test_output/test_results.tsv # Remove file no_such_key_errors.txt if it's empty [ -s /test_output/no_such_key_errors.txt ] || rm /test_output/no_such_key_errors.txt # Crash rg -Fa "########################################" /var/log/clickhouse-server/clickhouse-server*.log > /dev/null \ - && echo -e 'Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \ - || echo -e 'Not crashed\tOK' >> /test_output/test_results.tsv + && echo -e "Killed by signal (in clickhouse-server.log)$FAIL" >> /test_output/test_results.tsv \ + || echo -e "Not crashed$OK" >> /test_output/test_results.tsv # It also checks for crash without stacktrace (printed by watchdog) rg -Fa " " /var/log/clickhouse-server/clickhouse-server*.log > /test_output/fatal_messages.txt \ - && echo -e 'Fatal message in clickhouse-server.log (see fatal_messages.txt)\tFAIL' >> /test_output/test_results.tsv \ - || echo -e 'No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv + && echo -e "Fatal message in clickhouse-server.log (see fatal_messages.txt)$FAIL$(head_escaped /test_output/fatal_messages.txt)" >> /test_output/test_results.tsv \ + || echo -e "No fatal messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv # Remove file fatal_messages.txt if it's empty [ -s /test_output/fatal_messages.txt ] || rm /test_output/fatal_messages.txt rg -Fa "########################################" /test_output/* > /dev/null \ - && echo -e 'Killed by signal (output files)\tFAIL' >> /test_output/test_results.tsv + && echo -e "Killed by signal (output files)$FAIL" >> /test_output/test_results.tsv rg -Fa " received signal " /test_output/gdb.log > /dev/null \ - && echo -e 'Found signal in gdb.log\tFAIL' >> /test_output/test_results.tsv + && echo -e "Found signal in gdb.log$FAIL$(rg -A50 -Fa " received signal " /test_output/gdb.log | escaped)" >> /test_output/test_results.tsv if [ "$DISABLE_BC_CHECK" -ne "1" ]; then echo -e "Backward compatibility check\n" @@ -367,8 +473,8 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then echo "Download clickhouse-server from the previous release" mkdir previous_release_package_folder - echo $previous_release_tag | download_release_packages && echo -e 'Download script 
exit code\tOK' >> /test_output/test_results.tsv \ - || echo -e 'Download script failed\tFAIL' >> /test_output/test_results.tsv + echo $previous_release_tag | download_release_packages && echo -e "Download script exit code$OK" >> /test_output/test_results.tsv \ + || echo -e "Download script failed$FAIL" >> /test_output/test_results.tsv mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.clean.log for table in query_log trace_log @@ -381,13 +487,13 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then # Check if we cloned previous release repository successfully if ! [ "$(ls -A previous_release_repository/tests/queries)" ] then - echo -e "Backward compatibility check: Failed to clone previous release tests\tFAIL" >> /test_output/test_results.tsv + echo -e "Backward compatibility check: Failed to clone previous release tests$FAIL" >> /test_output/test_results.tsv elif ! [ "$(ls -A previous_release_package_folder/clickhouse-common-static_*.deb && ls -A previous_release_package_folder/clickhouse-server_*.deb)" ] then - echo -e "Backward compatibility check: Failed to download previous release packages\tFAIL" >> /test_output/test_results.tsv + echo -e "Backward compatibility check: Failed to download previous release packages$FAIL" >> /test_output/test_results.tsv else - echo -e "Successfully cloned previous release tests\tOK" >> /test_output/test_results.tsv - echo -e "Successfully downloaded previous release packages\tOK" >> /test_output/test_results.tsv + echo -e "Successfully cloned previous release tests$OK" >> /test_output/test_results.tsv + echo -e "Successfully downloaded previous release packages$OK" >> /test_output/test_results.tsv # Uninstall current packages dpkg --remove clickhouse-client @@ -446,9 +552,10 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then mkdir tmp_stress_output - ./stress --test-cmd="/usr/bin/clickhouse-test --queries=\"previous_release_repository/tests/queries\"" --backward-compatibility-check --output-folder tmp_stress_output --global-time-limit=1200 \ - && echo -e 'Backward compatibility check: Test script exit code\tOK' >> /test_output/test_results.tsv \ - || echo -e 'Backward compatibility check: Test script failed\tFAIL' >> /test_output/test_results.tsv + ./stress --test-cmd="/usr/bin/clickhouse-test --queries=\"previous_release_repository/tests/queries\"" \ + --backward-compatibility-check --output-folder tmp_stress_output --global-time-limit=1200 \ + && echo -e "Backward compatibility check: Test script exit code$OK" >> /test_output/test_results.tsv \ + || echo -e "Backward compatibility check: Test script failed$FAIL" >> /test_output/test_results.tsv rm -rf tmp_stress_output # We experienced deadlocks in this command in very rare cases. 
Let's debug it: @@ -470,9 +577,9 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then export ZOOKEEPER_FAULT_INJECTION=0 configure start 500 - clickhouse-client --query "SELECT 'Backward compatibility check: Server successfully started', 'OK'" >> /test_output/test_results.tsv \ - || (echo -e 'Backward compatibility check: Server failed to start\tFAIL' >> /test_output/test_results.tsv \ - && rg --text ".*Application" /var/log/clickhouse-server/clickhouse-server.log >> /test_output/bc_check_application_errors.txt) + clickhouse-client --query "SELECT 'Backward compatibility check: Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \ + || (rg --text ".*Application" /var/log/clickhouse-server/clickhouse-server.log >> /test_output/bc_check_application_errors.txt \ + && echo -e "Backward compatibility check: Server failed to start$FAIL$(head_escaped /test_output/bc_check_application_errors.txt)" >> /test_output/test_results.tsv) clickhouse-client --query="SELECT 'Server version: ', version()" @@ -488,8 +595,6 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then # FIXME Not sure if it's expected, but some tests from BC check may not be finished yet when we restarting server. # Let's just ignore all errors from queries ("} TCPHandler: Code:", "} executeQuery: Code:") # FIXME https://github.com/ClickHouse/ClickHouse/issues/39197 ("Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'") - # NOTE Incompatibility was introduced in https://github.com/ClickHouse/ClickHouse/pull/39263, it's expected - # ("This engine is deprecated and is not supported in transactions", "[Queue = DB::MergeMutateRuntimeQueue]: Code: 235. DB::Exception: Part") # FIXME https://github.com/ClickHouse/ClickHouse/issues/39174 - bad mutation does not indicate backward incompatibility echo "Check for Error messages in server log:" rg -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \ @@ -519,7 +624,6 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then -e "} TCPHandler: Code:" \ -e "} executeQuery: Code:" \ -e "Missing columns: 'v3' while processing query: 'v3, k, v1, v2, p'" \ - -e "This engine is deprecated and is not supported in transactions" \ -e "[Queue = DB::MergeMutateRuntimeQueue]: Code: 235. DB::Exception: Part" \ -e "The set of parts restored in place of" \ -e "(ReplicatedMergeTreeAttachThread): Initialization failed. 
Error" \ @@ -528,9 +632,11 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then -e "MutateFromLogEntryTask" \ -e "No connection to ZooKeeper, cannot get shared table ID" \ -e "Session expired" \ + -e "TOO_MANY_PARTS" \ /var/log/clickhouse-server/clickhouse-server.backward.dirty.log | rg -Fa "" > /test_output/bc_check_error_messages.txt \ - && echo -e 'Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)\tFAIL' >> /test_output/test_results.tsv \ - || echo -e 'Backward compatibility check: No Error messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv + && echo -e "Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)$FAIL$(head_escaped /test_output/bc_check_error_messages.txt)" \ + >> /test_output/test_results.tsv \ + || echo -e "Backward compatibility check: No Error messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv # Remove file bc_check_error_messages.txt if it's empty [ -s /test_output/bc_check_error_messages.txt ] || rm /test_output/bc_check_error_messages.txt @@ -539,34 +645,36 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then rg -Fa "==================" /var/log/clickhouse-server/stderr.log >> /test_output/tmp rg -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp rg -Fav -e "ASan doesn't fully support makecontext/swapcontext functions" -e "DB::Exception" /test_output/tmp > /dev/null \ - && echo -e 'Backward compatibility check: Sanitizer assert (in stderr.log)\tFAIL' >> /test_output/test_results.tsv \ - || echo -e 'Backward compatibility check: No sanitizer asserts\tOK' >> /test_output/test_results.tsv + && echo -e "Backward compatibility check: Sanitizer assert (in stderr.log)$FAIL$(head_escaped /test_output/tmp)" >> /test_output/test_results.tsv \ + || echo -e "Backward compatibility check: No sanitizer asserts$OK" >> /test_output/test_results.tsv rm -f /test_output/tmp # OOM rg -Fa " Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /dev/null \ - && echo -e 'Backward compatibility check: OOM killer (or signal 9) in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \ - || echo -e 'Backward compatibility check: No OOM messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv + && echo -e "Backward compatibility check: Signal 9 in clickhouse-server.log$FAIL" >> /test_output/test_results.tsv \ + || echo -e "Backward compatibility check: No OOM messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv # Logical errors echo "Check for Logical errors in server log:" - rg -Fa -A20 "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /test_output/bc_check_logical_errors.txt \ - && echo -e 'Backward compatibility check: Logical error thrown (see clickhouse-server.log or bc_check_logical_errors.txt)\tFAIL' >> /test_output/test_results.tsv \ - || echo -e 'Backward compatibility check: No logical errors\tOK' >> /test_output/test_results.tsv + rg -Fa -A20 "Code: 49. 
DB::Exception:" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /test_output/bc_check_logical_errors.txt \ + && echo -e "Backward compatibility check: Logical error thrown (see clickhouse-server.log or bc_check_logical_errors.txt)$FAIL$(head_escaped /test_output/bc_check_logical_errors.txt)" \ + >> /test_output/test_results.tsv \ + || echo -e "Backward compatibility check: No logical errors$OK" >> /test_output/test_results.tsv # Remove file bc_check_logical_errors.txt if it's empty [ -s /test_output/bc_check_logical_errors.txt ] || rm /test_output/bc_check_logical_errors.txt # Crash rg -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.backward.*.log > /dev/null \ - && echo -e 'Backward compatibility check: Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \ - || echo -e 'Backward compatibility check: Not crashed\tOK' >> /test_output/test_results.tsv + && echo -e "Backward compatibility check: Killed by signal (in clickhouse-server.log)$FAIL" >> /test_output/test_results.tsv \ + || echo -e "Backward compatibility check: Not crashed$OK" >> /test_output/test_results.tsv # It also checks for crash without stacktrace (printed by watchdog) echo "Check for Fatal message in server log:" rg -Fa " " /var/log/clickhouse-server/clickhouse-server.backward.*.log > /test_output/bc_check_fatal_messages.txt \ - && echo -e 'Backward compatibility check: Fatal message in clickhouse-server.log (see bc_check_fatal_messages.txt)\tFAIL' >> /test_output/test_results.tsv \ - || echo -e 'Backward compatibility check: No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv + && echo -e "Backward compatibility check: Fatal message in clickhouse-server.log (see bc_check_fatal_messages.txt)$FAIL$(head_escaped /test_output/bc_check_fatal_messages.txt)" \ + >> /test_output/test_results.tsv \ + || echo -e "Backward compatibility check: No fatal messages in clickhouse-server.log$OK" >> /test_output/test_results.tsv # Remove file bc_check_fatal_messages.txt if it's empty [ -s /test_output/bc_check_fatal_messages.txt ] || rm /test_output/bc_check_fatal_messages.txt @@ -574,7 +682,8 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then tar -chf /test_output/coordination.backward.tar /var/lib/clickhouse/coordination ||: for table in query_log trace_log do - clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.backward.tsv.zst ||: + clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" \ + | zstd --threads=0 > /test_output/$table.backward.tsv.zst ||: done fi fi @@ -583,13 +692,28 @@ dmesg -T > /test_output/dmesg.log # OOM in dmesg -- those are real grep -q -F -e 'Out of memory: Killed process' -e 'oom_reaper: reaped process' -e 'oom-kill:constraint=CONSTRAINT_NONE' /test_output/dmesg.log \ - && echo -e 'OOM in dmesg\tFAIL' >> /test_output/test_results.tsv \ - || echo -e 'No OOM in dmesg\tOK' >> /test_output/test_results.tsv + && echo -e "OOM in dmesg$FAIL$(head_escaped /test_output/dmesg.log)" >> /test_output/test_results.tsv \ + || echo -e "No OOM in dmesg$OK" >> /test_output/test_results.tsv mv /var/log/clickhouse-server/stderr.log /test_output/ # Write check result into check_status.tsv -clickhouse-local --structure "test String, res String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by (lower(test) like '%hung%'), 
rowNumberInAllBlocks() LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv +# Try to choose most specific error for the whole check status +clickhouse-local --structure "test String, res String" -q "SELECT 'failure', test FROM table WHERE res != 'OK' order by +(test like 'Backward compatibility check%'), -- BC check goes last +(test like '%Sanitizer%') DESC, +(test like '%Killed by signal%') DESC, +(test like '%gdb.log%') DESC, +(test ilike '%possible deadlock%') DESC, +(test like '%start%') DESC, +(test like '%dmesg%') DESC, +(test like '%OOM%') DESC, +(test like '%Signal 9%') DESC, +(test like '%Fatal message%') DESC, +(test like '%Error message%') DESC, +(test like '%previous release%') DESC, +rowNumberInAllBlocks() +LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv [ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv # Core dumps diff --git a/docker/test/stress/stress b/docker/test/stress/stress index 4afd2745526..86605b5ce0c 100755 --- a/docker/test/stress/stress +++ b/docker/test/stress/stress @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- from multiprocessing import cpu_count -from subprocess import Popen, call, check_output, STDOUT +from subprocess import Popen, call, check_output, STDOUT, PIPE import os import argparse import logging @@ -146,6 +146,12 @@ def prepare_for_hung_check(drop_databases): "KILL QUERY WHERE query LIKE 'SELECT URL, uniq(SearchPhrase) AS u FROM test.hits GROUP BY URL ORDER BY u %'" ) ) + # Long query from 02136_kill_scalar_queries + call_with_retry( + make_query_command( + "KILL QUERY WHERE query LIKE 'SELECT (SELECT number FROM system.numbers WHERE number = 1000000000000)%'" + ) + ) if drop_databases: for i in range(5): @@ -293,14 +299,19 @@ if __name__ == "__main__": "00001_select_1", ] ) - res = call(cmd, shell=True, stderr=STDOUT) - hung_check_status = "No queries hung\tOK\n" + hung_check_log = os.path.join(args.output_folder, "hung_check.log") + tee = Popen(['/usr/bin/tee', hung_check_log], stdin=PIPE) + res = call(cmd, shell=True, stdout=tee.stdin, stderr=STDOUT) + tee.stdin.close() if res != 0 and have_long_running_queries: logging.info("Hung check failed with exit code {}".format(res)) - hung_check_status = "Hung check failed\tFAIL\n" - with open( - os.path.join(args.output_folder, "test_results.tsv"), "w+" - ) as results: - results.write(hung_check_status) + else: + hung_check_status = "No queries hung\tOK\t\\N\t\n" + with open( + os.path.join(args.output_folder, "test_results.tsv"), "w+" + ) as results: + results.write(hung_check_status) + os.remove(hung_check_log) + logging.info("Stress test finished") diff --git a/docs/changelogs/v22.10.7.13-stable.md b/docs/changelogs/v22.10.7.13-stable.md new file mode 100644 index 00000000000..c906e00e524 --- /dev/null +++ b/docs/changelogs/v22.10.7.13-stable.md @@ -0,0 +1,21 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v22.10.7.13-stable (d261d9036cc) FIXME as compared to v22.10.6.3-stable (645a66d221f) + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#44998](https://github.com/ClickHouse/ClickHouse/issues/44998): Another fix for `Cannot read all data` error which could happen while reading `LowCardinality` dictionary from remote fs. Fixes [#44709](https://github.com/ClickHouse/ClickHouse/issues/44709). 
[#44875](https://github.com/ClickHouse/ClickHouse/pull/44875) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#45551](https://github.com/ClickHouse/ClickHouse/issues/45551): Fix `SELECT ... FROM system.dictionaries` exception when there is a dictionary with a bad structure (e.g. incorrect type in xml config). [#45399](https://github.com/ClickHouse/ClickHouse/pull/45399) ([Aleksei Filatov](https://github.com/aalexfvk)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Automatically merge green backport PRs and green approved PRs [#41110](https://github.com/ClickHouse/ClickHouse/pull/41110) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Improve release scripts [#45074](https://github.com/ClickHouse/ClickHouse/pull/45074) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix wrong approved_at, simplify conditions [#45302](https://github.com/ClickHouse/ClickHouse/pull/45302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Get rid of artifactory in favor of r2 + ch-repos-manager [#45421](https://github.com/ClickHouse/ClickHouse/pull/45421) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/changelogs/v22.11.5.15-stable.md b/docs/changelogs/v22.11.5.15-stable.md new file mode 100644 index 00000000000..742a8740514 --- /dev/null +++ b/docs/changelogs/v22.11.5.15-stable.md @@ -0,0 +1,22 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v22.11.5.15-stable (d763e5a9239) FIXME as compared to v22.11.4.3-stable (7f4cf554f69) + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#44999](https://github.com/ClickHouse/ClickHouse/issues/44999): Another fix for `Cannot read all data` error which could happen while reading `LowCardinality` dictionary from remote fs. Fixes [#44709](https://github.com/ClickHouse/ClickHouse/issues/44709). [#44875](https://github.com/ClickHouse/ClickHouse/pull/44875) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#45552](https://github.com/ClickHouse/ClickHouse/issues/45552): Fix `SELECT ... FROM system.dictionaries` exception when there is a dictionary with a bad structure (e.g. incorrect type in xml config). [#45399](https://github.com/ClickHouse/ClickHouse/pull/45399) ([Aleksei Filatov](https://github.com/aalexfvk)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Automatically merge green backport PRs and green approved PRs [#41110](https://github.com/ClickHouse/ClickHouse/pull/41110) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Improve release scripts [#45074](https://github.com/ClickHouse/ClickHouse/pull/45074) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix wrong approved_at, simplify conditions [#45302](https://github.com/ClickHouse/ClickHouse/pull/45302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Get rid of artifactory in favor of r2 + ch-repos-manager [#45421](https://github.com/ClickHouse/ClickHouse/pull/45421) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Trim refs/tags/ from GITHUB_TAG in release workflow [#45636](https://github.com/ClickHouse/ClickHouse/pull/45636) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
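The `system.dictionaries` fix backported in these patch releases concerns introspection queries of the following shape. This is a minimal, hypothetical sketch (not taken from the PR): before the fix, such a query could itself throw if any configured dictionary had a malformed structure, e.g. an incorrect attribute type in its XML definition; the intent of the fix is that the problem is surfaced per dictionary rather than failing the whole query.

```sql
-- Sketch: inspect dictionary health without loading the broken dictionary itself.
SELECT name, status, last_exception
FROM system.dictionaries
ORDER BY name;
```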
+ diff --git a/docs/changelogs/v22.8.13.20-lts.md b/docs/changelogs/v22.8.13.20-lts.md new file mode 100644 index 00000000000..d8dd1bd2b1c --- /dev/null +++ b/docs/changelogs/v22.8.13.20-lts.md @@ -0,0 +1,24 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v22.8.13.20-lts (e4817946d18) FIXME as compared to v22.8.12.45-lts (86b0ecd5d51) + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#45565](https://github.com/ClickHouse/ClickHouse/issues/45565): Fix positional arguments exception Positional argument out of bounds. Closes [#40634](https://github.com/ClickHouse/ClickHouse/issues/40634). [#41189](https://github.com/ClickHouse/ClickHouse/pull/41189) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#44997](https://github.com/ClickHouse/ClickHouse/issues/44997): Another fix for `Cannot read all data` error which could happen while reading `LowCardinality` dictionary from remote fs. Fixes [#44709](https://github.com/ClickHouse/ClickHouse/issues/44709). [#44875](https://github.com/ClickHouse/ClickHouse/pull/44875) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#45550](https://github.com/ClickHouse/ClickHouse/issues/45550): Fix `SELECT ... FROM system.dictionaries` exception when there is a dictionary with a bad structure (e.g. incorrect type in xml config). [#45399](https://github.com/ClickHouse/ClickHouse/pull/45399) ([Aleksei Filatov](https://github.com/aalexfvk)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Automatically merge green backport PRs and green approved PRs [#41110](https://github.com/ClickHouse/ClickHouse/pull/41110) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Improve release scripts [#45074](https://github.com/ClickHouse/ClickHouse/pull/45074) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix wrong approved_at, simplify conditions [#45302](https://github.com/ClickHouse/ClickHouse/pull/45302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Get rid of artifactory in favor of r2 + ch-repos-manager [#45421](https://github.com/ClickHouse/ClickHouse/pull/45421) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Trim refs/tags/ from GITHUB_TAG in release workflow [#45636](https://github.com/ClickHouse/ClickHouse/pull/45636) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Merge pull request [#38262](https://github.com/ClickHouse/ClickHouse/issues/38262) from PolyProgrammist/fix-ordinary-system-un… [#45650](https://github.com/ClickHouse/ClickHouse/pull/45650) ([alesapin](https://github.com/alesapin)). + diff --git a/docs/changelogs/v23.1.1.3077-stable.md b/docs/changelogs/v23.1.1.3077-stable.md new file mode 100644 index 00000000000..e218be62f09 --- /dev/null +++ b/docs/changelogs/v23.1.1.3077-stable.md @@ -0,0 +1,592 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.1.1.3077-stable (dcaac477025) FIXME as compared to v22.12.1.1752-stable (688e488e930) + +#### Backward Incompatible Change +* Remove query `SYSTEM RESTART DISK`. [#44647](https://github.com/ClickHouse/ClickHouse/pull/44647) ([alesapin](https://github.com/alesapin)). +* Disallow Gorilla compression on columns of non-Float32 or non-Float64 type. [#45252](https://github.com/ClickHouse/ClickHouse/pull/45252) ([Robert Schulze](https://github.com/rschu1ze)). +* Remove PREALLOCATE for HASHED/SPARSE_HASHED dictionaries. 
[#45388](https://github.com/ClickHouse/ClickHouse/pull/45388) ([Azat Khuzhin](https://github.com/azat)). +* Parallel quorum inserts might work incorrectly with `*MergeTree` tables created with deprecated syntax. Therefore, parallel quorum inserts support is completely disabled for such tables. It does not affect tables created with a new syntax. [#45430](https://github.com/ClickHouse/ClickHouse/pull/45430) ([Alexander Tokmakov](https://github.com/tavplubix)). + +#### New Feature +* Add `quantileInterpolatedWeighted`/`quantilesInterpolatedWeighted` functions. [#38252](https://github.com/ClickHouse/ClickHouse/pull/38252) ([Bharat Nallan](https://github.com/bharatnc)). +* Add an experimental inverted index as a new secondary index type for efficient text search. [#38667](https://github.com/ClickHouse/ClickHouse/pull/38667) ([larryluogit](https://github.com/larryluogit)). +* Add column `ptr` to `system.trace_log` for `trace_type = 'MemorySample'`. This column contains an address of allocation. Added function `flameGraph` which can build flamegraph containing allocated and not released memory. Reworking of [#38391](https://github.com/ClickHouse/ClickHouse/issues/38391). [#38953](https://github.com/ClickHouse/ClickHouse/pull/38953) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Dictionary source for extracting keys by traversing regular expressions tree. [#40878](https://github.com/ClickHouse/ClickHouse/pull/40878) ([Vage Ogannisian](https://github.com/nooblose)). +* Added parametrized view functionality, now it's possible to specify query parameters for View table engine. resolves [#40907](https://github.com/ClickHouse/ClickHouse/issues/40907). [#41687](https://github.com/ClickHouse/ClickHouse/pull/41687) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* added extendable and configurable scheduling subsystem for IO requests (not yet integrated with IO code itself). [#41840](https://github.com/ClickHouse/ClickHouse/pull/41840) ([Sergei Trifonov](https://github.com/serxa)). +* Added `SYSTEM DROP DATABASE REPLICA` that removes metadata of dead replica of `Replicated` database. Resolves [#41794](https://github.com/ClickHouse/ClickHouse/issues/41794). [#42807](https://github.com/ClickHouse/ClickHouse/pull/42807) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Array join support map type, like function explode in spark. [#43239](https://github.com/ClickHouse/ClickHouse/pull/43239) ([李扬](https://github.com/taiyang-li)). +* Support SQL standard binary and hex string literals. [#43785](https://github.com/ClickHouse/ClickHouse/pull/43785) ([Mo Xuan](https://github.com/mo-avatar)). +* Add experimental query result cache. [#43797](https://github.com/ClickHouse/ClickHouse/pull/43797) ([Robert Schulze](https://github.com/rschu1ze)). +* format datetime in joda datetime style. Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html. [#43818](https://github.com/ClickHouse/ClickHouse/pull/43818) ([李扬](https://github.com/taiyang-li)). +* to merge [#40878](https://github.com/ClickHouse/ClickHouse/issues/40878) , supporting regexp dictionary. [#43858](https://github.com/ClickHouse/ClickHouse/pull/43858) ([Han Fei](https://github.com/hanfei1991)). +* Implemented a fractional second formatter (`%f`) for formatDateTime. [#44060](https://github.com/ClickHouse/ClickHouse/pull/44060) ([ltrk2](https://github.com/ltrk2)). +* Added age function to calculate difference between two dates or dates with time values expressed as number of full units. 
Closes [#41115](https://github.com/ClickHouse/ClickHouse/issues/41115). [#44421](https://github.com/ClickHouse/ClickHouse/pull/44421) ([Robert Schulze](https://github.com/rschu1ze)). +* Implemented a fractional second formatter (%f) for formatDateTime. This is slightly modified PR [#44060](https://github.com/ClickHouse/ClickHouse/issues/44060) by @ltrk2. [#44497](https://github.com/ClickHouse/ClickHouse/pull/44497) ([Alexander Gololobov](https://github.com/davenger)). +* Add null source for dictionaries. Closes [#44240](https://github.com/ClickHouse/ClickHouse/issues/44240). [#44502](https://github.com/ClickHouse/ClickHouse/pull/44502) ([mayamika](https://github.com/mayamika)). +* We can use `s3_storage_class` to set different tier. Such as ``` s3 xxx xxx xxx STANDARD/INTELLIGENT_TIERING ``` Closes [#44443](https://github.com/ClickHouse/ClickHouse/issues/44443). [#44707](https://github.com/ClickHouse/ClickHouse/pull/44707) ([chen](https://github.com/xiedeyantu)). +* Try to detect header with column names (and maybe types) for CSV/TSV/CustomSeparated input formats. Add settings `input_format_tsv/csv/custom_detect_header` that enables this behaviour (enabled by default). Closes [#44640](https://github.com/ClickHouse/ClickHouse/issues/44640). [#44953](https://github.com/ClickHouse/ClickHouse/pull/44953) ([Kruglov Pavel](https://github.com/Avogar)). +* Insert default values in case of missing elements in JSON object while parsing named tuple. Add setting `input_format_json_defaults_for_missing_elements_in_named_tuple` that controls this behaviour. Closes [#45142](https://github.com/ClickHouse/ClickHouse/issues/45142)#issuecomment-1380153217. [#45231](https://github.com/ClickHouse/ClickHouse/pull/45231) ([Kruglov Pavel](https://github.com/Avogar)). +* - Add total memory and used memory metrics with respect to cgroup in AsyncMetrics (https://github.com/ClickHouse/ClickHouse/issues/37983). [#45301](https://github.com/ClickHouse/ClickHouse/pull/45301) ([sichenzhao](https://github.com/sichenzhao)). +* Introduce non-throwing variants of hasToken and hasTokenCaseInsensitive. [#45341](https://github.com/ClickHouse/ClickHouse/pull/45341) ([ltrk2](https://github.com/ltrk2)). + +#### Performance Improvement +* Added sharding support in HashedDictionary to allow parallel load (almost linear scaling based on number of shards). [#40003](https://github.com/ClickHouse/ClickHouse/pull/40003) ([Azat Khuzhin](https://github.com/azat)). +* Do not load inactive parts at startup of `MergeTree` tables. [#42181](https://github.com/ClickHouse/ClickHouse/pull/42181) ([Anton Popov](https://github.com/CurtizJ)). +* - Speed up query parsing. [#42284](https://github.com/ClickHouse/ClickHouse/pull/42284) ([Raúl Marín](https://github.com/Algunenano)). +* Always replace OR chain `expr = x1 OR ... OR expr = xN` to `expr IN (x1, ..., xN)` in case if `expr` is a `LowCardinality` column. Setting `optimize_min_equality_disjunction_chain_length` is ignored in this case. [#42889](https://github.com/ClickHouse/ClickHouse/pull/42889) ([Guo Wangyang](https://github.com/guowangy)). +* > Original changelog In the original implementation, the memory of ThreadGroupStatus:: finished_threads_counters_memory is released by moving it to a temporary std::vector, which soon expired and gets destructed. This method is viable, however not straightforward enough. To enhance the code readability, this commit releases the memory in the vector by firstly resizing it to 0 and then shrinking the capacity accordingly. 
[#43586](https://github.com/ClickHouse/ClickHouse/pull/43586) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* As a follow-up of [#42214](https://github.com/ClickHouse/ClickHouse/issues/42214), this PR tries to optimize the column-wise ternary logic evaluation by achieving auto-vectorization. In the performance test of this [microbenchmark](https://github.com/ZhiguoZh/ClickHouse/blob/20221123-ternary-logic-opt-example/src/Functions/examples/associative_applier_perf.cpp), we've observed a peak **performance gain** of **21x** on the ICX device (Intel Xeon Platinum 8380 CPU). [#43669](https://github.com/ClickHouse/ClickHouse/pull/43669) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Improved latency of reading from storage `S3` and table function `s3` with a large number of small files. Now settings `remote_filesystem_read_method` and `remote_filesystem_read_prefetch` take effect while reading from storage `S3`. [#43726](https://github.com/ClickHouse/ClickHouse/pull/43726) ([Anton Popov](https://github.com/CurtizJ)). +* - Avoid acquiring read locks in system.tables if possible. [#43840](https://github.com/ClickHouse/ClickHouse/pull/43840) ([Raúl Marín](https://github.com/Algunenano)). +* The performance experiments of SSB (Star Schema Benchmark) on the ICX device (Intel Xeon Platinum 8380 CPU, 80 cores, 160 threads) show that this change could effectively decrease the lock contention for ThreadPoolImpl::mutex by **75%**, increasing the CPU utilization and improving the overall performance by **2.4%**. [#44308](https://github.com/ClickHouse/ClickHouse/pull/44308) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Now optimisation is applied only if the cached HT size is sufficiently large (thresholds were determined empirically and hardcoded). [#44455](https://github.com/ClickHouse/ClickHouse/pull/44455) ([Nikita Taranov](https://github.com/nickitat)). +* ... The whole struct field will be loaded at current, even though we just want to read one field of the struct. [#44484](https://github.com/ClickHouse/ClickHouse/pull/44484) ([lgbo](https://github.com/lgbo-ustc)). +* Small performance improvement for asynchronous reading from remote fs. [#44868](https://github.com/ClickHouse/ClickHouse/pull/44868) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Switched to faster shared (RW) mutex implementation. Performance may be improved in queries with a lot of thread synchronization or for data structures experiencing heavy contention. [#45007](https://github.com/ClickHouse/ClickHouse/pull/45007) ([Sergei Trifonov](https://github.com/serxa)). +* Add fast path for: - col like '%%' - col like '%' - col not like '%' - col not like '%' - match(col, '.*'). [#45244](https://github.com/ClickHouse/ClickHouse/pull/45244) ([李扬](https://github.com/taiyang-li)). +* todo. [#45289](https://github.com/ClickHouse/ClickHouse/pull/45289) ([Nikita Taranov](https://github.com/nickitat)). + +#### Improvement +* Refactor and Improve streaming engines Kafka/RabbitMQ/NATS and add support for all formats, also refactor formats a bit: - Fix producing messages in row-based formats with suffixes/prefixes. Now every message is formatted completely with all delimiters and can be parsed back using input format. - Support block-based formats like Native, Parquet, ORC, etc. Every block is formatted as a separate message. The number of rows in one message depends on block size, so you can control it via setting `max_block_size`.
- Add new engine settings `kafka_max_rows_per_message/rabbitmq_max_rows_per_message/nats_max_rows_per_message`. They control the number of rows formatted in one message in row-based formats. Default value: 1. - Fix high memory consumption in NATS table engine. - Support arbitrary binary data in NATS producer (previously it worked only with strings contained \0 at the end) - Add missing Kafka/RabbitMQ/NATS engine settings in documentation. - Refactor producing and consuming in Kafka/RabbitMQ/NATS, separate it from WriteBuffers/ReadBuffers semantic. - Refactor output formats: remove callbacks on each row used in Kafka/RabbitMQ/NATS (now we don't use callbacks there), allow to use IRowOutputFormat directly, clarify row end and row between delimiters, make it possible to reset output format to start formatting again - Add proper implementation in formatRow function (bonus after formats refactoring). [#42777](https://github.com/ClickHouse/ClickHouse/pull/42777) ([Kruglov Pavel](https://github.com/Avogar)). +* Support `optimize_or_like_chain` in the new infrastructure. Part of [#42648](https://github.com/ClickHouse/ClickHouse/issues/42648). [#42797](https://github.com/ClickHouse/ClickHouse/pull/42797) ([Dmitry Novik](https://github.com/novikd)). +* Improve the Asterisk and ColumnMatcher parsers. Part of [#42648](https://github.com/ClickHouse/ClickHouse/issues/42648). [#42884](https://github.com/ClickHouse/ClickHouse/pull/42884) ([Nikolay Degterinsky](https://github.com/evillique)). +* Implement `optimize_redundant_functions_in_order_by` on top of QueryTree. Part of [#42648](https://github.com/ClickHouse/ClickHouse/issues/42648). [#42970](https://github.com/ClickHouse/ClickHouse/pull/42970) ([Dmitry Novik](https://github.com/novikd)). +* Support `optimize_group_by_function_keys` in the new analyzer architecture. Also, add support for optimizing GROUPING SETS keys. Part of [#42648](https://github.com/ClickHouse/ClickHouse/issues/42648). [#43261](https://github.com/ClickHouse/ClickHouse/pull/43261) ([Dmitry Novik](https://github.com/novikd)). +* Improve reading CSV field in CustomSeparated/Template format. Closes [#42352](https://github.com/ClickHouse/ClickHouse/issues/42352) Closes [#39620](https://github.com/ClickHouse/ClickHouse/issues/39620). [#43332](https://github.com/ClickHouse/ClickHouse/pull/43332) ([Kruglov Pavel](https://github.com/Avogar)). +* Support reading/writing `Nested` tables as `List` of `Struct` in CapnProto format. Read/write `Decimal32/64` as `Int32/64`. Closes [#43319](https://github.com/ClickHouse/ClickHouse/issues/43319). [#43379](https://github.com/ClickHouse/ClickHouse/pull/43379) ([Kruglov Pavel](https://github.com/Avogar)). +* - Unify query elapsed time measurements. [#43455](https://github.com/ClickHouse/ClickHouse/pull/43455) ([Raúl Marín](https://github.com/Algunenano)). +* Support scalar subqueries cache Implementation: * Added a map with hash of the node (without alias) and the evaluated value to Context. Testing: * Added a test-case with new analyser in 02174_cte_scalar_cache.sql. [#43640](https://github.com/ClickHouse/ClickHouse/pull/43640) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Improve automatic usage of structure from insertion table in table functions file/hdfs/s3 when virtual columns present in select query, it fixes possible error `Block structure mismatch` or `number of columns mismatch`. [#43695](https://github.com/ClickHouse/ClickHouse/pull/43695) ([Kruglov Pavel](https://github.com/Avogar)). 
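To make the structure-from-insertion-table item above concrete, here is a hedged sketch of the pattern it addresses; the table, bucket, and file names are invented for illustration. The structure of the target table is reused for the `s3` source instead of inferring a schema, and selecting the `_file` virtual column alongside real columns is the case that could previously fail with `Block structure mismatch`.

```sql
-- Hypothetical target table whose structure is reused for the s3() source.
CREATE TABLE dst (id UInt64, name String) ENGINE = MergeTree ORDER BY id;

-- The _file virtual column is used for filtering; before the improvement this mix of
-- real and virtual columns could trigger the mismatch errors mentioned above.
INSERT INTO dst
SELECT id, name
FROM s3('https://example-bucket.s3.amazonaws.com/data/*.tsv', 'TSV')
WHERE _file LIKE 'part-%';
```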
+* Add support for signed arguments in range(). Fixes [#43333](https://github.com/ClickHouse/ClickHouse/issues/43333). [#43733](https://github.com/ClickHouse/ClickHouse/pull/43733) ([sanyu](https://github.com/wineternity)). +* Remove redundant sorting, for example, sorting related ORDER BY clauses in subqueries. Implemented on top of query plan. It does similar optimization as `optimize_duplicate_order_by_and_distinct` regarding `ORDER BY` clauses, but more generic, since it's applied to any redundant sorting steps (not only caused by ORDER BY clause) and applied to subqueries of any depth. Related to [#42648](https://github.com/ClickHouse/ClickHouse/issues/42648). [#43905](https://github.com/ClickHouse/ClickHouse/pull/43905) ([Igor Nikonov](https://github.com/devcrafter)). +* Added mmap support for StorageFile, which should improve the performance of clickhouse-local. [#43927](https://github.com/ClickHouse/ClickHouse/pull/43927) ([pufit](https://github.com/pufit)). +* Add ability to disable deduplication for BACKUP (for backups wiithout deduplication ATTACH can be used instead of full RESTORE), example `BACKUP foo TO S3(...) SETTINGS deduplicate_files=0` (default `deduplicate_files=1`). [#43947](https://github.com/ClickHouse/ClickHouse/pull/43947) ([Azat Khuzhin](https://github.com/azat)). +* Make `system.replicas` table do parallel fetches of replicas statuses. Closes [#43918](https://github.com/ClickHouse/ClickHouse/issues/43918). [#43998](https://github.com/ClickHouse/ClickHouse/pull/43998) ([Nikolay Degterinsky](https://github.com/evillique)). +* Refactor and improve schema inference for text formats. Add new setting `schema_inference_make_columns_nullable` that controls making result types `Nullable` (enabled by default);. [#44019](https://github.com/ClickHouse/ClickHouse/pull/44019) ([Kruglov Pavel](https://github.com/Avogar)). +* Better support for PROXYv1. [#44135](https://github.com/ClickHouse/ClickHouse/pull/44135) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Add information about the latest part check by cleanup thread into `system.parts` table. [#44244](https://github.com/ClickHouse/ClickHouse/pull/44244) ([Dmitry Novik](https://github.com/novikd)). +* Disable functions in readonly for inserts. [#44290](https://github.com/ClickHouse/ClickHouse/pull/44290) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Add a setting `simultaneous_parts_removal_limit` to allow to limit the number of parts being processed by one iteration of CleanupThread. [#44461](https://github.com/ClickHouse/ClickHouse/pull/44461) ([Dmitry Novik](https://github.com/novikd)). +* If user only need virtual columns, we don't need to initialize ReadBufferFromS3. May be helpful to [#44246](https://github.com/ClickHouse/ClickHouse/issues/44246). [#44493](https://github.com/ClickHouse/ClickHouse/pull/44493) ([chen](https://github.com/xiedeyantu)). +* Prevent duplicate column names hints. Closes [#44130](https://github.com/ClickHouse/ClickHouse/issues/44130). [#44519](https://github.com/ClickHouse/ClickHouse/pull/44519) ([Joanna Hulboj](https://github.com/jh0x)). +* Allow macro substitution in endpoint of disks resolve [#40951](https://github.com/ClickHouse/ClickHouse/issues/40951). [#44533](https://github.com/ClickHouse/ClickHouse/pull/44533) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Added a `message_format_string` column to `system.text_log`. The column contains a pattern that was used to format the message. 
[#44543](https://github.com/ClickHouse/ClickHouse/pull/44543) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Improve schema inference when `input_format_json_read_object_as_string` is enabled. [#44546](https://github.com/ClickHouse/ClickHouse/pull/44546) ([Kruglov Pavel](https://github.com/Avogar)). +* Add user-level setting `database_replicated_allow_replicated_engine_arguments` which allows banning creation of `ReplicatedMergeTree` tables with arguments in `DatabaseReplicated`. [#44566](https://github.com/ClickHouse/ClickHouse/pull/44566) ([alesapin](https://github.com/alesapin)). +* Prevent users from mistakenly specifying zero (invalid) value for `index_granularity`. This closes [#44536](https://github.com/ClickHouse/ClickHouse/issues/44536). [#44578](https://github.com/ClickHouse/ClickHouse/pull/44578) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added possibility to set path to service keytab file in `keytab` parameter in `kerberos` section of config.xml. [#44594](https://github.com/ClickHouse/ClickHouse/pull/44594) ([Roman Vasin](https://github.com/rvasin)). +* Use already written part of the query for fuzzy search (pass to skim). [#44600](https://github.com/ClickHouse/ClickHouse/pull/44600) ([Azat Khuzhin](https://github.com/azat)). +* Enable input_format_json_read_objects_as_strings by default to be able to read nested JSON objects while JSON Object type is experimental. [#44657](https://github.com/ClickHouse/ClickHouse/pull/44657) ([Kruglov Pavel](https://github.com/Avogar)). +* When users do duplicate async inserts, we should dedup inside the memory before we query keeper. [#44682](https://github.com/ClickHouse/ClickHouse/pull/44682) ([Han Fei](https://github.com/hanfei1991)). +* Input/output Avro bool type as ClickHouse bool type. [#44684](https://github.com/ClickHouse/ClickHouse/pull/44684) ([Kruglov Pavel](https://github.com/Avogar)). +* - Don't parse beyond the quotes when reading UUIDs. [#44686](https://github.com/ClickHouse/ClickHouse/pull/44686) ([Raúl Marín](https://github.com/Algunenano)). +* Infer UInt64 in case of Int64 overflow and fix some transforms in schema inference. [#44696](https://github.com/ClickHouse/ClickHouse/pull/44696) ([Kruglov Pavel](https://github.com/Avogar)). +* Previously dependency resolving inside DatabaseReplicated was done in a hacky way and now it is done right using an explicit graph. [#44697](https://github.com/ClickHouse/ClickHouse/pull/44697) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Support Bool type in Arrow/Parquet/ORC. Closes [#43970](https://github.com/ClickHouse/ClickHouse/issues/43970). [#44698](https://github.com/ClickHouse/ClickHouse/pull/44698) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix `output_format_pretty_row_numbers` not preserving the counter across the blocks. Closes [#44815](https://github.com/ClickHouse/ClickHouse/issues/44815). [#44832](https://github.com/ClickHouse/ClickHouse/pull/44832) ([flynn](https://github.com/ucasfl)). +* Extend function "toDayOfWeek" with a mode argument describing if a) the week starts on Monday or Sunday and b) if counting starts at 0 or 1. [#44860](https://github.com/ClickHouse/ClickHouse/pull/44860) ([李扬](https://github.com/taiyang-li)). +* - Don't report errors in system.errors due to parts being merged concurrently with the background cleanup process. [#44874](https://github.com/ClickHouse/ClickHouse/pull/44874) ([Raúl Marín](https://github.com/Algunenano)). +* Optimize and fix metrics for Distributed async INSERT.
[#44922](https://github.com/ClickHouse/ClickHouse/pull/44922) ([Azat Khuzhin](https://github.com/azat)). +* Added settings to disallow concurrent backups and restores resolves [#43891](https://github.com/ClickHouse/ClickHouse/issues/43891) Implementation: * Added server level settings to disallow concurrent backups and restores, which are read and set when BackupWorker is created in Context. * Settings are set to true by default. * Before starting backup or restores, added a check to see if any other backups/restores are running. For internal request it checks if its from the self node using backup_uuid. [#45072](https://github.com/ClickHouse/ClickHouse/pull/45072) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* add a cache for async block ids. This will reduce the requests of zookeeper when we enable async inserts deduplication. [#45106](https://github.com/ClickHouse/ClickHouse/pull/45106) ([Han Fei](https://github.com/hanfei1991)). +* CRC32 changes to address the WeakHash collision issue in PowerPC. [#45144](https://github.com/ClickHouse/ClickHouse/pull/45144) ([MeenaRenganathan22](https://github.com/MeenaRenganathan22)). +* Optimize memory consumption during backup to S3: files to S3 now will be copied directly without using `WriteBufferFromS3` (which could use a lot of memory). [#45188](https://github.com/ClickHouse/ClickHouse/pull/45188) ([Vitaly Baranov](https://github.com/vitlibar)). +* Use structure from insertion table in generateRandom without arguments. [#45239](https://github.com/ClickHouse/ClickHouse/pull/45239) ([Kruglov Pavel](https://github.com/Avogar)). +* Use `GetObjectAttributes` request instead of `HeadObject` request to get the size of an object in AWS S3. This change fixes handling endpoints without explicit region, for example. [#45288](https://github.com/ClickHouse/ClickHouse/pull/45288) ([Vitaly Baranov](https://github.com/vitlibar)). +* Add `` config parameter for system logs. [#45320](https://github.com/ClickHouse/ClickHouse/pull/45320) ([Stig Bakken](https://github.com/stigsb)). +* Remove redundant sorting, for example, sorting related ORDER BY clauses in subqueries. Implemented on top of query plan. It does similar optimization as `optimize_duplicate_order_by_and_distinct` regarding `ORDER BY` clauses, but more generic, since it's applied to any redundant sorting steps (not only caused by ORDER BY clause) and applied to subqueries of any depth. Related to [#42648](https://github.com/ClickHouse/ClickHouse/issues/42648). [#45420](https://github.com/ClickHouse/ClickHouse/pull/45420) ([Igor Nikonov](https://github.com/devcrafter)). +* Allow to implicitly convert floats stored in string fields of JSON to integers in `JSONExtract` functions. E.g. `JSONExtract('{"a": "1000.111"}', 'a', 'UInt64')` -> `1000`, previously it returned 0. [#45432](https://github.com/ClickHouse/ClickHouse/pull/45432) ([Anton Popov](https://github.com/CurtizJ)). +* Added fields `supports_parallel_parsing` and `supports_parallel_formatting` to table `system.formats` for better introspection. [#45499](https://github.com/ClickHouse/ClickHouse/pull/45499) ([Anton Popov](https://github.com/CurtizJ)). +* Attempt to improve fsync latency (by syncing all files at once during fetches and small files after mutations) and one tiny fix for fsync_part_directory. [#45537](https://github.com/ClickHouse/ClickHouse/pull/45537) ([Azat Khuzhin](https://github.com/azat)). + +#### Bug Fix +* Fix HTTP requests without path for AWS. 
After updating AWS SDK the sdk no longer adds a slash to requesting paths so we need to do it in our PocoHTTPClient to keep HTTP requests correct. [#45238](https://github.com/ClickHouse/ClickHouse/pull/45238) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix backup if mutations get killed during the backup process. [#45351](https://github.com/ClickHouse/ClickHouse/pull/45351) ([Vitaly Baranov](https://github.com/vitlibar)). + +#### Build/Testing/Packaging Improvement +* Builtin skim for fuzzy search in clickhouse client/local history. [#44239](https://github.com/ClickHouse/ClickHouse/pull/44239) ([Azat Khuzhin](https://github.com/azat)). +* Memory limit for server is set now in AST fuzz tests to avoid OOMs. [#44282](https://github.com/ClickHouse/ClickHouse/pull/44282) ([Nikita Taranov](https://github.com/nickitat)). +* In rare cases, we don't rebuild binaries, because another task with a similar prefix succeeded. E.g. `binary_darwin` didn't restart because `binary_darwin_aarch64`. [#44311](https://github.com/ClickHouse/ClickHouse/pull/44311) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* The "universal.sh" now fetches a SSE2 build on systems which don't have SSE4.2. [#44366](https://github.com/ClickHouse/ClickHouse/pull/44366) ([Robert Schulze](https://github.com/rschu1ze)). +* Retry the integration tests on compressing errors. [#44529](https://github.com/ClickHouse/ClickHouse/pull/44529) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* ... 1. Added pytest-random by default in integration tests runner 2. Disable TSAN checks for tests with GPRC ( like https://s3.amazonaws.com/clickhouse-test-reports/42807/e9d7407a58f6e3f7d88c0c534685704f23560704/integration_tests__tsan__[4/6].html ) 3. Cleanup tables after tests in odbc. [#44711](https://github.com/ClickHouse/ClickHouse/pull/44711) ([Ilya Yatsishin](https://github.com/qoega)). +* We removed support for shared linking because of Rust. Actually, Rust is only an excuse for this removal, and we wanted to remove it nevertheless. [#44828](https://github.com/ClickHouse/ClickHouse/pull/44828) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Checks will try to download images before running integration tests. If image, proxy or whatever is broken in infrastructure it will not make tests flaky. Images will be cached locally and download time will not be added to random tests. Compose images are now changed to be used without correct environment from helpers/cluster.py. [#44848](https://github.com/ClickHouse/ClickHouse/pull/44848) ([Ilya Yatsishin](https://github.com/qoega)). +* Fix zookeeper downloading, update the version, and optimize the image size. [#44853](https://github.com/ClickHouse/ClickHouse/pull/44853) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* The performance tests were silently broken because `Errors` wasn't detected in the status message. [#44867](https://github.com/ClickHouse/ClickHouse/pull/44867) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Remove the dependency on the `adduser` tool from the packages, because we don't use it. This fixes [#44934](https://github.com/ClickHouse/ClickHouse/issues/44934). [#45011](https://github.com/ClickHouse/ClickHouse/pull/45011) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* SQLite library is updated to the latest. It is used for the SQLite database and table integration engines. Also, fixed a false-positive TSan report. This closes [#45027](https://github.com/ClickHouse/ClickHouse/issues/45027). 
[#45031](https://github.com/ClickHouse/ClickHouse/pull/45031) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix report sending in the case when FastTest failed. [#45588](https://github.com/ClickHouse/ClickHouse/pull/45588) ([Dmitry Novik](https://github.com/novikd)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* #40651 [#41404](https://github.com/ClickHouse/ClickHouse/issues/41404). [#42126](https://github.com/ClickHouse/ClickHouse/pull/42126) ([Alexander Gololobov](https://github.com/davenger)). +* Fix possible use-of-unitialized value after executing expressions after sorting. Closes [#43386](https://github.com/ClickHouse/ClickHouse/issues/43386) CC: @nickitat. [#43635](https://github.com/ClickHouse/ClickHouse/pull/43635) ([Kruglov Pavel](https://github.com/Avogar)). +* Better handling of NULL in aggregate combinators, fix possible segfault/logical error while using optimization `optimize_rewrite_sum_if_to_count_if`. Closes [#43758](https://github.com/ClickHouse/ClickHouse/issues/43758). [#43813](https://github.com/ClickHouse/ClickHouse/pull/43813) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix CREATE USER/ROLE query settings constraints. [#43993](https://github.com/ClickHouse/ClickHouse/pull/43993) ([Nikolay Degterinsky](https://github.com/evillique)). +* * Fix wrong behavior of `JOIN ON t1.x = t2.x AND 1 = 1`, forbid such queries. [#44016](https://github.com/ClickHouse/ClickHouse/pull/44016) ([Vladimir C](https://github.com/vdimir)). +* Fixed bug with non-parsable default value for EPHEMERAL column in table metadata. [#44026](https://github.com/ClickHouse/ClickHouse/pull/44026) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix parsing of bad version from compatibility setting. [#44224](https://github.com/ClickHouse/ClickHouse/pull/44224) ([Kruglov Pavel](https://github.com/Avogar)). +* Bring interval subtraction from datetime in line with addition. [#44241](https://github.com/ClickHouse/ClickHouse/pull/44241) ([ltrk2](https://github.com/ltrk2)). +* Fix double-free in HashTable::clearAndShrink() with zero elements in it. [#44256](https://github.com/ClickHouse/ClickHouse/pull/44256) ([Azat Khuzhin](https://github.com/azat)). +* Remove limits on maximum size of the result for view. [#44261](https://github.com/ClickHouse/ClickHouse/pull/44261) ([lizhuoyu5](https://github.com/lzydmxy)). +* Fix possible logical error in cache if `do_not_evict_index_and_mrk_files=1`. Closes [#42142](https://github.com/ClickHouse/ClickHouse/issues/42142). [#44268](https://github.com/ClickHouse/ClickHouse/pull/44268) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix possible too early cache write interruption in write-through cache (caching could be stopped due to false assumption when it shouldn't have). [#44289](https://github.com/ClickHouse/ClickHouse/pull/44289) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix possible crash in case function `IN` with constant arguments was used as a constant argument together with `LowCardinality`. Fixes [#44221](https://github.com/ClickHouse/ClickHouse/issues/44221). [#44346](https://github.com/ClickHouse/ClickHouse/pull/44346) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix support for complex parameters (like arrays) of parametric aggregate functions. This closes [#30975](https://github.com/ClickHouse/ClickHouse/issues/30975). The aggregate function `sumMapFiltered` was unusable in distributed queries before this change. 
[#44358](https://github.com/ClickHouse/ClickHouse/pull/44358) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* * Fix possible nullptr deference in JoinSwitcher with `allow_experimental_analyzer`. [#44371](https://github.com/ClickHouse/ClickHouse/pull/44371) ([Vladimir C](https://github.com/vdimir)). +* Fix reading ObjectId in BSON schema inference. [#44382](https://github.com/ClickHouse/ClickHouse/pull/44382) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix race which can lead to premature temp parts removal before merge finished in ReplicatedMergeTree. This issue could lead to errors like `No such file or directory: xxx`. Fixes [#43983](https://github.com/ClickHouse/ClickHouse/issues/43983). [#44383](https://github.com/ClickHouse/ClickHouse/pull/44383) ([alesapin](https://github.com/alesapin)). +* Some invalid `SYSTEM ... ON CLUSTER` queries worked in an unexpected way if a cluster name was not specified. It's fixed, now invalid queries throw `SYNTAX_ERROR` as they should. Fixes [#44264](https://github.com/ClickHouse/ClickHouse/issues/44264). [#44387](https://github.com/ClickHouse/ClickHouse/pull/44387) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix reading Map type in ORC format. [#44400](https://github.com/ClickHouse/ClickHouse/pull/44400) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix reading columns that are not presented in input data in Parquet/ORC formats. Previously it could lead to error `INCORRECT_NUMBER_OF_COLUMNS`. Closes [#44333](https://github.com/ClickHouse/ClickHouse/issues/44333). [#44405](https://github.com/ClickHouse/ClickHouse/pull/44405) ([Kruglov Pavel](https://github.com/Avogar)). +* Previously bar() function used the same '▋' (U+258B "Left five eighths block") character to display both 5/8 and 6/8 bars. This change corrects this behavior by using '▊' (U+258A "Left three quarters block") for displaying 6/8 bar. [#44410](https://github.com/ClickHouse/ClickHouse/pull/44410) ([Alexander Gololobov](https://github.com/davenger)). +* Placing profile settings after profile settings constraints in the configuration file made constraints ineffective. [#44411](https://github.com/ClickHouse/ClickHouse/pull/44411) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Fix `SYNTAX_ERROR` while running `EXPLAIN AST INSERT` queries with data. Closes [#44207](https://github.com/ClickHouse/ClickHouse/issues/44207). [#44413](https://github.com/ClickHouse/ClickHouse/pull/44413) ([save-my-heart](https://github.com/save-my-heart)). +* Fix reading bool value with CRLF in CSV format. Closes [#44401](https://github.com/ClickHouse/ClickHouse/issues/44401). [#44442](https://github.com/ClickHouse/ClickHouse/pull/44442) ([Kruglov Pavel](https://github.com/Avogar)). +* Don't execute and/or/if/multiIf on LowCardinality dictionary, so the result type cannot be LowCardinality. It could lead to error `Illegal column ColumnLowCardinality` in some cases. Fixes [#43603](https://github.com/ClickHouse/ClickHouse/issues/43603). [#44469](https://github.com/ClickHouse/ClickHouse/pull/44469) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix mutations with setting `max_streams_for_merge_tree_reading`. [#44472](https://github.com/ClickHouse/ClickHouse/pull/44472) ([Anton Popov](https://github.com/CurtizJ)). +* Fix potential null pointer dereference with GROUPING SETS in ASTSelectQuery::formatImpl ([#43049](https://github.com/ClickHouse/ClickHouse/issues/43049)). 
[#44479](https://github.com/ClickHouse/ClickHouse/pull/44479) ([Robert Schulze](https://github.com/rschu1ze)). +* Validate types in table function arguments, CAST function arguments, and JSONAsObject schema inference according to settings. [#44501](https://github.com/ClickHouse/ClickHouse/pull/44501) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix the `IN` function with a LowCardinality and a const column, close [#44503](https://github.com/ClickHouse/ClickHouse/issues/44503). [#44506](https://github.com/ClickHouse/ClickHouse/pull/44506) ([Duc Canh Le](https://github.com/canhld94)). +* Fixed a bug in normalization of a `DEFAULT` expression in a `CREATE TABLE` statement. The second argument of the function `in` (or the right argument of the operator `IN`) might be replaced with the result of its evaluation during CREATE query execution. Fixes [#44496](https://github.com/ClickHouse/ClickHouse/issues/44496). [#44547](https://github.com/ClickHouse/ClickHouse/pull/44547) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Projections do not work in the presence of WITH ROLLUP, WITH CUBE and WITH TOTALS. In previous versions, such a query produced an exception instead of skipping the usage of projections. This closes [#44614](https://github.com/ClickHouse/ClickHouse/issues/44614). This closes [#42772](https://github.com/ClickHouse/ClickHouse/issues/42772). [#44615](https://github.com/ClickHouse/ClickHouse/pull/44615) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix a bug in the experimental analyzer with `aggregate_functions_null_for_empty = 1`. Close [#44644](https://github.com/ClickHouse/ClickHouse/issues/44644). [#44648](https://github.com/ClickHouse/ClickHouse/pull/44648) ([Vladimir C](https://github.com/vdimir)). +* Async blocks were not cleaned up because the function `get all blocks sorted by time` did not return async blocks. [#44651](https://github.com/ClickHouse/ClickHouse/pull/44651) ([Han Fei](https://github.com/hanfei1991)). +* Fix `LOGICAL_ERROR` `The top step of the right pipeline should be ExpressionStep` for JOIN with subquery, UNION, and TOTALS. Fixes [#43687](https://github.com/ClickHouse/ClickHouse/issues/43687). [#44673](https://github.com/ClickHouse/ClickHouse/pull/44673) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Avoid std::out_of_range exception in StorageExecutable. [#44681](https://github.com/ClickHouse/ClickHouse/pull/44681) ([Kruglov Pavel](https://github.com/Avogar)). +* Do not apply `optimize_syntax_fuse_functions` to quantiles on AST, close [#44712](https://github.com/ClickHouse/ClickHouse/issues/44712). [#44713](https://github.com/ClickHouse/ClickHouse/pull/44713) ([Vladimir C](https://github.com/vdimir)). +* Fix a bug with a wrong type in Merge table and PREWHERE, close [#43324](https://github.com/ClickHouse/ClickHouse/issues/43324). [#44716](https://github.com/ClickHouse/ClickHouse/pull/44716) ([Vladimir C](https://github.com/vdimir)). +* Fix possible crash during shutdown (while destroying TraceCollector). Fixes [#44757](https://github.com/ClickHouse/ClickHouse/issues/44757). [#44758](https://github.com/ClickHouse/ClickHouse/pull/44758) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix a possible crash in distributed query processing. The crash could happen if a query with totals or extremes returned an empty result and there were mismatched types in the Distributed and the local tables. Fixes [#44738](https://github.com/ClickHouse/ClickHouse/issues/44738). 
[#44760](https://github.com/ClickHouse/ClickHouse/pull/44760) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix fsync for fetches (`min_compressed_bytes_to_fsync_after_fetch`)/small files (ttl.txt, columns.txt) in mutations (`min_rows_to_fsync_after_merge`/`min_compressed_bytes_to_fsync_after_merge`). [#44781](https://github.com/ClickHouse/ClickHouse/pull/44781) ([Azat Khuzhin](https://github.com/azat)). +* A rare race condition was possible when querying the `system.parts` or `system.parts_columns` tables in the presence of parts being moved between disks. Introduced in [#41145](https://github.com/ClickHouse/ClickHouse/issues/41145). [#44809](https://github.com/ClickHouse/ClickHouse/pull/44809) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix the error `Context has expired` which could appear with the projections optimization enabled. Can be reproduced for queries with specific functions, like `dictHas/dictGet`, which use the context at runtime. Fixes [#44844](https://github.com/ClickHouse/ClickHouse/issues/44844). [#44850](https://github.com/ClickHouse/ClickHouse/pull/44850) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Another fix for the `Cannot read all data` error which could happen while reading a `LowCardinality` dictionary from remote fs. Fixes [#44709](https://github.com/ClickHouse/ClickHouse/issues/44709). [#44875](https://github.com/ClickHouse/ClickHouse/pull/44875) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Ignore hwmon sensors on label read issues. [#44895](https://github.com/ClickHouse/ClickHouse/pull/44895) ([Raúl Marín](https://github.com/Algunenano)). +* Use the `max_delay_to_insert` value in case the calculated time to delay INSERT exceeds the setting value. Related to [#44902](https://github.com/ClickHouse/ClickHouse/issues/44902). [#44916](https://github.com/ClickHouse/ClickHouse/pull/44916) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix error `Different order of columns in UNION subquery` for queries with `UNION`. Fixes [#44866](https://github.com/ClickHouse/ClickHouse/issues/44866). [#44920](https://github.com/ClickHouse/ClickHouse/pull/44920) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* The delay for INSERT could be calculated incorrectly, which could lead to always using the `max_delay_to_insert` setting as the delay instead of a correct value. Now a simple formula is used: `max_delay_to_insert * (parts_over_threshold/max_allowed_parts_over_threshold)`, i.e. the delay grows proportionally to the number of parts over the threshold (a short worked example appears further below in this section). Closes [#44902](https://github.com/ClickHouse/ClickHouse/issues/44902). [#44954](https://github.com/ClickHouse/ClickHouse/pull/44954) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix an `ALTER TABLE ... TTL` error when a wide part has a lightweight delete mask. [#44959](https://github.com/ClickHouse/ClickHouse/pull/44959) ([Mingliang Pan](https://github.com/liangliangpan)). +* Follow-up fix for "Replace domain IP types (IPv4, IPv6) with native" [#43221](https://github.com/ClickHouse/ClickHouse/issues/43221). [#45024](https://github.com/ClickHouse/ClickHouse/pull/45024) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Follow-up fix for "Replace domain IP types (IPv4, IPv6) with native" https://github.com/ClickHouse/ClickHouse/pull/43221. [#45043](https://github.com/ClickHouse/ClickHouse/pull/45043) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* A buffer overflow was possible in the parser. Found by fuzzer. 
[#45047](https://github.com/ClickHouse/ClickHouse/pull/45047) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix possible cannot-read-all-data error in storage FileLog. Closes [#45051](https://github.com/ClickHouse/ClickHouse/issues/45051), [#38257](https://github.com/ClickHouse/ClickHouse/issues/38257). [#45057](https://github.com/ClickHouse/ClickHouse/pull/45057) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Memory efficient aggregation (setting `distributed_aggregation_memory_efficient`) is disabled when grouping sets are present in the query. [#45058](https://github.com/ClickHouse/ClickHouse/pull/45058) ([Nikita Taranov](https://github.com/nickitat)). +* Fix `RANGE_HASHED` dictionary to count range columns as part of the primary key during updates when `update_field` is specified. Closes [#44588](https://github.com/ClickHouse/ClickHouse/issues/44588). [#45061](https://github.com/ClickHouse/ClickHouse/pull/45061) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix error `Cannot capture column` for a `LowCardinality` captured argument of a nested lambda. Fixes [#45028](https://github.com/ClickHouse/ClickHouse/issues/45028). [#45065](https://github.com/ClickHouse/ClickHouse/pull/45065) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix the wrong query result of `additional_table_filters` (the additional filter was not applied) in case the minmax/count projection is used. [#45133](https://github.com/ClickHouse/ClickHouse/pull/45133) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed a bug in the `histogram` function accepting negative values. [#45147](https://github.com/ClickHouse/ClickHouse/pull/45147) ([simpleton](https://github.com/rgzntrade)). +* Follow-up fix for "Replace domain IP types (IPv4, IPv6) with native" https://github.com/ClickHouse/ClickHouse/pull/43221. [#45150](https://github.com/ClickHouse/ClickHouse/pull/45150) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix wrong column nullability in StorageJoin, close [#44940](https://github.com/ClickHouse/ClickHouse/issues/44940). [#45184](https://github.com/ClickHouse/ClickHouse/pull/45184) ([Vladimir C](https://github.com/vdimir)). +* Fix `background_fetches_pool_size` settings reload (increase at runtime). [#45189](https://github.com/ClickHouse/ClickHouse/pull/45189) ([Raúl Marín](https://github.com/Algunenano)). +* Correctly process `SELECT` queries on KV engines (e.g. KeeperMap, EmbeddedRocksDB) using `IN` on the key with a subquery producing a different type. [#45215](https://github.com/ClickHouse/ClickHouse/pull/45215) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix logical error in SEMI JOIN & join_use_nulls in some cases, close [#45163](https://github.com/ClickHouse/ClickHouse/issues/45163), close [#45209](https://github.com/ClickHouse/ClickHouse/issues/45209). [#45230](https://github.com/ClickHouse/ClickHouse/pull/45230) ([Vladimir C](https://github.com/vdimir)). +* Fix heap-use-after-free in reading from s3. [#45253](https://github.com/ClickHouse/ClickHouse/pull/45253) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix a bug when the Avro Union type is ['null', Nested type], closes [#45275](https://github.com/ClickHouse/ClickHouse/issues/45275). Fix a bug that incorrectly inferred the `bytes` type as `Float`. [#45276](https://github.com/ClickHouse/ClickHouse/pull/45276) ([flynn](https://github.com/ucasfl)). +* Throw a correct exception when an explicit PREWHERE cannot be used with a table using the storage engine `Merge`. 
[#45319](https://github.com/ClickHouse/ClickHouse/pull/45319) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix decompression of the self-extracting `clickhouse` binary under WSL1 Ubuntu, which failed due to an inconsistency: /proc/self/maps reports the file's inode as 32-bit, while stat reports a 64-bit inode. [#45339](https://github.com/ClickHouse/ClickHouse/pull/45339) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix a race in Distributed table startup (that could lead to processing a file of an async INSERT multiple times). [#45360](https://github.com/ClickHouse/ClickHouse/pull/45360) ([Azat Khuzhin](https://github.com/azat)). +* Fix possible crash while reading from storage `S3` and table function `s3` in case a `ListObject` request has failed. [#45371](https://github.com/ClickHouse/ClickHouse/pull/45371) ([Anton Popov](https://github.com/CurtizJ)). +* Fixed some bugs in JOINs with WHERE by disabling the "move to prewhere" optimization for them, close [#44062](https://github.com/ClickHouse/ClickHouse/issues/44062). [#45391](https://github.com/ClickHouse/ClickHouse/pull/45391) ([Vladimir C](https://github.com/vdimir)). +* Fix `SELECT ... FROM system.dictionaries` exception when there is a dictionary with a bad structure (e.g. an incorrect type in the xml config). [#45399](https://github.com/ClickHouse/ClickHouse/pull/45399) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Fix s3Cluster schema inference when the structure from the insertion table is used in `INSERT INTO ... SELECT * FROM s3Cluster` queries. [#45422](https://github.com/ClickHouse/ClickHouse/pull/45422) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix a bug in JSON/BSONEachRow parsing with HTTP that could lead to using default values for some columns instead of values from the data. [#45424](https://github.com/ClickHouse/ClickHouse/pull/45424) ([Kruglov Pavel](https://github.com/Avogar)). +* Fixed a bug (Code: 632. DB::Exception: Unexpected data ... after parsed IPv6 value ...) with typed parsing of IP types from a text source. [#45425](https://github.com/ClickHouse/ClickHouse/pull/45425) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Add a check for empty regular expressions. Closes [#45297](https://github.com/ClickHouse/ClickHouse/issues/45297). [#45428](https://github.com/ClickHouse/ClickHouse/pull/45428) ([Han Fei](https://github.com/hanfei1991)). +* Fix a possible (most likely distributed) query hang. [#45448](https://github.com/ClickHouse/ClickHouse/pull/45448) ([Azat Khuzhin](https://github.com/azat)). +* Fix disabled two-level aggregation from HTTP. [#45450](https://github.com/ClickHouse/ClickHouse/pull/45450) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix possible deadlock with `allow_asynchronous_read_from_io_pool_for_merge_tree` enabled in case of an exception from `ThreadPool::schedule`. [#45481](https://github.com/ClickHouse/ClickHouse/pull/45481) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix a possible in-use table after DETACH. [#45493](https://github.com/ClickHouse/ClickHouse/pull/45493) ([Azat Khuzhin](https://github.com/azat)). +* Fix a rare abort in case a query is canceled and parallel parsing was used during its execution. [#45498](https://github.com/ClickHouse/ClickHouse/pull/45498) ([Anton Popov](https://github.com/CurtizJ)). +* Fix a race between Distributed table creation and INSERT into it (could lead to CANNOT_LINK during INSERT into the table). [#45502](https://github.com/ClickHouse/ClickHouse/pull/45502) ([Azat Khuzhin](https://github.com/azat)). 
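+
+Two brief illustrative sketches for fixes above; the queries and numbers below are made up for illustration and are not taken from the PRs.
+
+For the parametric aggregate function fix in [#44358](https://github.com/ClickHouse/ClickHouse/pull/44358), a minimal `sumMapFiltered` call with an array parameter looks like this (made-up rows; before the fix, such a query could fail when sent to remote servers):
+
+```sql
+SELECT sumMapFiltered([1, 4])(keys, vals)  -- expected to return ([1, 4], [10, 7])
+FROM
+(
+    SELECT [1, 2, 3] AS keys, [10, 20, 30] AS vals
+    UNION ALL
+    SELECT [3, 4] AS keys, [5, 7] AS vals
+);
+```
+
+For the INSERT delay fix in [#44954](https://github.com/ClickHouse/ClickHouse/pull/44954), the proportional formula can be worked through with purely hypothetical numbers:
+
+```sql
+-- Assume max_delay_to_insert = 1 sec, 50 parts over the threshold,
+-- and max_allowed_parts_over_threshold = 100 (made-up values).
+SELECT 1 * (50 / 100) AS delay_seconds;  -- 0.5 sec instead of always the full max_delay_to_insert
+```
+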
+* Add a proper default (SLRU) to the cache policy getter. Closes [#45514](https://github.com/ClickHouse/ClickHouse/issues/45514). [#45524](https://github.com/ClickHouse/ClickHouse/pull/45524) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Remove the AST-based optimization `optimize_fuse_sum_count_avg`, close [#45439](https://github.com/ClickHouse/ClickHouse/issues/45439). [#45558](https://github.com/ClickHouse/ClickHouse/pull/45558) ([Vladimir C](https://github.com/vdimir)). + +#### Bug-fix + +* Disallow `arrayJoin` in mutations. Closes [#42637](https://github.com/ClickHouse/ClickHouse/issues/42637). Implementation: a new parameter `disallow_arrayjoin` was added to `ActionsVisitor::Data`; it is set by MutationsIterator when it appends an expression, and `ActionsVisitor` throws an error when `arrayJoin` is used in mutations. Testing: added the test 02504_disallow_arrayjoin_in_mutations.sql. [#44447](https://github.com/ClickHouse/ClickHouse/pull/44447) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix for qualified asterisks with an aliased table name and a column transformer. Resolves [#44736](https://github.com/ClickHouse/ClickHouse/issues/44736). [#44755](https://github.com/ClickHouse/ClickHouse/pull/44755) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Update the backup/restore status when concurrent backups & restores are not allowed. Resolves [#45486](https://github.com/ClickHouse/ClickHouse/issues/45486). Implementation: the concurrent backup/restore check was moved inside the try-catch block which sets the status, so that other nodes in the cluster are aware of failures; backup_uuid was renamed to restore_uuid in RestoreSettings. [#45497](https://github.com/ClickHouse/ClickHouse/pull/45497) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). + +#### Build Improvement + +* CRC32 fix for s390x. [#43706](https://github.com/ClickHouse/ClickHouse/pull/43706) ([Suzy Wang](https://github.com/SuzyWangIBMer)). +* Fixed endianness issues in the transform function for s390x. [#45522](https://github.com/ClickHouse/ClickHouse/pull/45522) ([Harry Lee](https://github.com/HarryLeeIBM)). + +#### Feature + +* Record server startup time in ProfileEvents. Resolves [#43188](https://github.com/ClickHouse/ClickHouse/issues/43188). Implementation: added ProfileEvents::ServerStartupMilliseconds, recording the time from the start of main until the server starts listening on sockets. Testing: added the test 02532_profileevents_server_startup_time.sql. [#45250](https://github.com/ClickHouse/ClickHouse/pull/45250) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "If user only need virtual columns, we don't need to initialize ReadBufferFromS3"'. [#44939](https://github.com/ClickHouse/ClickHouse/pull/44939) ([Anton Popov](https://github.com/CurtizJ)). +* NO CL ENTRY: 'Revert "Custom reading for mutation"'. [#45121](https://github.com/ClickHouse/ClickHouse/pull/45121) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Revert "Custom reading for mutation""'. [#45122](https://github.com/ClickHouse/ClickHouse/pull/45122) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* NO CL ENTRY: 'Revert "update function DAYOFWEEK and add new function WEEKDAY for mysql/spark compatiability"'. [#45221](https://github.com/ClickHouse/ClickHouse/pull/45221) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Validate function arguments in query tree"'. 
[#45299](https://github.com/ClickHouse/ClickHouse/pull/45299) ([Maksim Kita](https://github.com/kitaisreal)). +* NO CL ENTRY: 'Revert "Revert "Validate function arguments in query tree""'. [#45300](https://github.com/ClickHouse/ClickHouse/pull/45300) ([Maksim Kita](https://github.com/kitaisreal)). +* NO CL ENTRY: 'Revert "Support optimize_or_like_chain in QueryTreePassManager"'. [#45406](https://github.com/ClickHouse/ClickHouse/pull/45406) ([Anton Popov](https://github.com/CurtizJ)). +* NO CL ENTRY: 'Resubmit Support optimize_or_like_chain in QueryTreePassManager'. [#45410](https://github.com/ClickHouse/ClickHouse/pull/45410) ([Dmitry Novik](https://github.com/novikd)). +* NO CL ENTRY: 'Revert "Remove redundant sorting"'. [#45414](https://github.com/ClickHouse/ClickHouse/pull/45414) ([Igor Nikonov](https://github.com/devcrafter)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Automatically merge green backport PRs and green approved PRs [#41110](https://github.com/ClickHouse/ClickHouse/pull/41110) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix assertion in async read buffer from remote [#41231](https://github.com/ClickHouse/ClickHouse/pull/41231) ([Kseniia Sumarokova](https://github.com/kssenii)). +* add retries on ConnectionError [#42991](https://github.com/ClickHouse/ClickHouse/pull/42991) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Update aws-c* submodules [#43020](https://github.com/ClickHouse/ClickHouse/pull/43020) ([Vitaly Baranov](https://github.com/vitlibar)). +* Replace domain IP types (IPv4, IPv6) with native [#43221](https://github.com/ClickHouse/ClickHouse/pull/43221) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix aggregate functions optimisation in AggregateFunctionsArithmericOperationsPass [#43372](https://github.com/ClickHouse/ClickHouse/pull/43372) ([Dmitry Novik](https://github.com/novikd)). +* Improve pytest --pdb experience by preserving dockerd on SIGINT [#43392](https://github.com/ClickHouse/ClickHouse/pull/43392) ([Azat Khuzhin](https://github.com/azat)). +* RFC: tests: add stacktraces for hunged queries [#43396](https://github.com/ClickHouse/ClickHouse/pull/43396) ([Azat Khuzhin](https://github.com/azat)). +* Followup fixes for systemd notification ([#43400](https://github.com/ClickHouse/ClickHouse/issues/43400)) [#43597](https://github.com/ClickHouse/ClickHouse/pull/43597) ([Alexander Gololobov](https://github.com/davenger)). +* Refactor FunctionNode [#43761](https://github.com/ClickHouse/ClickHouse/pull/43761) ([Dmitry Novik](https://github.com/novikd)). +* Some cleanup: grace hash join [#43851](https://github.com/ClickHouse/ClickHouse/pull/43851) ([Igor Nikonov](https://github.com/devcrafter)). +* Temporary files evict fs cache - 2nd approach [#43972](https://github.com/ClickHouse/ClickHouse/pull/43972) ([Vladimir C](https://github.com/vdimir)). +* Randomize setting `enable_memory_bound_merging_of_aggregation_results` in tests [#43986](https://github.com/ClickHouse/ClickHouse/pull/43986) ([Nikita Taranov](https://github.com/nickitat)). +* Analyzer aggregate functions passes small fixes [#44013](https://github.com/ClickHouse/ClickHouse/pull/44013) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix wrong char in command [#44018](https://github.com/ClickHouse/ClickHouse/pull/44018) ([alesapin](https://github.com/alesapin)). +* Analyzer support Set index [#44097](https://github.com/ClickHouse/ClickHouse/pull/44097) ([Maksim Kita](https://github.com/kitaisreal)). 
+* Provide monotonicity info for `toUnixTimestamp64*` [#44116](https://github.com/ClickHouse/ClickHouse/pull/44116) ([Nikita Taranov](https://github.com/nickitat)). +* Avoid loading toolchain files multiple times [#44122](https://github.com/ClickHouse/ClickHouse/pull/44122) ([Azat Khuzhin](https://github.com/azat)). +* tests: exclude flaky columns from SHOW CLUSTERS test [#44123](https://github.com/ClickHouse/ClickHouse/pull/44123) ([Azat Khuzhin](https://github.com/azat)). +* Bump libdivide (to gain some new optimizations) [#44132](https://github.com/ClickHouse/ClickHouse/pull/44132) ([Azat Khuzhin](https://github.com/azat)). +* Make atomic counter relaxed in blockNumber() [#44193](https://github.com/ClickHouse/ClickHouse/pull/44193) ([Igor Nikonov](https://github.com/devcrafter)). +* Try fix flaky 01072_window_view_multiple_columns_groupby [#44195](https://github.com/ClickHouse/ClickHouse/pull/44195) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Apply new code of named collections (from [#43147](https://github.com/ClickHouse/ClickHouse/issues/43147)) to external table engines part 1 [#44204](https://github.com/ClickHouse/ClickHouse/pull/44204) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add some settings under `compatibility` [#44209](https://github.com/ClickHouse/ClickHouse/pull/44209) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Recommend Slack over Telegram in the "Question" issue template [#44222](https://github.com/ClickHouse/ClickHouse/pull/44222) ([Ivan Blinkov](https://github.com/blinkov)). +* Forbid paths in timezone names [#44225](https://github.com/ClickHouse/ClickHouse/pull/44225) ([Kruglov Pavel](https://github.com/Avogar)). +* Analyzer storage view crash fix [#44230](https://github.com/ClickHouse/ClickHouse/pull/44230) ([Maksim Kita](https://github.com/kitaisreal)). +* Add ThreadsInOvercommitTracker metric [#44233](https://github.com/ClickHouse/ClickHouse/pull/44233) ([Dmitry Novik](https://github.com/novikd)). +* Analyzer expired Context crash fix [#44234](https://github.com/ClickHouse/ClickHouse/pull/44234) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix grace join memory consumption, pt1 [#44238](https://github.com/ClickHouse/ClickHouse/pull/44238) ([Vladimir C](https://github.com/vdimir)). +* Fixed use-after-free of BLAKE3 error message [#44242](https://github.com/ClickHouse/ClickHouse/pull/44242) ([Joanna Hulboj](https://github.com/jh0x)). +* Fix deadlock in StorageSystemDatabases [#44272](https://github.com/ClickHouse/ClickHouse/pull/44272) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Get rid of global Git object [#44273](https://github.com/ClickHouse/ClickHouse/pull/44273) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update version after release [#44275](https://github.com/ClickHouse/ClickHouse/pull/44275) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update version_date.tsv and changelogs after v22.12.1.1752-stable [#44281](https://github.com/ClickHouse/ClickHouse/pull/44281) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Do not hold data parts during insert [#44299](https://github.com/ClickHouse/ClickHouse/pull/44299) ([Anton Popov](https://github.com/CurtizJ)). +* Another fix `test_server_reload` [#44306](https://github.com/ClickHouse/ClickHouse/pull/44306) ([Antonio Andelic](https://github.com/antonio2368)). 
+* Update version_date.tsv and changelogs after v22.9.7.34-stable [#44309](https://github.com/ClickHouse/ClickHouse/pull/44309) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* tests/perf: fix dependency check during DROP [#44312](https://github.com/ClickHouse/ClickHouse/pull/44312) ([Azat Khuzhin](https://github.com/azat)). +* (unused openssl integration, not for production) a follow-up [#44325](https://github.com/ClickHouse/ClickHouse/pull/44325) ([Boris Kuschel](https://github.com/bkuschel)). +* Replace old named collections code with new (from [#43147](https://github.com/ClickHouse/ClickHouse/issues/43147)) part 2 [#44327](https://github.com/ClickHouse/ClickHouse/pull/44327) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Disable "git-import" test in debug mode [#44328](https://github.com/ClickHouse/ClickHouse/pull/44328) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Check s3 part upload settings [#44335](https://github.com/ClickHouse/ClickHouse/pull/44335) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix typo [#44337](https://github.com/ClickHouse/ClickHouse/pull/44337) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for PowerBI [#44338](https://github.com/ClickHouse/ClickHouse/pull/44338) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#36038](https://github.com/ClickHouse/ClickHouse/issues/36038) [#44339](https://github.com/ClickHouse/ClickHouse/pull/44339) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#29386](https://github.com/ClickHouse/ClickHouse/issues/29386) [#44340](https://github.com/ClickHouse/ClickHouse/pull/44340) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#22929](https://github.com/ClickHouse/ClickHouse/issues/22929) [#44341](https://github.com/ClickHouse/ClickHouse/pull/44341) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#29883](https://github.com/ClickHouse/ClickHouse/issues/29883) [#44342](https://github.com/ClickHouse/ClickHouse/pull/44342) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix Docker [#44343](https://github.com/ClickHouse/ClickHouse/pull/44343) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* fix flack test "02481_async_insert_dedup.python" [#44349](https://github.com/ClickHouse/ClickHouse/pull/44349) ([Han Fei](https://github.com/hanfei1991)). +* Add a test for [#22160](https://github.com/ClickHouse/ClickHouse/issues/22160) [#44355](https://github.com/ClickHouse/ClickHouse/pull/44355) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#34708](https://github.com/ClickHouse/ClickHouse/issues/34708) [#44356](https://github.com/ClickHouse/ClickHouse/pull/44356) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#30679](https://github.com/ClickHouse/ClickHouse/issues/30679) [#44357](https://github.com/ClickHouse/ClickHouse/pull/44357) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#34669](https://github.com/ClickHouse/ClickHouse/issues/34669) [#44359](https://github.com/ClickHouse/ClickHouse/pull/44359) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#34724](https://github.com/ClickHouse/ClickHouse/issues/34724) [#44360](https://github.com/ClickHouse/ClickHouse/pull/44360) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Try restarting ZK cluster on failed connection in `test_keeper_zookeeper_converted` [#44363](https://github.com/ClickHouse/ClickHouse/pull/44363) ([Antonio Andelic](https://github.com/antonio2368)). +* Disable grase_hash in test 00172_parallel_join [#44367](https://github.com/ClickHouse/ClickHouse/pull/44367) ([Vladimir C](https://github.com/vdimir)). +* Add check for submodules sanity [#44386](https://github.com/ClickHouse/ClickHouse/pull/44386) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Lock table for share during startup for database ordinary [#44393](https://github.com/ClickHouse/ClickHouse/pull/44393) ([alesapin](https://github.com/alesapin)). +* Implement a custom central checkout action [#44399](https://github.com/ClickHouse/ClickHouse/pull/44399) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Try fix some tests [#44406](https://github.com/ClickHouse/ClickHouse/pull/44406) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Better ParserAllCollectionsOfLiterals [#44408](https://github.com/ClickHouse/ClickHouse/pull/44408) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix bug with merge/mutate pool size increase [#44436](https://github.com/ClickHouse/ClickHouse/pull/44436) ([alesapin](https://github.com/alesapin)). +* Update 01072_window_view_multiple_columns_groupby.sh [#44438](https://github.com/ClickHouse/ClickHouse/pull/44438) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Disable buggy tsan assertion for integration test [#44444](https://github.com/ClickHouse/ClickHouse/pull/44444) ([alesapin](https://github.com/alesapin)). +* Respect setting settings.schema_inference_make_columns_nullable in Parquet/ORC/Arrow formats [#44446](https://github.com/ClickHouse/ClickHouse/pull/44446) ([Kruglov Pavel](https://github.com/Avogar)). +* Add tests as examples with errors of date(time) and string comparison that we should eliminate [#44462](https://github.com/ClickHouse/ClickHouse/pull/44462) ([Ilya Yatsishin](https://github.com/qoega)). +* Parallel parts cleanup with zero copy replication [#44466](https://github.com/ClickHouse/ClickHouse/pull/44466) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix incorrect usages of `getPartName()` [#44468](https://github.com/ClickHouse/ClickHouse/pull/44468) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix flaky test `roaring_memory_tracking` [#44470](https://github.com/ClickHouse/ClickHouse/pull/44470) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Clarify query_id in test 01092_memory_profiler [#44483](https://github.com/ClickHouse/ClickHouse/pull/44483) ([Vladimir C](https://github.com/vdimir)). +* Default value for optional in SortNode::updateTreeHashImpl [#44491](https://github.com/ClickHouse/ClickHouse/pull/44491) ([Vladimir C](https://github.com/vdimir)). +* Do not try to remove WAL/move broken parts for static storage [#44495](https://github.com/ClickHouse/ClickHouse/pull/44495) ([Azat Khuzhin](https://github.com/azat)). +* Removed parent pid check that breaks in containers [#44499](https://github.com/ClickHouse/ClickHouse/pull/44499) ([Alexander Gololobov](https://github.com/davenger)). +* Analyzer duplicate alias crash fix [#44508](https://github.com/ClickHouse/ClickHouse/pull/44508) ([Maksim Kita](https://github.com/kitaisreal)). +* Minor code polishing [#44513](https://github.com/ClickHouse/ClickHouse/pull/44513) ([alesapin](https://github.com/alesapin)). 
+* Better error message if named collection does not exist [#44517](https://github.com/ClickHouse/ClickHouse/pull/44517) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add the lambda to collect data for workflow_jobs [#44520](https://github.com/ClickHouse/ClickHouse/pull/44520) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Introduce groupArrayLast() (useful to store last X values) [#44521](https://github.com/ClickHouse/ClickHouse/pull/44521) ([Azat Khuzhin](https://github.com/azat)). +* Infer numbers starting from zero as strings in TSV [#44522](https://github.com/ClickHouse/ClickHouse/pull/44522) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix wrong condition for enabling async reading from MergeTree. [#44530](https://github.com/ClickHouse/ClickHouse/pull/44530) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* tests: capture dmesg in integration tests [#44535](https://github.com/ClickHouse/ClickHouse/pull/44535) ([Azat Khuzhin](https://github.com/azat)). +* Analyzer support distributed queries processing [#44540](https://github.com/ClickHouse/ClickHouse/pull/44540) ([Maksim Kita](https://github.com/kitaisreal)). +* Followup [#43761](https://github.com/ClickHouse/ClickHouse/issues/43761) [#44541](https://github.com/ClickHouse/ClickHouse/pull/44541) ([Dmitry Novik](https://github.com/novikd)). +* Drop unused columns after join on/using [#44545](https://github.com/ClickHouse/ClickHouse/pull/44545) ([Vladimir C](https://github.com/vdimir)). +* Improve inferring arrays with nulls in JSON formats [#44550](https://github.com/ClickHouse/ClickHouse/pull/44550) ([Kruglov Pavel](https://github.com/Avogar)). +* Make BC check optional (if env var set) [#44564](https://github.com/ClickHouse/ClickHouse/pull/44564) ([alesapin](https://github.com/alesapin)). +* Fix extremely slow stack traces in debug build [#44569](https://github.com/ClickHouse/ClickHouse/pull/44569) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Better command line argument name in `clickhouse-benchmark` [#44570](https://github.com/ClickHouse/ClickHouse/pull/44570) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix HDFS test [#44572](https://github.com/ClickHouse/ClickHouse/pull/44572) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test_distributed_queries_stress [#44573](https://github.com/ClickHouse/ClickHouse/pull/44573) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Switch "contrib/sysroot" back to master. [#44574](https://github.com/ClickHouse/ClickHouse/pull/44574) ([Vitaly Baranov](https://github.com/vitlibar)). +* Non-significant changes [#44575](https://github.com/ClickHouse/ClickHouse/pull/44575) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fuzzer HTML: fix trash [#44580](https://github.com/ClickHouse/ClickHouse/pull/44580) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Better diagnostics on server stop for the stress test [#44593](https://github.com/ClickHouse/ClickHouse/pull/44593) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The position of the log message about the server environment was wrong [#44595](https://github.com/ClickHouse/ClickHouse/pull/44595) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix bad punctuation in log [#44596](https://github.com/ClickHouse/ClickHouse/pull/44596) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
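+
+A brief illustrative sketch for `groupArrayLast()` from [#44521](https://github.com/ClickHouse/ClickHouse/pull/44521) above (the query and numbers are made up for illustration):
+
+```sql
+-- Keep only the last 3 values seen by the aggregate; with numbers(10)
+-- this is expected to return [7, 8, 9].
+SELECT groupArrayLast(3)(number) FROM numbers(10);
+```
+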
+* Fix misleading log message [#44598](https://github.com/ClickHouse/ClickHouse/pull/44598) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix bad log message about MergeTree metadata cache. [#44599](https://github.com/ClickHouse/ClickHouse/pull/44599) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Slightly cleanup interactive line reader code [#44601](https://github.com/ClickHouse/ClickHouse/pull/44601) ([Azat Khuzhin](https://github.com/azat)). +* Rename `runlog.log` to `run.log` in tests [#44603](https://github.com/ClickHouse/ClickHouse/pull/44603) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix hung query in stress test [#44604](https://github.com/ClickHouse/ClickHouse/pull/44604) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve variable name [#44605](https://github.com/ClickHouse/ClickHouse/pull/44605) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Faster server startup after stress test [#44606](https://github.com/ClickHouse/ClickHouse/pull/44606) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix log messages in Coordination [#44607](https://github.com/ClickHouse/ClickHouse/pull/44607) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Disable Analyzer in fuzz and stress tests [#44609](https://github.com/ClickHouse/ClickHouse/pull/44609) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Better log message [#44610](https://github.com/ClickHouse/ClickHouse/pull/44610) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Maybe fix a bogus MSan error [#44611](https://github.com/ClickHouse/ClickHouse/pull/44611) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix "too large allocation" message from MSan [#44613](https://github.com/ClickHouse/ClickHouse/pull/44613) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Do not fail the AST fuzzer if sanitizer is out of memory [#44616](https://github.com/ClickHouse/ClickHouse/pull/44616) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test `01111_create_drop_replicated_db_stress` [#44617](https://github.com/ClickHouse/ClickHouse/pull/44617) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* tests/integration: suppress exceptions during logging (due to pytest) [#44618](https://github.com/ClickHouse/ClickHouse/pull/44618) ([Azat Khuzhin](https://github.com/azat)). +* Fix rust modules rebuild (previously ignores changes in cargo config.toml) [#44623](https://github.com/ClickHouse/ClickHouse/pull/44623) ([Azat Khuzhin](https://github.com/azat)). +* Sometimes spot instances fail more than 20 times in a row [#44626](https://github.com/ClickHouse/ClickHouse/pull/44626) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix restart after quorum insert [#44628](https://github.com/ClickHouse/ClickHouse/pull/44628) ([alesapin](https://github.com/alesapin)). +* Revert "Merge pull request [#38953](https://github.com/ClickHouse/ClickHouse/issues/38953) from ClickHouse/add-allocation-ptr-to-trace-log [#44629](https://github.com/ClickHouse/ClickHouse/pull/44629) ([Raúl Marín](https://github.com/Algunenano)). +* Fix lambdas parsing [#44639](https://github.com/ClickHouse/ClickHouse/pull/44639) ([Nikolay Degterinsky](https://github.com/evillique)). +* Function viewExplain accept SELECT and settings [#44641](https://github.com/ClickHouse/ClickHouse/pull/44641) ([Vladimir C](https://github.com/vdimir)). 
+* Fix test `02015_async_inserts_2` [#44642](https://github.com/ClickHouse/ClickHouse/pull/44642) ([Anton Popov](https://github.com/CurtizJ)). +* Fix flaky test `test_keeper_multinode_simple` [#44645](https://github.com/ClickHouse/ClickHouse/pull/44645) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Add +x flag for run-fuzzer.sh [#44649](https://github.com/ClickHouse/ClickHouse/pull/44649) ([alesapin](https://github.com/alesapin)). +* Custom reading for mutation [#44653](https://github.com/ClickHouse/ClickHouse/pull/44653) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix flaky test test_backup_restore_on_cluster [#44660](https://github.com/ClickHouse/ClickHouse/pull/44660) ([Vitaly Baranov](https://github.com/vitlibar)). +* tests/integration: add missing kazoo client termination [#44666](https://github.com/ClickHouse/ClickHouse/pull/44666) ([Azat Khuzhin](https://github.com/azat)). +* Move dmesg dumping out from runner to ci-runner.py [#44667](https://github.com/ClickHouse/ClickHouse/pull/44667) ([Azat Khuzhin](https://github.com/azat)). +* Remove questdb (it makes a little sense but the test was flaky) [#44669](https://github.com/ClickHouse/ClickHouse/pull/44669) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix minor typo: replace validate_bugix_check with validate_bugfix_check [#44672](https://github.com/ClickHouse/ClickHouse/pull/44672) ([Pradeep Chhetri](https://github.com/chhetripradeep)). +* Fix parsing of ANY operator [#44678](https://github.com/ClickHouse/ClickHouse/pull/44678) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix test `01130_in_memory_parts` [#44683](https://github.com/ClickHouse/ClickHouse/pull/44683) ([Anton Popov](https://github.com/CurtizJ)). +* Remove old code [#44685](https://github.com/ClickHouse/ClickHouse/pull/44685) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix flaky test git-import [#44687](https://github.com/ClickHouse/ClickHouse/pull/44687) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve odbc test [#44688](https://github.com/ClickHouse/ClickHouse/pull/44688) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add retries to HTTP requests in ClickHouse test [#44689](https://github.com/ClickHouse/ClickHouse/pull/44689) ([alesapin](https://github.com/alesapin)). +* Fix flaky tests [#44690](https://github.com/ClickHouse/ClickHouse/pull/44690) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix flaky test "01502_long_log_tinylog_deadlock_race" [#44693](https://github.com/ClickHouse/ClickHouse/pull/44693) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve handling of old parts [#44694](https://github.com/ClickHouse/ClickHouse/pull/44694) ([Raúl Marín](https://github.com/Algunenano)). +* Update entrypoint.sh [#44699](https://github.com/ClickHouse/ClickHouse/pull/44699) ([Denny Crane](https://github.com/den-crane)). +* tests: more fixes for test_keeper_auth [#44702](https://github.com/ClickHouse/ClickHouse/pull/44702) ([Azat Khuzhin](https://github.com/azat)). +* Fix crash on delete from materialized view [#44705](https://github.com/ClickHouse/ClickHouse/pull/44705) ([Alexander Gololobov](https://github.com/davenger)). +* Fix flaky filelog tests with database ordinary [#44706](https://github.com/ClickHouse/ClickHouse/pull/44706) ([Kseniia Sumarokova](https://github.com/kssenii)). 
+* Make lightweight deletes always synchronous [#44718](https://github.com/ClickHouse/ClickHouse/pull/44718) ([Alexander Gololobov](https://github.com/davenger)). +* Fix deadlock in attach thread [#44719](https://github.com/ClickHouse/ClickHouse/pull/44719) ([alesapin](https://github.com/alesapin)). +* A few improvements to AST Fuzzer [#44720](https://github.com/ClickHouse/ClickHouse/pull/44720) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix flaky test [#44721](https://github.com/ClickHouse/ClickHouse/pull/44721) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Rename log in stress test [#44722](https://github.com/ClickHouse/ClickHouse/pull/44722) ([alesapin](https://github.com/alesapin)). +* Debug deadlock in stress test [#44723](https://github.com/ClickHouse/ClickHouse/pull/44723) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix flaky test "02102_row_binary_with_names_and_types.sh" [#44724](https://github.com/ClickHouse/ClickHouse/pull/44724) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Slightly better some tests [#44725](https://github.com/ClickHouse/ClickHouse/pull/44725) ([alesapin](https://github.com/alesapin)). +* Fix cases when clickhouse-server takes long time to start in functional tests with MSan [#44726](https://github.com/ClickHouse/ClickHouse/pull/44726) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Perf test: Log the time spent waiting for file sync [#44737](https://github.com/ClickHouse/ClickHouse/pull/44737) ([Raúl Marín](https://github.com/Algunenano)). +* Fix flaky test 02448_clone_replica_lost_part [#44759](https://github.com/ClickHouse/ClickHouse/pull/44759) ([alesapin](https://github.com/alesapin)). +* Build rust modules from the binary directory [#44762](https://github.com/ClickHouse/ClickHouse/pull/44762) ([Azat Khuzhin](https://github.com/azat)). +* Remove database ordinary from stress test [#44763](https://github.com/ClickHouse/ClickHouse/pull/44763) ([alesapin](https://github.com/alesapin)). +* Fix flaky test 02479_mysql_connect_to_self [#44768](https://github.com/ClickHouse/ClickHouse/pull/44768) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Print fatal messages in Fuzzer [#44769](https://github.com/ClickHouse/ClickHouse/pull/44769) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix incorrect docs [#44795](https://github.com/ClickHouse/ClickHouse/pull/44795) ([Kruglov Pavel](https://github.com/Avogar)). +* Added table name to error message [#44806](https://github.com/ClickHouse/ClickHouse/pull/44806) ([Alexander Gololobov](https://github.com/davenger)). +* Retry packages download if GitHub returned HTTP 500. [#44807](https://github.com/ClickHouse/ClickHouse/pull/44807) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Slightly better docs [#44808](https://github.com/ClickHouse/ClickHouse/pull/44808) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix total trash in stress test [#44810](https://github.com/ClickHouse/ClickHouse/pull/44810) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix ASan builds for glibc 2.36+ [#44811](https://github.com/ClickHouse/ClickHouse/pull/44811) ([Azat Khuzhin](https://github.com/azat)). +* Remove the remainings of TestFlows [#44812](https://github.com/ClickHouse/ClickHouse/pull/44812) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix `grep` [#44813](https://github.com/ClickHouse/ClickHouse/pull/44813) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Fix bad cast in monotonicity analysis [#44818](https://github.com/ClickHouse/ClickHouse/pull/44818) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Modern tools, part 1 [#44819](https://github.com/ClickHouse/ClickHouse/pull/44819) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Modern tools in CI, part 2. [#44820](https://github.com/ClickHouse/ClickHouse/pull/44820) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix data race in DDLWorker [#44821](https://github.com/ClickHouse/ClickHouse/pull/44821) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix tests for bridges [#44822](https://github.com/ClickHouse/ClickHouse/pull/44822) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix flaky test_multiple_disks::test_jbod_overflow [#44823](https://github.com/ClickHouse/ClickHouse/pull/44823) ([Azat Khuzhin](https://github.com/azat)). +* Less OOM in stress test [#44824](https://github.com/ClickHouse/ClickHouse/pull/44824) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix misleading integration tests reports for parametrized tests [#44825](https://github.com/ClickHouse/ClickHouse/pull/44825) ([Azat Khuzhin](https://github.com/azat)). +* Fix two typos [#44826](https://github.com/ClickHouse/ClickHouse/pull/44826) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Adjust CSS [#44829](https://github.com/ClickHouse/ClickHouse/pull/44829) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix fuzzer report [#44830](https://github.com/ClickHouse/ClickHouse/pull/44830) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* check-style: check base for std::cerr/cout too [#44833](https://github.com/ClickHouse/ClickHouse/pull/44833) ([Azat Khuzhin](https://github.com/azat)). +* Try fixing `test_keeper_snapshot_small_distance` with ZK restart [#44834](https://github.com/ClickHouse/ClickHouse/pull/44834) ([Antonio Andelic](https://github.com/antonio2368)). +* Exclude cargo shared libraries from the artifacts [#44836](https://github.com/ClickHouse/ClickHouse/pull/44836) ([Azat Khuzhin](https://github.com/azat)). +* Add a tiny but important logging [#44837](https://github.com/ClickHouse/ClickHouse/pull/44837) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Escape submodules in style-check [#44838](https://github.com/ClickHouse/ClickHouse/pull/44838) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Move `test_dies_with_parent` to another module [#44839](https://github.com/ClickHouse/ClickHouse/pull/44839) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Remove unneeded softlink to official dev docs [#44841](https://github.com/ClickHouse/ClickHouse/pull/44841) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix data race in StorageS3 [#44842](https://github.com/ClickHouse/ClickHouse/pull/44842) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix rare race which can lead to queue hang [#44847](https://github.com/ClickHouse/ClickHouse/pull/44847) ([alesapin](https://github.com/alesapin)). +* No more retries in integration tests [#44851](https://github.com/ClickHouse/ClickHouse/pull/44851) ([Ilya Yatsishin](https://github.com/qoega)). +* Document usage of check_cxx_source_compiles instead of check_cxx_source_runs [#44854](https://github.com/ClickHouse/ClickHouse/pull/44854) ([Robert Schulze](https://github.com/rschu1ze)). 
+* More cases of OOM in Fuzzer [#44855](https://github.com/ClickHouse/ClickHouse/pull/44855) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix: sorted DISTINCT with empty string [#44856](https://github.com/ClickHouse/ClickHouse/pull/44856) ([Igor Nikonov](https://github.com/devcrafter)). +* Try to fix MSan build [#44857](https://github.com/ClickHouse/ClickHouse/pull/44857) ([Nikolay Degterinsky](https://github.com/evillique)). +* Cleanup setup_minio.sh [#44858](https://github.com/ClickHouse/ClickHouse/pull/44858) ([Pradeep Chhetri](https://github.com/chhetripradeep)). +* Wait for ZK process to stop in tests using snapshot [#44859](https://github.com/ClickHouse/ClickHouse/pull/44859) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix flaky test and several typos [#44870](https://github.com/ClickHouse/ClickHouse/pull/44870) ([alesapin](https://github.com/alesapin)). +* Upload status files to S3 report for bugfix check [#44871](https://github.com/ClickHouse/ClickHouse/pull/44871) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix flaky test `02503_insert_storage_snapshot` [#44873](https://github.com/ClickHouse/ClickHouse/pull/44873) ([alesapin](https://github.com/alesapin)). +* Revert some changes from [#42777](https://github.com/ClickHouse/ClickHouse/issues/42777) to fix performance tests [#44876](https://github.com/ClickHouse/ClickHouse/pull/44876) ([Kruglov Pavel](https://github.com/Avogar)). +* Rewrite test_postgres_protocol test [#44880](https://github.com/ClickHouse/ClickHouse/pull/44880) ([Ilya Yatsishin](https://github.com/qoega)). +* Fix ConcurrentBoundedQueue::emplace() return value in case of finished queue [#44881](https://github.com/ClickHouse/ClickHouse/pull/44881) ([Azat Khuzhin](https://github.com/azat)). +* Validate function arguments in query tree [#44882](https://github.com/ClickHouse/ClickHouse/pull/44882) ([Dmitry Novik](https://github.com/novikd)). +* Rework CI reports to have a class and clarify the logic [#44883](https://github.com/ClickHouse/ClickHouse/pull/44883) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* fix-typo [#44886](https://github.com/ClickHouse/ClickHouse/pull/44886) ([Enrique Herreros](https://github.com/eherrerosj)). +* Store ZK generated data in `test_keeper_snapshot_small_distance` [#44888](https://github.com/ClickHouse/ClickHouse/pull/44888) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix "AttributeError: 'BuildResult' object has no attribute 'libraries'" in BuilderReport and BuilderSpecialReport [#44890](https://github.com/ClickHouse/ClickHouse/pull/44890) ([Robert Schulze](https://github.com/rschu1ze)). +* Convert integration test_dictionaries_update_field to a stateless [#44891](https://github.com/ClickHouse/ClickHouse/pull/44891) ([Azat Khuzhin](https://github.com/azat)). +* Upgrade googletest to latest HEAD [#44894](https://github.com/ClickHouse/ClickHouse/pull/44894) ([Robert Schulze](https://github.com/rschu1ze)). +* Try fix rabbitmq potential leak [#44897](https://github.com/ClickHouse/ClickHouse/pull/44897) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Try to fix flaky `test_storage_kafka::test_kafka_produce_key_timestamp` [#44898](https://github.com/ClickHouse/ClickHouse/pull/44898) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix flaky `test_concurrent_queries_restriction_by_query_kind` [#44903](https://github.com/ClickHouse/ClickHouse/pull/44903) ([Antonio Andelic](https://github.com/antonio2368)). 
+* Avoid Keeper crash on shutdown (fix `test_keeper_snapshot_on_exit`) [#44908](https://github.com/ClickHouse/ClickHouse/pull/44908) ([Antonio Andelic](https://github.com/antonio2368)). +* Do not merge over a gap with outdated undeleted parts [#44909](https://github.com/ClickHouse/ClickHouse/pull/44909) ([Sema Checherinda](https://github.com/CheSema)). +* Fix logging message in MergeTreeDataMergerMutator (about merged parts) [#44917](https://github.com/ClickHouse/ClickHouse/pull/44917) ([Azat Khuzhin](https://github.com/azat)). +* Fix flaky test `test_lost_part` [#44921](https://github.com/ClickHouse/ClickHouse/pull/44921) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Add fast and cancellable shared_mutex alternatives [#44924](https://github.com/ClickHouse/ClickHouse/pull/44924) ([Sergei Trifonov](https://github.com/serxa)). +* Fix deadlock in Keeper's changelog [#44937](https://github.com/ClickHouse/ClickHouse/pull/44937) ([Antonio Andelic](https://github.com/antonio2368)). +* Stop merges to avoid a race between merge and freeze. [#44938](https://github.com/ClickHouse/ClickHouse/pull/44938) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix memory leak in Aws::InitAPI [#44942](https://github.com/ClickHouse/ClickHouse/pull/44942) ([Vitaly Baranov](https://github.com/vitlibar)). +* Change error code on invalid background_pool_size config [#44947](https://github.com/ClickHouse/ClickHouse/pull/44947) ([Raúl Marín](https://github.com/Algunenano)). +* Fix exception fix in TraceCollector dtor [#44948](https://github.com/ClickHouse/ClickHouse/pull/44948) ([Robert Schulze](https://github.com/rschu1ze)). +* Parallel distributed insert select with s3Cluster [3] [#44955](https://github.com/ClickHouse/ClickHouse/pull/44955) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Do not check read result consistency when unwinding [#44956](https://github.com/ClickHouse/ClickHouse/pull/44956) ([Alexander Gololobov](https://github.com/davenger)). +* Up the log level of tables dependencies graphs [#44957](https://github.com/ClickHouse/ClickHouse/pull/44957) ([Vitaly Baranov](https://github.com/vitlibar)). +* Hipster's HTML [#44961](https://github.com/ClickHouse/ClickHouse/pull/44961) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Docs: Mention non-standard DOTALL behavior of ClickHouse's match() [#44977](https://github.com/ClickHouse/ClickHouse/pull/44977) ([Robert Schulze](https://github.com/rschu1ze)). +* tests: fix test_replicated_users flakiness [#44978](https://github.com/ClickHouse/ClickHouse/pull/44978) ([Azat Khuzhin](https://github.com/azat)). +* Check what if disable some checks in storage Merge. [#44983](https://github.com/ClickHouse/ClickHouse/pull/44983) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix check for not existing input in ActionsDAG [#44987](https://github.com/ClickHouse/ClickHouse/pull/44987) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Update version_date.tsv and changelogs after v22.12.2.25-stable [#44988](https://github.com/ClickHouse/ClickHouse/pull/44988) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix test test_grpc_protocol/test.py::test_progress [#44996](https://github.com/ClickHouse/ClickHouse/pull/44996) ([Vitaly Baranov](https://github.com/vitlibar)). +* Improve S3 EC2 metadata tests [#45001](https://github.com/ClickHouse/ClickHouse/pull/45001) ([Vitaly Baranov](https://github.com/vitlibar)). 
+* Fix minmax_count_projection with _partition_value [#45003](https://github.com/ClickHouse/ClickHouse/pull/45003) ([Amos Bird](https://github.com/amosbird)). +* Fix strange trash in Fuzzer [#45006](https://github.com/ClickHouse/ClickHouse/pull/45006) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add `dmesg.log` to Fuzzer [#45008](https://github.com/ClickHouse/ClickHouse/pull/45008) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix `01961_roaring_memory_tracking` test, again [#45009](https://github.com/ClickHouse/ClickHouse/pull/45009) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Recognize more ok cases for Fuzzer [#45012](https://github.com/ClickHouse/ClickHouse/pull/45012) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Supposedly fix the "Download script failed" error [#45013](https://github.com/ClickHouse/ClickHouse/pull/45013) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add snapshot creation retry in Keeper tests using ZooKeeper [#45016](https://github.com/ClickHouse/ClickHouse/pull/45016) ([Antonio Andelic](https://github.com/antonio2368)). +* test for [#20098](https://github.com/ClickHouse/ClickHouse/issues/20098) [#45017](https://github.com/ClickHouse/ClickHouse/pull/45017) ([Denny Crane](https://github.com/den-crane)). +* test for [#26473](https://github.com/ClickHouse/ClickHouse/issues/26473) [#45018](https://github.com/ClickHouse/ClickHouse/pull/45018) ([Denny Crane](https://github.com/den-crane)). +* Remove the remainings of Testflows (2). [#45021](https://github.com/ClickHouse/ClickHouse/pull/45021) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Enable the check that was commented [#45022](https://github.com/ClickHouse/ClickHouse/pull/45022) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix false positive in Fuzzer [#45025](https://github.com/ClickHouse/ClickHouse/pull/45025) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix false positive in Fuzzer, alternative variant [#45026](https://github.com/ClickHouse/ClickHouse/pull/45026) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix function `range` (the bug was unreleased) [#45030](https://github.com/ClickHouse/ClickHouse/pull/45030) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix OOM in Fuzzer [#45032](https://github.com/ClickHouse/ClickHouse/pull/45032) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Less OOM in Stress test [#45033](https://github.com/ClickHouse/ClickHouse/pull/45033) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#31361](https://github.com/ClickHouse/ClickHouse/issues/31361) [#45034](https://github.com/ClickHouse/ClickHouse/pull/45034) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#38729](https://github.com/ClickHouse/ClickHouse/issues/38729) [#45035](https://github.com/ClickHouse/ClickHouse/pull/45035) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix typos [#45036](https://github.com/ClickHouse/ClickHouse/pull/45036) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* I didn't understand the logic of this test, @azat [#45037](https://github.com/ClickHouse/ClickHouse/pull/45037) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Small fixes for Coordination unit tests [#45039](https://github.com/ClickHouse/ClickHouse/pull/45039) ([Antonio Andelic](https://github.com/antonio2368)). 
+* Fix flaky test (hilarious) [#45042](https://github.com/ClickHouse/ClickHouse/pull/45042) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Non significant changes [#45046](https://github.com/ClickHouse/ClickHouse/pull/45046) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Don't fix parallel formatting [#45050](https://github.com/ClickHouse/ClickHouse/pull/45050) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix (benign) data race in clickhouse-client [#45053](https://github.com/ClickHouse/ClickHouse/pull/45053) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Analyzer aggregation without column fix [#45055](https://github.com/ClickHouse/ClickHouse/pull/45055) ([Maksim Kita](https://github.com/kitaisreal)). +* Analyzer ARRAY JOIN crash fix [#45059](https://github.com/ClickHouse/ClickHouse/pull/45059) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix data race in openSQLiteDB [#45062](https://github.com/ClickHouse/ClickHouse/pull/45062) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Analyzer function IN crash fix [#45064](https://github.com/ClickHouse/ClickHouse/pull/45064) ([Maksim Kita](https://github.com/kitaisreal)). +* JIT compilation float to bool conversion fix [#45067](https://github.com/ClickHouse/ClickHouse/pull/45067) ([Maksim Kita](https://github.com/kitaisreal)). +* Update version_date.tsv and changelogs after v22.11.3.47-stable [#45069](https://github.com/ClickHouse/ClickHouse/pull/45069) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v22.10.5.54-stable [#45071](https://github.com/ClickHouse/ClickHouse/pull/45071) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v22.3.16.1190-lts [#45073](https://github.com/ClickHouse/ClickHouse/pull/45073) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Improve release scripts [#45074](https://github.com/ClickHouse/ClickHouse/pull/45074) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Change the color of links in dark reports a little bit [#45077](https://github.com/ClickHouse/ClickHouse/pull/45077) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix Fuzzer script [#45082](https://github.com/ClickHouse/ClickHouse/pull/45082) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Try fixing KeeperMap tests [#45094](https://github.com/ClickHouse/ClickHouse/pull/45094) ([Antonio Andelic](https://github.com/antonio2368)). +* Update version_date.tsv and changelogs after v22.8.12.45-lts [#45098](https://github.com/ClickHouse/ClickHouse/pull/45098) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Try to fix flaky test_create_user_and_login/test.py::test_login_as_dropped_user_xml [#45099](https://github.com/ClickHouse/ClickHouse/pull/45099) ([Ilya Yatsishin](https://github.com/qoega)). +* Update version_date.tsv and changelogs after v22.10.6.3-stable [#45107](https://github.com/ClickHouse/ClickHouse/pull/45107) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Docs: Make heading consistent with other headings in System Table docs [#45109](https://github.com/ClickHouse/ClickHouse/pull/45109) ([Robert Schulze](https://github.com/rschu1ze)). +* Update version_date.tsv and changelogs after v22.11.4.3-stable [#45110](https://github.com/ClickHouse/ClickHouse/pull/45110) ([robot-clickhouse](https://github.com/robot-clickhouse)). 
+* Update version_date.tsv and changelogs after v22.12.3.5-stable [#45113](https://github.com/ClickHouse/ClickHouse/pull/45113) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Docs: Rewrite awkwardly phrased sentence about flush interval [#45114](https://github.com/ClickHouse/ClickHouse/pull/45114) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix data race in s3Cluster. [#45123](https://github.com/ClickHouse/ClickHouse/pull/45123) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Pull SQLancer image before check run [#45125](https://github.com/ClickHouse/ClickHouse/pull/45125) ([Ilya Yatsishin](https://github.com/qoega)). +* Fix flaky azure test [#45134](https://github.com/ClickHouse/ClickHouse/pull/45134) ([alesapin](https://github.com/alesapin)). +* Minor cleanup in stress/run.sh [#45136](https://github.com/ClickHouse/ClickHouse/pull/45136) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Performance report: "Partial queries" --> "Backward-incompatible queries [#45152](https://github.com/ClickHouse/ClickHouse/pull/45152) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix flaky test_tcp_handler_interserver_listen_host [#45156](https://github.com/ClickHouse/ClickHouse/pull/45156) ([Ilya Yatsishin](https://github.com/qoega)). +* Clean trash from changelog for v22.3.16.1190-lts [#45159](https://github.com/ClickHouse/ClickHouse/pull/45159) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Disable `test_storage_rabbitmq` [#45161](https://github.com/ClickHouse/ClickHouse/pull/45161) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Disable test_ttl_move_memory_usage as too flaky. [#45162](https://github.com/ClickHouse/ClickHouse/pull/45162) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* More logging to facilitate debugging of flaky test_ttl_replicated [#45165](https://github.com/ClickHouse/ClickHouse/pull/45165) ([Alexander Gololobov](https://github.com/davenger)). +* Try to fix flaky test_ttl_move_memory_usage [#45168](https://github.com/ClickHouse/ClickHouse/pull/45168) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix flaky test test_multiple_disks/test.py::test_rename [#45180](https://github.com/ClickHouse/ClickHouse/pull/45180) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Calculate only required columns in system.detached_parts [#45181](https://github.com/ClickHouse/ClickHouse/pull/45181) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Restart NightlyBuilds if the runner died [#45187](https://github.com/ClickHouse/ClickHouse/pull/45187) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix part ID generation for IP types for backward compatibility [#45191](https://github.com/ClickHouse/ClickHouse/pull/45191) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix integration test test_replicated_users::test_rename_replicated [#45192](https://github.com/ClickHouse/ClickHouse/pull/45192) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Add CACHE_INVALIDATOR for sqlancer builds [#45201](https://github.com/ClickHouse/ClickHouse/pull/45201) ([Ilya Yatsishin](https://github.com/qoega)). +* Fix possible stack-use-after-return in LimitReadBuffer [#45203](https://github.com/ClickHouse/ClickHouse/pull/45203) ([Kruglov Pavel](https://github.com/Avogar)). +* Disable check to make test_overcommit_tracker not flaky [#45206](https://github.com/ClickHouse/ClickHouse/pull/45206) ([Dmitry Novik](https://github.com/novikd)). 
+* Fix flaky test `01961_roaring_memory_tracking` (3) [#45208](https://github.com/ClickHouse/ClickHouse/pull/45208) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove trash from stress test [#45211](https://github.com/ClickHouse/ClickHouse/pull/45211) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* remove unused function [#45212](https://github.com/ClickHouse/ClickHouse/pull/45212) ([flynn](https://github.com/ucasfl)). +* Fix flaky `test_keeper_three_nodes_two_alive` [#45213](https://github.com/ClickHouse/ClickHouse/pull/45213) ([Antonio Andelic](https://github.com/antonio2368)). +* Fuzz PREWHERE clause [#45222](https://github.com/ClickHouse/ClickHouse/pull/45222) ([Alexander Gololobov](https://github.com/davenger)). +* Added a test for merge join key condition with big int & decimal [#45228](https://github.com/ClickHouse/ClickHouse/pull/45228) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix rare logical error: `Too large alignment` [#45229](https://github.com/ClickHouse/ClickHouse/pull/45229) ([Anton Popov](https://github.com/CurtizJ)). +* Update version_date.tsv and changelogs after v22.3.17.13-lts [#45234](https://github.com/ClickHouse/ClickHouse/pull/45234) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* More verbose logs about replication log entries [#45235](https://github.com/ClickHouse/ClickHouse/pull/45235) ([Alexander Tokmakov](https://github.com/tavplubix)). +* One more attempt to fix race in TCPHandler [#45240](https://github.com/ClickHouse/ClickHouse/pull/45240) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Update clickhouse-test [#45251](https://github.com/ClickHouse/ClickHouse/pull/45251) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Planner small fixes [#45254](https://github.com/ClickHouse/ClickHouse/pull/45254) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix log level "Test" for send_logs_level in client [#45273](https://github.com/ClickHouse/ClickHouse/pull/45273) ([Azat Khuzhin](https://github.com/azat)). +* tests: fix clickhouse binaries detection [#45283](https://github.com/ClickHouse/ClickHouse/pull/45283) ([Azat Khuzhin](https://github.com/azat)). +* tests/ci: encode HTML entities in the reports [#45284](https://github.com/ClickHouse/ClickHouse/pull/45284) ([Azat Khuzhin](https://github.com/azat)). +* Disable `02151_hash_table_sizes_stats_distributed` under TSAN [#45287](https://github.com/ClickHouse/ClickHouse/pull/45287) ([Nikita Taranov](https://github.com/nickitat)). +* Fix wrong approved_at, simplify conditions [#45302](https://github.com/ClickHouse/ClickHouse/pull/45302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Disable 02028_create_select_settings with Ordinary [#45307](https://github.com/ClickHouse/ClickHouse/pull/45307) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Save message format strings for DB::Exception [#45342](https://github.com/ClickHouse/ClickHouse/pull/45342) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Slightly better output for glibc check [#45353](https://github.com/ClickHouse/ClickHouse/pull/45353) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add checks for compilation of regexps [#45356](https://github.com/ClickHouse/ClickHouse/pull/45356) ([Anton Popov](https://github.com/CurtizJ)). +* Analyzer compound identifier typo correction fix [#45357](https://github.com/ClickHouse/ClickHouse/pull/45357) ([Maksim Kita](https://github.com/kitaisreal)). 
+* Bump to newer version of debug-action [#45359](https://github.com/ClickHouse/ClickHouse/pull/45359) ([Ilya Yatsishin](https://github.com/qoega)). +* Improve failed kafka startup logging [#45369](https://github.com/ClickHouse/ClickHouse/pull/45369) ([Ilya Yatsishin](https://github.com/qoega)). +* Fix flaky ttl test [#45370](https://github.com/ClickHouse/ClickHouse/pull/45370) ([alesapin](https://github.com/alesapin)). +* Add detailed profile events for throttling [#45373](https://github.com/ClickHouse/ClickHouse/pull/45373) ([Sergei Trifonov](https://github.com/serxa)). +* Update .gitignore [#45378](https://github.com/ClickHouse/ClickHouse/pull/45378) ([Nikolay Degterinsky](https://github.com/evillique)). +* Make test simpler to see errors [#45402](https://github.com/ClickHouse/ClickHouse/pull/45402) ([Ilya Yatsishin](https://github.com/qoega)). +* Reduce an amount of trash in `tests_system_merges` [#45403](https://github.com/ClickHouse/ClickHouse/pull/45403) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix reading from encrypted disk with passed file size [#45418](https://github.com/ClickHouse/ClickHouse/pull/45418) ([Anton Popov](https://github.com/CurtizJ)). +* Add delete by ttl for zookeeper_log [#45419](https://github.com/ClickHouse/ClickHouse/pull/45419) ([Nikita Taranov](https://github.com/nickitat)). +* Get rid of artifactory in favor of r2 + ch-repos-manager [#45421](https://github.com/ClickHouse/ClickHouse/pull/45421) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Minor improvements around reading from remote [#45442](https://github.com/ClickHouse/ClickHouse/pull/45442) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Docs: Beautify section on secondary index types [#45444](https://github.com/ClickHouse/ClickHouse/pull/45444) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix Buffer's offsets mismatch logical error in stress test [#45446](https://github.com/ClickHouse/ClickHouse/pull/45446) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Better formatting for exception messages [#45449](https://github.com/ClickHouse/ClickHouse/pull/45449) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add default GRANULARITY argument for secondary indexes [#45451](https://github.com/ClickHouse/ClickHouse/pull/45451) ([Nikolay Degterinsky](https://github.com/evillique)). +* Cleanup of inverted index [#45460](https://github.com/ClickHouse/ClickHouse/pull/45460) ([Robert Schulze](https://github.com/rschu1ze)). +* CherryPick: Fix a wrong staring search date [#45466](https://github.com/ClickHouse/ClickHouse/pull/45466) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix typos [#45470](https://github.com/ClickHouse/ClickHouse/pull/45470) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix possible aborts in arrow lib [#45478](https://github.com/ClickHouse/ClickHouse/pull/45478) ([Kruglov Pavel](https://github.com/Avogar)). +* Add more retries to AST Fuzzer [#45479](https://github.com/ClickHouse/ClickHouse/pull/45479) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix schema inference from insertion table in hdfsCluster [#45483](https://github.com/ClickHouse/ClickHouse/pull/45483) ([Kruglov Pavel](https://github.com/Avogar)). +* Remove unnecessary getTotalRowCount function calls [#45485](https://github.com/ClickHouse/ClickHouse/pull/45485) ([Maksim Kita](https://github.com/kitaisreal)). 
+* Use new copy s3 functions in S3ObjectStorage [#45487](https://github.com/ClickHouse/ClickHouse/pull/45487) ([Vitaly Baranov](https://github.com/vitlibar)). +* Forward declaration of ConcurrentBoundedQueue in ThreadStatus [#45489](https://github.com/ClickHouse/ClickHouse/pull/45489) ([Azat Khuzhin](https://github.com/azat)). +* Revert "Merge pull request [#44922](https://github.com/ClickHouse/ClickHouse/issues/44922) from azat/dist/async-INSERT-metrics" [#45492](https://github.com/ClickHouse/ClickHouse/pull/45492) ([Azat Khuzhin](https://github.com/azat)). +* Docs: Fix weird formatting [#45495](https://github.com/ClickHouse/ClickHouse/pull/45495) ([Robert Schulze](https://github.com/rschu1ze)). +* Docs: Fix link to writing guide [#45496](https://github.com/ClickHouse/ClickHouse/pull/45496) ([Robert Schulze](https://github.com/rschu1ze)). +* Improve logging for TeePopen.timeout exceeded [#45504](https://github.com/ClickHouse/ClickHouse/pull/45504) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix MSan build once again (too heavy translation units) [#45512](https://github.com/ClickHouse/ClickHouse/pull/45512) ([Nikolay Degterinsky](https://github.com/evillique)). +* Additional check in MergeTreeReadPool [#45515](https://github.com/ClickHouse/ClickHouse/pull/45515) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Update test_system_merges/test.py [#45516](https://github.com/ClickHouse/ClickHouse/pull/45516) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Revert "Merge pull request [#45493](https://github.com/ClickHouse/ClickHouse/issues/45493) from azat/fix-detach" [#45545](https://github.com/ClickHouse/ClickHouse/pull/45545) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Update stress [#45546](https://github.com/ClickHouse/ClickHouse/pull/45546) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Ignore utf errors in clickhouse-test reportLogStats [#45556](https://github.com/ClickHouse/ClickHouse/pull/45556) ([Vladimir C](https://github.com/vdimir)). +* Resubmit "Fix possible in-use table after DETACH" [#45566](https://github.com/ClickHouse/ClickHouse/pull/45566) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Typo: "Granulesis" --> "Granules" [#45598](https://github.com/ClickHouse/ClickHouse/pull/45598) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix version in autogenerated_versions.txt [#45624](https://github.com/ClickHouse/ClickHouse/pull/45624) ([Dmitry Novik](https://github.com/novikd)). + diff --git a/docs/changelogs/v23.1.2.9-stable.md b/docs/changelogs/v23.1.2.9-stable.md new file mode 100644 index 00000000000..272a2b95a86 --- /dev/null +++ b/docs/changelogs/v23.1.2.9-stable.md @@ -0,0 +1,23 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.1.2.9-stable (8dfb1700858) FIXME as compared to v23.1.1.3077-stable (dcaac477025) + +#### Performance Improvement +* Backported in [#45705](https://github.com/ClickHouse/ClickHouse/issues/45705): Fixed performance of short `SELECT` queries that read from tables with large number of`Array`/`Map`/`Nested` columns. [#45630](https://github.com/ClickHouse/ClickHouse/pull/45630) ([Anton Popov](https://github.com/CurtizJ)). + +#### Bug Fix +* Backported in [#45673](https://github.com/ClickHouse/ClickHouse/issues/45673): Fix wiping sensitive info in logs. [#45603](https://github.com/ClickHouse/ClickHouse/pull/45603) ([Vitaly Baranov](https://github.com/vitlibar)). 
+ +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#45730](https://github.com/ClickHouse/ClickHouse/issues/45730): Fix key description when encountering duplicate primary keys. This can happen in projections. See [#45590](https://github.com/ClickHouse/ClickHouse/issues/45590) for details. [#45686](https://github.com/ClickHouse/ClickHouse/pull/45686) ([Amos Bird](https://github.com/amosbird)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Trim refs/tags/ from GITHUB_TAG in release workflow [#45636](https://github.com/ClickHouse/ClickHouse/pull/45636) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/en/engines/table-engines/integrations/hdfs.md b/docs/en/engines/table-engines/integrations/hdfs.md index f6b8e3a1da6..7c04a6594a6 100644 --- a/docs/en/engines/table-engines/integrations/hdfs.md +++ b/docs/en/engines/table-engines/integrations/hdfs.md @@ -21,6 +21,13 @@ ENGINE = HDFS(URI, format) `SELECT` queries, the format must be supported for input, and to perform `INSERT` queries – for output. The available formats are listed in the [Formats](../../../interfaces/formats.md#formats) section. +- [PARTITION BY expr] + +### PARTITION BY + +`PARTITION BY` — Optional. In most cases you don't need a partition key, and if it is needed you generally don't need a partition key more granular than by month. Partitioning does not speed up queries (in contrast to the ORDER BY expression). You should never use too granular partitioning. Don't partition your data by client identifiers or names (instead, make client identifier or name the first column in the ORDER BY expression). + +For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](/docs/en/sql-reference/data-types/date.md). The partition names here have the `"YYYYMM"` format. **Example:** diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index 245bd6f4468..723425429a5 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -13,6 +13,7 @@ This engine provides integration with [Amazon S3](https://aws.amazon.com/s3/) ec ``` sql CREATE TABLE s3_engine_table (name String, value UInt32) ENGINE = S3(path, [aws_access_key_id, aws_secret_access_key,] format, [compression]) + [PARTITION BY expr] [SETTINGS ...] ``` @@ -23,6 +24,12 @@ CREATE TABLE s3_engine_table (name String, value UInt32) - `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3). - `compression` — Compression type. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. Parameter is optional. By default, it will autodetect compression by file extension. +### PARTITION BY + +`PARTITION BY` — Optional. In most cases you don't need a partition key, and if it is needed you generally don't need a partition key more granular than by month. Partitioning does not speed up queries (in contrast to the ORDER BY expression). You should never use too granular partitioning. 
Don't partition your data by client identifiers or names (instead, make client identifier or name the first column in the ORDER BY expression). + +For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](/docs/en/sql-reference/data-types/date.md). The partition names here have the `"YYYYMM"` format. + **Example** ``` sql diff --git a/docs/en/engines/table-engines/mergetree-family/invertedindexes.md b/docs/en/engines/table-engines/mergetree-family/invertedindexes.md index 693902b7d9b..2899476b847 100644 --- a/docs/en/engines/table-engines/mergetree-family/invertedindexes.md +++ b/docs/en/engines/table-engines/mergetree-family/invertedindexes.md @@ -1,15 +1,22 @@ -# Inverted indexes [experimental] {#table_engines-ANNIndex} +--- +slug: /en/engines/table-engines/mergetree-family/invertedindexes +sidebar_label: Inverted Indexes +description: Quickly find search terms in text. +keywords: [full-text search, text search] +--- -Inverted indexes are an experimental type of [secondary indexes](mergetree.md#available-types-of-indices) which provide fast text search -capabilities for [String](../../../sql-reference/data-types/string.md) or [FixedString](../../../sql-reference/data-types/fixedstring.md) -columns. The main idea of an inverted indexes is to store a mapping from "terms" to the rows which contains these terms. "Terms" are -tokenized cells of the string column. For example, string cell "I will be a little late" is by default tokenized into six terms "I", "will", -"be", "a", "little" and "late". Another kind of tokenizer are n-grams. For example, the result of 3-gram tokenization will be 21 terms "I w", +# Inverted indexes [experimental] + +Inverted indexes are an experimental type of [secondary indexes](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#available-types-of-indices) which provide fast text search +capabilities for [String](/docs/en/sql-reference/data-types/string.md) or [FixedString](/docs/en/sql-reference/data-types/fixedstring.md) +columns. The main idea of an inverted index is to store a mapping from "terms" to the rows which contain these terms. "Terms" are +tokenized cells of the string column. For example, the string cell "I will be a little late" is by default tokenized into six terms "I", "will", +"be", "a", "little" and "late". Another kind of tokenizer is n-grams. For example, the result of 3-gram tokenization will be 21 terms "I w", " wi", "wil", "ill", "ll ", "l b", " be" etc. The more fine-granular the input strings are tokenized, the bigger but also the more useful the resulting inverted index will be. :::warning -Inverted indexes are experimental and should not be used in production environment yet. They may change in future in backwards-incompatible +Inverted indexes are experimental and should not be used in production environments yet. They may change in the future in backward-incompatible ways, for example with respect to their DDL/DQL syntax or performance/compression characteristics. 
::: @@ -24,7 +31,14 @@ SET allow_experimental_inverted_index = true; An inverted index can be defined on a string column using the following syntax ``` sql -CREATE TABLE tab (key UInt64, str String, INDEX inv_idx(s) TYPE inverted(N) GRANULARITY 1) Engine=MergeTree ORDER BY (k); +CREATE TABLE tab +( + `key` UInt64, + `str` String, + INDEX inv_idx(str) TYPE inverted(0) GRANULARITY 1 +) +ENGINE = MergeTree +ORDER BY key ``` where `N` specifies the tokenizer: @@ -32,35 +46,35 @@ where `N` specifies the tokenizer: - `inverted(0)` (or shorter: `inverted()`) set the tokenizer to "tokens", i.e. split strings along spaces, - `inverted(N)` with `N` between 2 and 8 sets the tokenizer to "ngrams(N)" -Being a type of skipping indexes, inverted indexes can be dropped or added to a column after table creation: +Being a type of skipping index, inverted indexes can be dropped or added to a column after table creation: ``` sql -ALTER TABLE tbl DROP INDEX inv_idx; -ALTER TABLE tbl ADD INDEX inv_idx(s) TYPE inverted(2) GRANULARITY 1; +ALTER TABLE tab DROP INDEX inv_idx; +ALTER TABLE tab ADD INDEX inv_idx(s) TYPE inverted(2) GRANULARITY 1; ``` To use the index, no special functions or syntax are required. Typical string search predicates automatically leverage the index. As examples, consider: ```sql -SELECT * from tab WHERE s == 'Hello World;; -SELECT * from tab WHERE s IN (‘Hello’, ‘World’); -SELECT * from tab WHERE s LIKE ‘%Hello%’; -SELECT * from tab WHERE multiSearchAny(s, ‘Hello’, ‘World’); -SELECT * from tab WHERE hasToken(s, ‘Hello’); -SELECT * from tab WHERE multiSearchAll(s, [‘Hello’, ‘World’]) +INSERT INTO tab(key, str) values (1, 'Hello World'); +SELECT * from tab WHERE str == 'Hello World'; +SELECT * from tab WHERE str IN ('Hello', 'World'); +SELECT * from tab WHERE str LIKE '%Hello%'; +SELECT * from tab WHERE multiSearchAny(str, ['Hello', 'World']); +SELECT * from tab WHERE hasToken(str, 'Hello'); ``` The inverted index also works on columns of type `Array(String)`, `Array(FixedString)`, `Map(String)` and `Map(String)`. Like for other secondary indices, each column part has its own inverted index. Furthermore, each inverted index is internally divided into -"segments". The existence and size of the segments is generally transparent to users but the segment size determines the memory consumption +"segments". The existence and size of the segments are generally transparent to users but the segment size determines the memory consumption during index construction (e.g. when two parts are merged). Configuration parameter "max_digestion_size_per_segment" (default: 256 MB) controls the amount of data read consumed from the underlying column before a new segment is created. Incrementing the parameter raises the -intermediate memory consumption for index constuction but also improves lookup performance since fewer segments need to be checked on +intermediate memory consumption for index construction but also improves lookup performance since fewer segments need to be checked on average to evaluate a query. Unlike other secondary indices, inverted indexes (for now) map to row numbers (row ids) instead of granule ids. The reason for this design is performance. In practice, users often search for multiple terms at once. For example, filter predicate `WHERE s LIKE '%little%' OR s LIKE -'%big%'` can be evaluated directly using an inverted index by forming the union of the rowid lists for terms "little" and "big". 
This also -means that parameter `GRANULARITY` supplied to index creation has no meaning (it may be removed from the syntax in future). +'%big%'` can be evaluated directly using an inverted index by forming the union of the row id lists for terms "little" and "big". This also +means that the parameter `GRANULARITY` supplied to index creation has no meaning (it may be removed from the syntax in the future). diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 7daa0dbbb97..be77a0ae070 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -77,7 +77,7 @@ Use the `ORDER BY tuple()` syntax, if you do not need sorting. See [Selecting th #### PARTITION BY -`PARTITION BY` — The [partitioning key](/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md). Optional. In most cases you don't need partition key, and in most other cases you don't need partition key more granular than by months. Partitioning does not speed up queries (in contrast to the ORDER BY expression). You should never use too granular partitioning. Don't partition your data by client identifiers or names (instead make client identifier or name the first column in the ORDER BY expression). +`PARTITION BY` — The [partitioning key](/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md). Optional. In most cases, you don't need a partition key, and if you do need to partition, generally you do not need a partition key more granular than by month. Partitioning does not speed up queries (in contrast to the ORDER BY expression). You should never use too granular partitioning. Don't partition your data by client identifiers or names (instead, make client identifier or name the first column in the ORDER BY expression). For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](/docs/en/sql-reference/data-types/date.md). The partition names here have the `"YYYYMM"` format. @@ -470,6 +470,9 @@ The `set` index can be used with all functions. Function subsets for other index | [empty](/docs/en/sql-reference/functions/array-functions#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ | | [notEmpty](/docs/en/sql-reference/functions/array-functions#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ | | hasToken | ✗ | ✗ | ✗ | ✔ | ✗ | +| hasTokenOrNull | ✗ | ✗ | ✗ | ✔ | ✗ | +| hasTokenCaseInsensitive | ✗ | ✗ | ✗ | ✔ | ✗ | +| hasTokenCaseInsensitiveOrNull | ✗ | ✗ | ✗ | ✔ | ✗ | Functions with a constant argument that is less than ngram size can’t be used by `ngrambf_v1` for query optimization. @@ -920,15 +923,25 @@ Configuration markup: 4 1000 /var/lib/clickhouse/disks/s3/ - true - /var/lib/clickhouse/disks/s3/cache/ false + + cache + s3 + /var/lib/clickhouse/disks/s3_cache/ + 10Gi + ... ``` +:::note cache configuration +ClickHouse versions 22.3 through 22.7 use a different cache configuration, see [using local cache](/docs/en/operations/storing-data.md/#using-local-cache) if you are using one of those versions. +::: + +### Configuring the S3 disk + Required parameters: - `endpoint` — S3 endpoint URL in `path` or `virtual hosted` [styles](https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html). Endpoint URL should contain a bucket and root path to store data. 
@@ -948,8 +961,6 @@ Optional parameters: - `single_read_retries` — Number of retry attempts in case of connection drop during read. Default value is `4`. - `min_bytes_for_seek` — Minimal number of bytes to use seek operation instead of sequential read. Default value is `1 Mb`. - `metadata_path` — Path on local FS to store metadata files for S3. Default value is `/var/lib/clickhouse/disks//`. -- `cache_enabled` — Allows to cache mark and index files on local FS. Default value is `true`. -- `cache_path` — Path on local FS where to store cached mark and index files. Default value is `/var/lib/clickhouse/disks//cache/`. - `skip_access_check` — If true, disk access checks will not be performed on disk start-up. Default value is `false`. - `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set. - `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited). @@ -957,6 +968,30 @@ Optional parameters: - `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited). - `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`. +### Configuring the cache + +This is the cache configuration from above: +```xml + + cache + s3 + /var/lib/clickhouse/disks/s3_cache/ + 10Gi + +``` + +These parameters define the cache layer: +- `type` — If a disk is of type `cache` it caches mark and index files in memory. +- `disk` — The name of the disk that will be cached. + +Cache parameters: +- `path` — The path where metadata for the cache is stored. +- `max_size` — The size (amount of memory) that the cache can grow to. + +:::tip +There are several other cache parameters that you can use to tune your storage, see [using local cache](/docs/en/operations/storing-data.md/#using-local-cache) for the details. +::: + S3 disk can be configured as `main` or `cold` storage: ``` xml diff --git a/docs/en/engines/table-engines/special/file.md b/docs/en/engines/table-engines/special/file.md index 5a9113f3a18..8314c511236 100644 --- a/docs/en/engines/table-engines/special/file.md +++ b/docs/en/engines/table-engines/special/file.md @@ -86,3 +86,9 @@ $ echo -e "1,2\n3,4" | clickhouse-local -q "CREATE TABLE table (a Int64, b Int64 - `SELECT ... SAMPLE` - Indices - Replication + +## PARTITION BY + +`PARTITION BY` — Optional. It is possible to create separate files by partitioning the data on a partition key. In most cases, you don't need a partition key, and if it is needed you generally don't need a partition key more granular than by month. Partitioning does not speed up queries (in contrast to the ORDER BY expression). You should never use too granular partitioning. Don't partition your data by client identifiers or names (instead, make client identifier or name the first column in the ORDER BY expression). + +For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](/docs/en/sql-reference/data-types/date.md). The partition names here have the `"YYYYMM"` format. 
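As an illustration of the monthly partitioning pattern described above for the `File` engine, here is a minimal sketch (the table and column names are invented for the example):

```sql
CREATE TABLE file_by_month
(
    `event_date` Date,
    `value` UInt32
)
ENGINE = File(CSV)
PARTITION BY toYYYYMM(event_date);

-- Rows from different months are written to separate files.
INSERT INTO file_by_month VALUES ('2023-01-15', 1), ('2023-02-03', 2);
```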
diff --git a/docs/en/engines/table-engines/special/generate.md b/docs/en/engines/table-engines/special/generate.md index 32fa2cd9b2b..77d90082ddc 100644 --- a/docs/en/engines/table-engines/special/generate.md +++ b/docs/en/engines/table-engines/special/generate.md @@ -19,7 +19,7 @@ ENGINE = GenerateRandom([random_seed] [,max_string_length] [,max_array_length]) ``` The `max_array_length` and `max_string_length` parameters specify maximum length of all -array columns and strings correspondingly in generated data. +array or map columns and strings correspondingly in generated data. Generate table engine supports only `SELECT` queries. diff --git a/docs/en/engines/table-engines/special/url.md b/docs/en/engines/table-engines/special/url.md index 095ffbbb827..af8a80c75b0 100644 --- a/docs/en/engines/table-engines/special/url.md +++ b/docs/en/engines/table-engines/special/url.md @@ -96,3 +96,9 @@ SELECT * FROM url_engine_table - `ALTER` and `SELECT...SAMPLE` operations. - Indexes. - Replication. + +## PARTITION BY + +`PARTITION BY` — Optional. It is possible to create separate files by partitioning the data on a partition key. In most cases, you don't need a partition key, and if it is needed you generally don't need a partition key more granular than by month. Partitioning does not speed up queries (in contrast to the ORDER BY expression). You should never use too granular partitioning. Don't partition your data by client identifiers or names (instead, make client identifier or name the first column in the ORDER BY expression). + +For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](/docs/en/sql-reference/data-types/date.md). The partition names here have the `"YYYYMM"` format. diff --git a/docs/en/getting-started/example-datasets/laion.md b/docs/en/getting-started/example-datasets/laion.md new file mode 100644 index 00000000000..077adf016a3 --- /dev/null +++ b/docs/en/getting-started/example-datasets/laion.md @@ -0,0 +1,259 @@ +# Laion-400M dataset + +The dataset contains 400 million images with English text. For more information follow this [link](https://laion.ai/blog/laion-400-open-dataset/). Laion provides even larger datasets (e.g. [5 billion](https://laion.ai/blog/laion-5b/)). Working with them will be similar. + +The dataset has prepared embeddings for texts and images. This will be used to demonstrate [Approximate nearest neighbor search indexes](../../engines/table-engines/mergetree-family/annindexes.md). + +## Prepare data + +Embeddings are stored in `.npy` files, so we have to read them with python and merge with other data. 
+
+Download and process the data with the following simple `download.sh` script:
+
+```bash
+wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/img_emb/img_emb_${1}.npy
+wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/metadata/metadata_${1}.parquet
+wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/text_emb/text_emb_${1}.npy
+python3 process.py ${1}
+```
+
+where `process.py` is:
+
+```python
+import pandas as pd
+import numpy as np
+import os
+import sys
+
+str_i = str(sys.argv[1])
+npy_file = "img_emb_" + str_i + '.npy'
+metadata_file = "metadata_" + str_i + '.parquet'
+text_npy = "text_emb_" + str_i + '.npy'
+
+# load all files
+im_emb = np.load(npy_file)
+text_emb = np.load(text_npy)
+data = pd.read_parquet(metadata_file)
+
+# combine them
+data = pd.concat([data, pd.DataFrame({"image_embedding" : [*im_emb]}), pd.DataFrame({"text_embedding" : [*text_emb]})], axis=1, copy=False)
+
+# you can save more columns
+data = data[['url', 'caption', 'similarity', "image_embedding", "text_embedding"]]
+
+# transform np.arrays to lists
+data['image_embedding'] = data['image_embedding'].apply(lambda x: list(x))
+data['text_embedding'] = data['text_embedding'].apply(lambda x: list(x))
+
+# this small hack is needed because the caption sometimes contains all kinds of quotes
+data['caption'] = data['caption'].apply(lambda x: x.replace("'", " ").replace('"', " "))
+
+# save data to file
+data.to_csv(str_i + '.csv', header=False)
+
+# previous files can be removed
+os.system(f"rm {npy_file} {metadata_file} {text_npy}")
+```
+
+You can download the data with:
+```bash
+seq 0 409 | xargs -P100 -I{} bash -c './download.sh {}'
+```
+
+The dataset is divided into 409 files. If you want to work only with a certain part of the dataset, just change the limits.
+
+## Create a table for laion
+
+Without indexes, the table can be created by:
+
+```sql
+CREATE TABLE laion_dataset
+(
+    `id` Int64,
+    `url` String,
+    `caption` String,
+    `similarity` Float32,
+    `image_embedding` Array(Float32),
+    `text_embedding` Array(Float32)
+)
+ENGINE = MergeTree
+ORDER BY id
+SETTINGS index_granularity = 8192
+```
+
+Fill the table with data:
+
+```sql
+INSERT INTO laion_dataset FROM INFILE '{path_to_csv_files}/*.csv'
+```
+
+## Check data in the table without indexes
+
+Let's check how the following query works on part of the dataset (8 million records):
+
+```sql
+select url, caption from test_laion where similarity > 0.2 order by L2Distance(image_embedding, {target:Array(Float32)}) limit 30
+```
+
+Since the embeddings for images and texts may not match, let's also require a certain threshold of matching accuracy so that we only get images that are more likely to satisfy our query. The query uses the client parameter `target`, which is an array of 512 elements; see later in this article for a convenient way of obtaining such vectors. I used a random picture of a cat from the Internet as the target vector.
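For reference, this is how such a parameter can be supplied in an interactive `clickhouse-client` session (a sketch only: the three-element array below is a stand-in, and a real value must be a full 512-element embedding):

```sql
-- Stand-in vector; paste a real 512-element embedding here.
SET param_target = [0.0123, -0.0456, 0.0789];

SELECT url, caption
FROM test_laion
WHERE similarity > 0.2
ORDER BY L2Distance(image_embedding, {target:Array(Float32)})
LIMIT 30
```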
+ +**The result** + +``` +┌─url───────────────────────────────────────────────────────────────────────────────────────────────────────────┬─caption────────────────────────────────────────────────────────────────┐ +│ https://s3.amazonaws.com/filestore.rescuegroups.org/6685/pictures/animals/13884/13884995/63318230_463x463.jpg │ Adoptable Female Domestic Short Hair │ +│ https://s3.amazonaws.com/pet-uploads.adoptapet.com/8/b/6/239905226.jpg │ Adopt A Pet :: Marzipan - New York, NY │ +│ http://d1n3ar4lqtlydb.cloudfront.net/9/2/4/248407625.jpg │ Adopt A Pet :: Butterscotch - New Castle, DE │ +│ https://s3.amazonaws.com/pet-uploads.adoptapet.com/e/e/c/245615237.jpg │ Adopt A Pet :: Tiggy - Chicago, IL │ +│ http://pawsofcoronado.org/wp-content/uploads/2012/12/rsz_pumpkin.jpg │ Pumpkin an orange tabby kitten for adoption │ +│ https://s3.amazonaws.com/pet-uploads.adoptapet.com/7/8/3/188700997.jpg │ Adopt A Pet :: Brian the Brad Pitt of cats - Frankfort, IL │ +│ https://s3.amazonaws.com/pet-uploads.adoptapet.com/8/b/d/191533561.jpg │ Domestic Shorthair Cat for adoption in Mesa, Arizona - Charlie │ +│ https://s3.amazonaws.com/pet-uploads.adoptapet.com/0/1/2/221698235.jpg │ Domestic Shorthair Cat for adoption in Marietta, Ohio - Daisy (Spayed) │ +└───────────────────────────────────────────────────────────────────────────────────────────────────────────────┴────────────────────────────────────────────────────────────────────────┘ + +8 rows in set. Elapsed: 6.432 sec. Processed 19.65 million rows, 43.96 GB (3.06 million rows/s., 6.84 GB/s.) +``` + +## Add indexes + +Create a new table or follow instructions from [alter documentation](../../sql-reference/statements/alter/skipping-index.md). + +```sql +CREATE TABLE laion_dataset +( + `id` Int64, + `url` String, + `caption` String, + `similarity` Float32, + `image_embedding` Array(Float32), + `text_embedding` Array(Float32), + INDEX annoy_image image_embedding TYPE annoy(1000) GRANULARITY 1000, + INDEX annoy_text text_embedding TYPE annoy(1000) GRANULARITY 1000 +) +ENGINE = MergeTree +ORDER BY id +SETTINGS index_granularity = 8192 +``` + +When created, the index will be built by L2Distance. You can read more about the parameters in the [annoy documentation](../../engines/table-engines/mergetree-family/annindexes.md#annoy-annoy). It makes sense to build indexes for a large number of granules. If you need good speed, then GRANULARITY should be several times larger than the expected number of results in the search. +Now let's check again with the same query: + +```sql +select url, caption from test_indexes_laion where similarity > 0.2 order by L2Distance(image_embedding, {target:Array(Float32)}) limit 8 +``` + +**Result** + +``` +┌─url──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─caption──────────────────────────────────────────────────────────────┐ +│ http://tse1.mm.bing.net/th?id=OIP.R1CUoYp_4hbeFSHBaaB5-gHaFj │ bed bugs and pets can cats carry bed bugs pets adviser │ +│ http://pet-uploads.adoptapet.com/1/9/c/1963194.jpg?336w │ Domestic Longhair Cat for adoption in Quincy, Massachusetts - Ashley │ +│ https://thumbs.dreamstime.com/t/cat-bed-12591021.jpg │ Cat on bed Stock Image │ +│ https://us.123rf.com/450wm/penta/penta1105/penta110500004/9658511-portrait-of-british-short-hair-kitten-lieing-at-sofa-on-sun.jpg │ Portrait of british short hair kitten lieing at sofa on sun. 
│ https://www.easypetmd.com/sites/default/files/Wirehaired%20Vizsla%20(2).jpg │ Vizsla (Wirehaired) image 3 │
+│ https://images.ctfassets.net/yixw23k2v6vo/0000000200009b8800000000/7950f4e1c1db335ef91bb2bc34428de9/dog-cat-flickr-Impatience_1.jpg?w=600&h=400&fm=jpg&fit=thumb&q=65&fl=progressive │ dog and cat image │
+│ https://i1.wallbox.ru/wallpapers/small/201523/eaa582ee76a31fd.jpg │ cats, kittens, faces, tonkinese │
+│ https://www.baxterboo.com/images/breeds/medium/cairn-terrier.jpg │ Cairn Terrier Photo │
+└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴──────────────────────────────────────────────────────────────────────┘
+
+8 rows in set. Elapsed: 0.641 sec. Processed 22.06 thousand rows, 49.36 MB (91.53 thousand rows/s., 204.81 MB/s.)
+```
+
+The speed has increased significantly. But now the results sometimes differ from what you are looking for. This is due to the approximation of the search and the quality of the constructed embedding. Note that the example was given for picture embeddings, but there are also text embeddings in the dataset, which can also be used for searching.
+
+## Scripts for embeddings
+
+Usually, we do not want to create embeddings for data that is already in the table, but rather for new data, in order to search for similar entries in the old data. We can use [UDF](../../sql-reference/functions/index.md#sql-user-defined-functions) for this purpose. They will allow you to set the `target` vector without leaving the client. All of the following scripts are written for the `ViT-B/32` model, as it was used for this dataset. You can use any model, but it is necessary to build the embeddings in the dataset and the embeddings for new objects with the same model.
+
+### Text embeddings
+
+`encode_text.py`:
+```python
+#!/usr/bin/python3
+import clip
+import torch
+import numpy as np
+import sys
+
+if __name__ == '__main__':
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    model, preprocess = clip.load("ViT-B/32", device=device)
+    for text in sys.stdin:
+        inputs = clip.tokenize(text)
+        with torch.no_grad():
+            text_features = model.encode_text(inputs)[0].tolist()
+        # write the embedding to stdout so that ClickHouse receives it
+        print(text_features)
+        sys.stdout.flush()
+```
+
+`encode_text_function.xml`:
+```xml
+<functions>
+    <function>
+        <type>executable</type>
+        <name>encode_text</name>
+        <return_type>Array(Float32)</return_type>
+        <argument>
+            <type>String</type>
+            <name>text</name>
+        </argument>
+        <format>TabSeparated</format>
+        <command>encode_text.py</command>
+        <command_read_timeout>1000000</command_read_timeout>
+    </function>
+</functions>
+```
+
+Now we can simply use:
+
+```sql
+SELECT encode_text('cat');
+```
+
+The first use will be slow because the model needs to be loaded. But repeated queries will be fast.
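For example (a small sketch, assuming the function is registered as shown above): generate an embedding for a new search phrase once, and copy the returned array into the `target` parameter used by the search queries earlier in this article:

```sql
-- Returns a 512-element Array(Float32) that can be pasted into SET param_target = ...
SELECT encode_text('a photo of a cat');
```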
+Then we copy the returned array into ``SET param_target = ...`` and can easily write search queries.
+
+### Image embeddings
+
+For pictures, the process is similar, but you send the path to the picture instead of the picture itself (if necessary, you could also implement downloading and processing of the picture, but it would take longer).
+
+`encode_picture.py`:
+```python
+#!/usr/bin/python3
+import clip
+import torch
+import numpy as np
+from PIL import Image
+import sys
+
+if __name__ == '__main__':
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    model, preprocess = clip.load("ViT-B/32", device=device)
+    for text in sys.stdin:
+        image = preprocess(Image.open(text.strip())).unsqueeze(0).to(device)
+        with torch.no_grad():
+            image_features = model.encode_image(image)[0].tolist()
+        print(image_features)
+        sys.stdout.flush()
+```
+
+`encode_picture_function.xml`:
+```xml
+<functions>
+    <function>
+        <type>executable_pool</type>
+        <name>encode_picture</name>
+        <return_type>Array(Float32)</return_type>
+        <argument>
+            <type>String</type>
+            <name>path</name>
+        </argument>
+        <format>TabSeparated</format>
+        <command>encode_picture.py</command>
+        <command_read_timeout>1000000</command_read_timeout>
+    </function>
+</functions>
+```
+
+The query:
+```sql
+SELECT encode_picture('some/path/to/your/picture');
+```
diff --git a/docs/en/getting-started/example-datasets/menus.md b/docs/en/getting-started/example-datasets/menus.md
index 10baf899fc6..32fe62865d4 100644
--- a/docs/en/getting-started/example-datasets/menus.md
+++ b/docs/en/getting-started/example-datasets/menus.md
@@ -119,9 +119,9 @@ We use [CSVWithNames](../../interfaces/formats.md#csvwithnames) format as the da
 We disable `format_csv_allow_single_quotes` as only double quotes are used for data fields and single quotes can be inside the values and should not confuse the CSV parser.
-We disable [input_format_null_as_default](../../operations/settings/settings.md#settings-input-format-null-as-default) as our data does not have [NULL](../../sql-reference/syntax.md#null-literal). Otherwise ClickHouse will try to parse `\N` sequences and can be confused with `\` in data.
+We disable [input_format_null_as_default](../../operations/settings/settings-formats.md#settings-input-format-null-as-default) as our data does not have [NULL](../../sql-reference/syntax.md#null-literal). Otherwise ClickHouse will try to parse `\N` sequences and can be confused with `\` in data.
-The setting [date_time_input_format best_effort](../../operations/settings/settings.md#settings-date_time_input_format) allows to parse [DateTime](../../sql-reference/data-types/datetime.md) fields in wide variety of formats. For example, ISO-8601 without seconds like '2000-01-01 01:02' will be recognized. Without this setting only fixed DateTime format is allowed.
+The setting [date_time_input_format best_effort](../../operations/settings/settings-formats.md#settings-date_time_input_format) allows to parse [DateTime](../../sql-reference/data-types/datetime.md) fields in wide variety of formats. For example, ISO-8601 without seconds like '2000-01-01 01:02' will be recognized. Without this setting only fixed DateTime format is allowed.
 ## Denormalize the Data {#denormalize-data}
diff --git a/docs/en/getting-started/example-datasets/opensky.md b/docs/en/getting-started/example-datasets/opensky.md
index 12a33791235..7093a2df04f 100644
--- a/docs/en/getting-started/example-datasets/opensky.md
+++ b/docs/en/getting-started/example-datasets/opensky.md
@@ -60,7 +60,7 @@ ls -1 flightlist_*.csv.gz | xargs -P100 -I{} bash -c 'gzip -c -d "{}" | clickhou
 `xargs -P100` specifies to use up to 100 parallel workers but as we only have 30 files, the number of workers will be only 30.
- For every file, `xargs` will run a script with `bash -c`.
The script has substitution in form of `{}` and the `xargs` command will substitute the filename to it (we have asked it for `xargs` with `-I{}`). - The script will decompress the file (`gzip -c -d "{}"`) to standard output (`-c` parameter) and the output is redirected to `clickhouse-client`. -- We also asked to parse [DateTime](../../sql-reference/data-types/datetime.md) fields with extended parser ([--date_time_input_format best_effort](../../operations/settings/settings.md#settings-date_time_input_format)) to recognize ISO-8601 format with timezone offsets. +- We also asked to parse [DateTime](../../sql-reference/data-types/datetime.md) fields with extended parser ([--date_time_input_format best_effort](../../operations/settings/settings-formats.md#settings-date_time_input_format)) to recognize ISO-8601 format with timezone offsets. Finally, `clickhouse-client` will do insertion. It will read input data in [CSVWithNames](../../interfaces/formats.md#csvwithnames) format. diff --git a/docs/en/getting-started/index.md b/docs/en/getting-started/index.md index e72e23208ac..b520220984c 100644 --- a/docs/en/getting-started/index.md +++ b/docs/en/getting-started/index.md @@ -22,8 +22,9 @@ functions in ClickHouse. The sample datasets include: - The [Cell Towers dataset](../getting-started/example-datasets/cell-towers.md) imports a CSV into ClickHouse - The [NYPD Complaint Data](../getting-started/example-datasets/nypd_complaint_data.md) demonstrates how to use data inference to simplify creating tables - The ["What's on the Menu?" dataset](../getting-started/example-datasets/menus.md) has an example of denormalizing data -- The [Getting Data Into ClickHouse - Part 1](https://clickhouse.com/blog/getting-data-into-clickhouse-part-1) provides examples of defining a schema and loading a small Hacker News dataset -- The [Getting Data Into ClickHouse - Part 2 - A JSON detour](https://clickhouse.com/blog/getting-data-into-clickhouse-part-2-json) shows how JSON data can be loaded -- The [Getting Data Into ClickHouse - Part 3 - Using S3](https://clickhouse.com/blog/getting-data-into-clickhouse-part-3-s3) has examples of loading data from s3 +- The [Laion dataset](../getting-started/example-datasets/laion.md) has an example of [Approximate nearest neighbor search indexes](../engines/table-engines/mergetree-family/annindexes.md) usage +- [Getting Data Into ClickHouse - Part 1](https://clickhouse.com/blog/getting-data-into-clickhouse-part-1) provides examples of defining a schema and loading a small Hacker News dataset +- [Getting Data Into ClickHouse - Part 3 - Using S3](https://clickhouse.com/blog/getting-data-into-clickhouse-part-3-s3) has examples of loading data from s3 +- [Generating random data in ClickHouse](https://clickhouse.com/blog/generating-random-test-distribution-data-for-clickhouse) shows how to generate random data if none of the above fit your needs. View the **Tutorials and Datasets** menu for a complete list of sample datasets. diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index d384ed639eb..6bbf7fb4696 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -83,9 +83,10 @@ The supported formats are: | [RawBLOB](#rawblob) | ✔ | ✔ | | [MsgPack](#msgpack) | ✔ | ✔ | | [MySQLDump](#mysqldump) | ✔ | ✗ | +| [Markdown](#markdown) | ✗ | ✔ | -You can control some format processing parameters with the ClickHouse settings. For more information read the [Settings](/docs/en/operations/settings/settings.md) section. 
+You can control some format processing parameters with the ClickHouse settings. For more information read the [Settings](/docs/en/operations/settings/settings-formats.md) section. ## TabSeparated {#tabseparated} @@ -148,10 +149,10 @@ Only a small set of symbols are escaped. You can easily stumble onto a string va Arrays are written as a list of comma-separated values in square brackets. Number items in the array are formatted as normally. `Date` and `DateTime` types are written in single quotes. Strings are written in single quotes with the same escaping rules as above. -[NULL](/docs/en/sql-reference/syntax.md) is formatted according to setting [format_tsv_null_representation](/docs/en/operations/settings/settings.md/#format_tsv_null_representation) (default value is `\N`). +[NULL](/docs/en/sql-reference/syntax.md) is formatted according to setting [format_tsv_null_representation](/docs/en/operations/settings/settings-formats.md/#format_tsv_null_representation) (default value is `\N`). In input data, ENUM values can be represented as names or as ids. First, we try to match the input value to the ENUM name. If we fail and the input value is a number, we try to match this number to ENUM id. -If input data contains only ENUM ids, it's recommended to enable the setting [input_format_tsv_enum_as_number](/docs/en/operations/settings/settings.md/#input_format_tsv_enum_as_number) to optimize ENUM parsing. +If input data contains only ENUM ids, it's recommended to enable the setting [input_format_tsv_enum_as_number](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_enum_as_number) to optimize ENUM parsing. Each element of [Nested](/docs/en/sql-reference/data-types/nested-data-structures/nested.md) structures is represented as an array. @@ -183,12 +184,13 @@ SELECT * FROM nestedt FORMAT TSV ### TabSeparated format settings {#tabseparated-format-settings} -- [format_tsv_null_representation](/docs/en/operations/settings/settings.md/#format_tsv_null_representation) - custom NULL representation in TSV format. Default value - `\N`. -- [input_format_tsv_empty_as_default](/docs/en/operations/settings/settings.md/#input_format_tsv_empty_as_default) - treat empty fields in TSV input as default values. Default value - `false`. For complex default expressions [input_format_defaults_for_omitted_fields](/docs/en/operations/settings/settings.md/#input_format_defaults_for_omitted_fields) must be enabled too. -- [input_format_tsv_enum_as_number](/docs/en/operations/settings/settings.md/#input_format_tsv_enum_as_number) - treat inserted enum values in TSV formats as enum indices. Default value - `false`. -- [input_format_tsv_use_best_effort_in_schema_inference](/docs/en/operations/settings/settings.md/#input_format_tsv_use_best_effort_in_schema_inference) - use some tweaks and heuristics to infer schema in TSV format. If disabled, all fields will be inferred as Strings. Default value - `true`. -- [output_format_tsv_crlf_end_of_line](/docs/en/operations/settings/settings.md/#output_format_tsv_crlf_end_of_line) - if it is set true, end of line in TSV output format will be `\r\n` instead of `\n`. Default value - `false`. -- [input_format_tsv_skip_first_lines](/docs/en/operations/settings/settings.md/#input_format_tsv_skip_first_lines) - skip specified number of lines at the beginning of data. Default value - `0`. +- [format_tsv_null_representation](/docs/en/operations/settings/settings-formats.md/#format_tsv_null_representation) - custom NULL representation in TSV format. Default value - `\N`. 
+- [input_format_tsv_empty_as_default](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_empty_as_default) - treat empty fields in TSV input as default values. Default value - `false`. For complex default expressions [input_format_defaults_for_omitted_fields](/docs/en/operations/settings/settings-formats.md/#input_format_defaults_for_omitted_fields) must be enabled too. +- [input_format_tsv_enum_as_number](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_enum_as_number) - treat inserted enum values in TSV formats as enum indices. Default value - `false`. +- [input_format_tsv_use_best_effort_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_use_best_effort_in_schema_inference) - use some tweaks and heuristics to infer schema in TSV format. If disabled, all fields will be inferred as Strings. Default value - `true`. +- [output_format_tsv_crlf_end_of_line](/docs/en/operations/settings/settings-formats.md/#output_format_tsv_crlf_end_of_line) - if it is set true, end of line in TSV output format will be `\r\n` instead of `\n`. Default value - `false`. +- [input_format_tsv_skip_first_lines](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_skip_first_lines) - skip specified number of lines at the beginning of data. Default value - `0`. +- [input_format_tsv_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_detect_header) - automatically detect header with names and types in TSV format. Default value - `true`. ## TabSeparatedRaw {#tabseparatedraw} @@ -204,8 +206,8 @@ Differs from the `TabSeparated` format in that the column names are written in t During parsing, the first row is expected to contain the column names. You can use column names to determine their position and to check their correctness. :::warning -If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, -the columns from the input data will be mapped to the columns of the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. +If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, +the columns from the input data will be mapped to the columns of the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. ::: @@ -216,10 +218,10 @@ This format is also available under the name `TSVWithNames`. Differs from the `TabSeparated` format in that the column names are written to the first row, while the column types are in the second row. :::warning -If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, -the columns from the input data will be mapped to the columns in the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. 
+If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, +the columns from the input data will be mapped to the columns in the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. -If setting [input_format_with_types_use_header](/docs/en/operations/settings/settings.md/#input_format_with_types_use_header) is set to 1, +If setting [input_format_with_types_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_types_use_header) is set to 1, the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped. ::: @@ -427,49 +429,50 @@ Both data output and parsing are supported in this format. For parsing, any orde Parsing allows the presence of the additional field `tskv` without the equal sign or a value. This field is ignored. -During import, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. +During import, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. ## CSV {#csv} Comma Separated Values format ([RFC](https://tools.ietf.org/html/rfc4180)). -When formatting, rows are enclosed in double quotes. A double quote inside a string is output as two double quotes in a row. There are no other rules for escaping characters. Date and date-time are enclosed in double quotes. Numbers are output without quotes. Values are separated by a delimiter character, which is `,` by default. The delimiter character is defined in the setting [format_csv_delimiter](/docs/en/operations/settings/settings.md/#format_csv_delimiter). Rows are separated using the Unix line feed (LF). Arrays are serialized in CSV as follows: first, the array is serialized to a string as in TabSeparated format, and then the resulting string is output to CSV in double quotes. Tuples in CSV format are serialized as separate columns (that is, their nesting in the tuple is lost). +When formatting, rows are enclosed in double quotes. A double quote inside a string is output as two double quotes in a row. There are no other rules for escaping characters. Date and date-time are enclosed in double quotes. Numbers are output without quotes. Values are separated by a delimiter character, which is `,` by default. The delimiter character is defined in the setting [format_csv_delimiter](/docs/en/operations/settings/settings-formats.md/#format_csv_delimiter). Rows are separated using the Unix line feed (LF). Arrays are serialized in CSV as follows: first, the array is serialized to a string as in TabSeparated format, and then the resulting string is output to CSV in double quotes. Tuples in CSV format are serialized as separate columns (that is, their nesting in the tuple is lost). ``` bash $ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FORMAT CSV" < data.csv ``` -\*By default, the delimiter is `,`. See the [format_csv_delimiter](/docs/en/operations/settings/settings.md/#format_csv_delimiter) setting for more information. +\*By default, the delimiter is `,`. 
See the [format_csv_delimiter](/docs/en/operations/settings/settings-formats.md/#format_csv_delimiter) setting for more information. When parsing, all values can be parsed either with or without quotes. Both double and single quotes are supported. Rows can also be arranged without quotes. In this case, they are parsed up to the delimiter character or line feed (CR or LF). In violation of the RFC, when parsing rows without quotes, the leading and trailing spaces and tabs are ignored. For the line feed, Unix (LF), Windows (CR LF) and Mac OS Classic (CR LF) types are all supported. -`NULL` is formatted according to setting [format_csv_null_representation](/docs/en/operations/settings/settings.md/#format_csv_null_representation) (default value is `\N`). +`NULL` is formatted according to setting [format_csv_null_representation](/docs/en/operations/settings/settings-formats.md/#format_csv_null_representation) (default value is `\N`). In input data, ENUM values can be represented as names or as ids. First, we try to match the input value to the ENUM name. If we fail and the input value is a number, we try to match this number to the ENUM id. -If input data contains only ENUM ids, it's recommended to enable the setting [input_format_csv_enum_as_number](/docs/en/operations/settings/settings.md/#input_format_csv_enum_as_number) to optimize ENUM parsing. +If input data contains only ENUM ids, it's recommended to enable the setting [input_format_csv_enum_as_number](/docs/en/operations/settings/settings-formats.md/#input_format_csv_enum_as_number) to optimize ENUM parsing. The CSV format supports the output of totals and extremes the same way as `TabSeparated`. ### CSV format settings {#csv-format-settings} -- [format_csv_delimiter](/docs/en/operations/settings/settings.md/#format_csv_delimiter) - the character to be considered as a delimiter in CSV data. Default value - `,`. -- [format_csv_allow_single_quotes](/docs/en/operations/settings/settings.md/#format_csv_allow_single_quotes) - allow strings in single quotes. Default value - `true`. -- [format_csv_allow_double_quotes](/docs/en/operations/settings/settings.md/#format_csv_allow_double_quotes) - allow strings in double quotes. Default value - `true`. -- [format_csv_null_representation](/docs/en/operations/settings/settings.md/#format_tsv_null_representation) - custom NULL representation in CSV format. Default value - `\N`. -- [input_format_csv_empty_as_default](/docs/en/operations/settings/settings.md/#input_format_csv_empty_as_default) - treat empty fields in CSV input as default values. Default value - `true`. For complex default expressions, [input_format_defaults_for_omitted_fields](/docs/en/operations/settings/settings.md/#input_format_defaults_for_omitted_fields) must be enabled too. -- [input_format_csv_enum_as_number](/docs/en/operations/settings/settings.md/#input_format_csv_enum_as_number) - treat inserted enum values in CSV formats as enum indices. Default value - `false`. -- [input_format_csv_use_best_effort_in_schema_inference](/docs/en/operations/settings/settings.md/#input_format_csv_use_best_effort_in_schema_inference) - use some tweaks and heuristics to infer schema in CSV format. If disabled, all fields will be inferred as Strings. Default value - `true`. -- [input_format_csv_arrays_as_nested_csv](/docs/en/operations/settings/settings.md/#input_format_csv_arrays_as_nested_csv) - when reading Array from CSV, expect that its elements were serialized in nested CSV and then put into string. Default value - `false`. 
-- [output_format_csv_crlf_end_of_line](/docs/en/operations/settings/settings.md/#output_format_csv_crlf_end_of_line) - if it is set to true, end of line in CSV output format will be `\r\n` instead of `\n`. Default value - `false`.
-- [input_format_csv_skip_first_lines](/docs/en/operations/settings/settings.md/#input_format_csv_skip_first_lines) - skip the specified number of lines at the beginning of data. Default value - `0`.
+- [format_csv_delimiter](/docs/en/operations/settings/settings-formats.md/#format_csv_delimiter) - the character to be considered as a delimiter in CSV data. Default value - `,`.
+- [format_csv_allow_single_quotes](/docs/en/operations/settings/settings-formats.md/#format_csv_allow_single_quotes) - allow strings in single quotes. Default value - `true`.
+- [format_csv_allow_double_quotes](/docs/en/operations/settings/settings-formats.md/#format_csv_allow_double_quotes) - allow strings in double quotes. Default value - `true`.
+- [format_csv_null_representation](/docs/en/operations/settings/settings-formats.md/#format_csv_null_representation) - custom NULL representation in CSV format. Default value - `\N`.
+- [input_format_csv_empty_as_default](/docs/en/operations/settings/settings-formats.md/#input_format_csv_empty_as_default) - treat empty fields in CSV input as default values. Default value - `true`. For complex default expressions, [input_format_defaults_for_omitted_fields](/docs/en/operations/settings/settings-formats.md/#input_format_defaults_for_omitted_fields) must be enabled too.
+- [input_format_csv_enum_as_number](/docs/en/operations/settings/settings-formats.md/#input_format_csv_enum_as_number) - treat inserted enum values in CSV formats as enum indices. Default value - `false`.
+- [input_format_csv_use_best_effort_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_csv_use_best_effort_in_schema_inference) - use some tweaks and heuristics to infer schema in CSV format. If disabled, all fields will be inferred as Strings. Default value - `true`.
+- [input_format_csv_arrays_as_nested_csv](/docs/en/operations/settings/settings-formats.md/#input_format_csv_arrays_as_nested_csv) - when reading Array from CSV, expect that its elements were serialized in nested CSV and then put into a string. Default value - `false`.
+- [output_format_csv_crlf_end_of_line](/docs/en/operations/settings/settings-formats.md/#output_format_csv_crlf_end_of_line) - if it is set to true, end of line in CSV output format will be `\r\n` instead of `\n`. Default value - `false`.
+- [input_format_csv_skip_first_lines](/docs/en/operations/settings/settings-formats.md/#input_format_csv_skip_first_lines) - skip the specified number of lines at the beginning of data. Default value - `0`.
+- [input_format_csv_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_csv_detect_header) - automatically detect header with names and types in CSV format. Default value - `true`.

## CSVWithNames {#csvwithnames}

Also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames).

:::warning
-If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1,
-the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1.
+If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, +the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. ::: @@ -478,16 +481,18 @@ Otherwise, the first row will be skipped. Also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes). :::warning -If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, -the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. +If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, +the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. -If setting [input_format_with_types_use_header](/docs/en/operations/settings/settings.md/#input_format_with_types_use_header) is set to 1, +If setting [input_format_with_types_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_types_use_header) is set to 1, the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped. ::: ## CustomSeparated {#format-customseparated} -Similar to [Template](#format-template), but it prints or reads all names and types of columns and uses escaping rule from [format_custom_escaping_rule](/docs/en/operations/settings/settings.md/#format_custom_escaping_rule) setting and delimiters from [format_custom_field_delimiter](/docs/en/operations/settings/settings.md/#format_custom_field_delimiter), [format_custom_row_before_delimiter](/docs/en/operations/settings/settings.md/#format_custom_row_before_delimiter), [format_custom_row_after_delimiter](/docs/en/operations/settings/settings.md/#format_custom_row_after_delimiter), [format_custom_row_between_delimiter](/docs/en/operations/settings/settings.md/#format_custom_row_between_delimiter), [format_custom_result_before_delimiter](/docs/en/operations/settings/settings.md/#format_custom_result_before_delimiter) and [format_custom_result_after_delimiter](/docs/en/operations/settings/settings.md/#format_custom_result_after_delimiter) settings, not from format strings. 
+Similar to [Template](#format-template), but it prints or reads all names and types of columns and uses escaping rule from [format_custom_escaping_rule](/docs/en/operations/settings/settings-formats.md/#format_custom_escaping_rule) setting and delimiters from [format_custom_field_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_field_delimiter), [format_custom_row_before_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_row_before_delimiter), [format_custom_row_after_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_row_after_delimiter), [format_custom_row_between_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_row_between_delimiter), [format_custom_result_before_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_result_before_delimiter) and [format_custom_result_after_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_result_after_delimiter) settings, not from format strings.
+
+If setting [input_format_custom_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_custom_detect_header) is enabled, ClickHouse will automatically detect the header with names and types, if there is one.

There is also `CustomSeparatedIgnoreSpaces` format, which is similar to [TemplateIgnoreSpaces](#templateignorespaces).

@@ -496,8 +501,8 @@ There is also `CustomSeparatedIgnoreSpaces` format, which is similar to [Templat

Also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames).

:::warning
-If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1,
-the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1.
+If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1,
+the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
:::

@@ -506,10 +511,10 @@ Otherwise, the first row will be skipped.

Also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes).

:::warning
-If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1,
-the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1.
+If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1,
+the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
-If setting [input_format_with_types_use_header](/docs/en/operations/settings/settings.md/#input_format_with_types_use_header) is set to 1, +If setting [input_format_with_types_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_types_use_header) is set to 1, the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped. ::: @@ -535,11 +540,11 @@ To read data output by this format you can use [MySQLDump](#mysqldump) input for ### SQLInsert format settings {#sqlinsert-format-settings} -- [output_format_sql_insert_max_batch_size](/docs/en/operations/settings/settings.md/#output_format_sql_insert_max_batch_size) - The maximum number of rows in one INSERT statement. Default value - `65505`. -- [output_format_sql_insert_table_name](/docs/en/operations/settings/settings.md/#output_format_sql_insert_table_name) - The name of the table in the output INSERT query. Default value - `'table'`. -- [output_format_sql_insert_include_column_names](/docs/en/operations/settings/settings.md/#output_format_sql_insert_include_column_names) - Include column names in INSERT query. Default value - `true`. -- [output_format_sql_insert_use_replace](/docs/en/operations/settings/settings.md/#output_format_sql_insert_use_replace) - Use REPLACE statement instead of INSERT. Default value - `false`. -- [output_format_sql_insert_quote_names](/docs/en/operations/settings/settings.md/#output_format_sql_insert_quote_names) - Quote column names with "\`" characters. Default value - `true`. +- [output_format_sql_insert_max_batch_size](/docs/en/operations/settings/settings-formats.md/#output_format_sql_insert_max_batch_size) - The maximum number of rows in one INSERT statement. Default value - `65505`. +- [output_format_sql_insert_table_name](/docs/en/operations/settings/settings-formats.md/#output_format_sql_insert_table_name) - The name of the table in the output INSERT query. Default value - `'table'`. +- [output_format_sql_insert_include_column_names](/docs/en/operations/settings/settings-formats.md/#output_format_sql_insert_include_column_names) - Include column names in INSERT query. Default value - `true`. +- [output_format_sql_insert_use_replace](/docs/en/operations/settings/settings-formats.md/#output_format_sql_insert_use_replace) - Use REPLACE statement instead of INSERT. Default value - `false`. +- [output_format_sql_insert_quote_names](/docs/en/operations/settings/settings-formats.md/#output_format_sql_insert_quote_names) - Quote column names with "\`" characters. Default value - `true`. ## JSON {#json} @@ -599,7 +604,7 @@ SELECT SearchPhrase, count() AS c FROM test.hits GROUP BY SearchPhrase WITH TOTA } ``` -The JSON is compatible with JavaScript. To ensure this, some characters are additionally escaped: the slash `/` is escaped as `\/`; alternative line breaks `U+2028` and `U+2029`, which break some browsers, are escaped as `\uXXXX`. ASCII control characters are escaped: backspace, form feed, line feed, carriage return, and horizontal tab are replaced with `\b`, `\f`, `\n`, `\r`, `\t` , as well as the remaining bytes in the 00-1F range using `\uXXXX` sequences. Invalid UTF-8 sequences are changed to the replacement character � so the output text will consist of valid UTF-8 sequences. For compatibility with JavaScript, Int64 and UInt64 integers are enclosed in double quotes by default. 
To remove the quotes, you can set the configuration parameter [output_format_json_quote_64bit_integers](/docs/en/operations/settings/settings.md/#output_format_json_quote_64bit_integers) to 0. +The JSON is compatible with JavaScript. To ensure this, some characters are additionally escaped: the slash `/` is escaped as `\/`; alternative line breaks `U+2028` and `U+2029`, which break some browsers, are escaped as `\uXXXX`. ASCII control characters are escaped: backspace, form feed, line feed, carriage return, and horizontal tab are replaced with `\b`, `\f`, `\n`, `\r`, `\t` , as well as the remaining bytes in the 00-1F range using `\uXXXX` sequences. Invalid UTF-8 sequences are changed to the replacement character � so the output text will consist of valid UTF-8 sequences. For compatibility with JavaScript, Int64 and UInt64 integers are enclosed in double quotes by default. To remove the quotes, you can set the configuration parameter [output_format_json_quote_64bit_integers](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_64bit_integers) to 0. `rows` – The total number of output rows. @@ -610,14 +615,14 @@ If the query contains GROUP BY, rows_before_limit_at_least is the exact number o `extremes` – Extreme values (when extremes are set to 1). -ClickHouse supports [NULL](/docs/en/sql-reference/syntax.md), which is displayed as `null` in the JSON output. To enable `+nan`, `-nan`, `+inf`, `-inf` values in output, set the [output_format_json_quote_denormals](/docs/en/operations/settings/settings.md/#output_format_json_quote_denormals) to 1. +ClickHouse supports [NULL](/docs/en/sql-reference/syntax.md), which is displayed as `null` in the JSON output. To enable `+nan`, `-nan`, `+inf`, `-inf` values in output, set the [output_format_json_quote_denormals](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_denormals) to 1. **See Also** - [JSONEachRow](#jsoneachrow) format -- [output_format_json_array_of_rows](/docs/en/operations/settings/settings.md/#output_format_json_array_of_rows) setting +- [output_format_json_array_of_rows](/docs/en/operations/settings/settings-formats.md/#output_format_json_array_of_rows) setting -For JSON input format, if setting [input_format_json_validate_types_from_metadata](/docs/en/operations/settings/settings.md/#input_format_json_validate_types_from_metadata) is set to 1, +For JSON input format, if setting [input_format_json_validate_types_from_metadata](/docs/en/operations/settings/settings-formats.md/#input_format_json_validate_types_from_metadata) is set to 1, the types from metadata in input data will be compared with the types of the corresponding columns from the table. ## JSONStrings {#jsonstrings} @@ -690,8 +695,8 @@ Example: } ``` -During import, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. -Columns that are not present in the block will be filled with default values (you can use the [input_format_defaults_for_omitted_fields](/docs/en/operations/settings/settings.md/#input_format_defaults_for_omitted_fields) setting here) +During import, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. 
+Columns that are not present in the block will be filled with default values (you can use the [input_format_defaults_for_omitted_fields](/docs/en/operations/settings/settings-formats.md/#input_format_defaults_for_omitted_fields) setting here) ## JSONColumnsWithMetadata {#jsoncolumnsmonoblock} @@ -739,7 +744,7 @@ Example: } ``` -For JSONColumnsWithMetadata input format, if setting [input_format_json_validate_types_from_metadata](/docs/en/operations/settings/settings.md/#input_format_json_validate_types_from_metadata) is set to 1, +For JSONColumnsWithMetadata input format, if setting [input_format_json_validate_types_from_metadata](/docs/en/operations/settings/settings-formats.md/#input_format_json_validate_types_from_metadata) is set to 1, the types from metadata in input data will be compared with the types of the corresponding columns from the table. ## JSONAsString {#jsonasstring} @@ -891,7 +896,7 @@ Example: ] ``` -Columns that are not present in the block will be filled with default values (you can use [input_format_defaults_for_omitted_fields](/docs/en/operations/settings/settings.md/#input_format_defaults_for_omitted_fields) setting here) +Columns that are not present in the block will be filled with default values (you can use [input_format_defaults_for_omitted_fields](/docs/en/operations/settings/settings-formats.md/#input_format_defaults_for_omitted_fields) setting here) ## JSONEachRow {#jsoneachrow} @@ -905,7 +910,7 @@ Example: {"num":44,"str":"hello","arr":[0,1,2,3]} ``` -While importing data columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. +While importing data columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. ## JSONStringsEachRow {#jsonstringseachrow} @@ -960,8 +965,8 @@ Differs from `JSONEachRow`/`JSONStringsEachRow` in that ClickHouse will also yie Differs from `JSONCompactEachRow` format in that it also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames). :::warning -If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, -the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. +If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, +the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. ::: @@ -970,10 +975,10 @@ Otherwise, the first row will be skipped. Differs from `JSONCompactEachRow` format in that it also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes). 
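For illustration only, selecting from a hypothetical table with columns `num Int32`, `str String` and `arr Array(UInt8)` in this format might produce output along these lines (the table and values below are made up for the sketch):

``` bash
$ clickhouse-client --query="SELECT * FROM test.some_table FORMAT JSONCompactEachRowWithNamesAndTypes"
["num", "str", "arr"]
["Int32", "String", "Array(UInt8)"]
[42, "hello", [0,1,2]]
```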
:::warning
-If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1,
-the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1.
+If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1,
+the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
-If setting [input_format_with_types_use_header](/docs/en/operations/settings/settings.md/#input_format_with_types_use_header) is set to 1,
+If setting [input_format_with_types_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_types_use_header) is set to 1,
the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped.
:::

@@ -982,8 +987,8 @@ the types from input data will be compared with the types of the corresponding c

Differs from `JSONCompactStringsEachRow` in that it also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames).

:::warning
-If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1,
-the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1.
+If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1,
+the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
:::

@@ -992,10 +997,10 @@ Otherwise, the first row will be skipped.

Differs from `JSONCompactStringsEachRow` in that it also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes).

:::warning
-If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1,
-the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1.
+If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, +the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. -If setting [input_format_with_types_use_header](/docs/en/operations/settings/settings.md/#input_format_with_types_use_header) is set to 1, +If setting [input_format_with_types_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_types_use_header) is set to 1, the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped. ::: @@ -1021,7 +1026,7 @@ Example: } ``` -To use an object name as a column value you can use the special setting [format_json_object_each_row_column_for_object_name](/docs/en/operations/settings/settings.md/#format_json_object_each_row_column_for_object_name). The value of this setting is set to the name of a column, that is used as JSON key for a row in the resulting object. +To use an object name as a column value you can use the special setting [format_json_object_each_row_column_for_object_name](/docs/en/operations/settings/settings-formats.md/#format_json_object_each_row_column_for_object_name). The value of this setting is set to the name of a column, that is used as JSON key for a row in the resulting object. Examples: For output: @@ -1095,7 +1100,7 @@ ClickHouse ignores spaces between elements and commas after the objects. You can ClickHouse substitutes omitted values with the default values for the corresponding [data types](/docs/en/sql-reference/data-types/index.md). -If `DEFAULT expr` is specified, ClickHouse uses different substitution rules depending on the [input_format_defaults_for_omitted_fields](/docs/en/operations/settings/settings.md/#input_format_defaults_for_omitted_fields) setting. +If `DEFAULT expr` is specified, ClickHouse uses different substitution rules depending on the [input_format_defaults_for_omitted_fields](/docs/en/operations/settings/settings-formats.md/#input_format_defaults_for_omitted_fields) setting. Consider the following table: @@ -1140,7 +1145,7 @@ Any set of bytes can be output in the strings. Use the `JSONEachRow` format if y ### Usage of Nested Structures {#jsoneachrow-nested} -If you have a table with [Nested](/docs/en/sql-reference/data-types/nested-data-structures/nested.md) data type columns, you can insert JSON data with the same structure. Enable this feature with the [input_format_import_nested_json](/docs/en/operations/settings/settings.md/#input_format_import_nested_json) setting. +If you have a table with [Nested](/docs/en/sql-reference/data-types/nested-data-structures/nested.md) data type columns, you can insert JSON data with the same structure. Enable this feature with the [input_format_import_nested_json](/docs/en/operations/settings/settings-formats.md/#input_format_import_nested_json) setting. 
For example, consider the following table: @@ -1154,7 +1159,7 @@ As you can see in the `Nested` data type description, ClickHouse treats each com INSERT INTO json_each_row_nested FORMAT JSONEachRow {"n.s": ["abc", "def"], "n.i": [1, 23]} ``` -To insert data as a hierarchical JSON object, set [input_format_import_nested_json=1](/docs/en/operations/settings/settings.md/#input_format_import_nested_json). +To insert data as a hierarchical JSON object, set [input_format_import_nested_json=1](/docs/en/operations/settings/settings-formats.md/#input_format_import_nested_json). ``` json { @@ -1199,20 +1204,21 @@ SELECT * FROM json_each_row_nested ### JSON formats settings {#json-formats-settings} -- [input_format_import_nested_json](/docs/en/operations/settings/settings.md/#input_format_import_nested_json) - map nested JSON data to nested tables (it works for JSONEachRow format). Default value - `false`. -- [input_format_json_read_bools_as_numbers](/docs/en/operations/settings/settings.md/#input_format_json_read_bools_as_numbers) - allow to parse bools as numbers in JSON input formats. Default value - `true`. -- [input_format_json_read_numbers_as_strings](/docs/en/operations/settings/settings.md/#input_format_json_read_numbers_as_strings) - allow to parse numbers as strings in JSON input formats. Default value - `false`. -- [input_format_json_read_objects_as_strings](/docs/en/operations/settings/settings.md/#input_format_json_read_objects_as_strings) - allow to parse JSON objects as strings in JSON input formats. Default value - `false`. -- [input_format_json_named_tuples_as_objects](/docs/en/operations/settings/settings.md/#input_format_json_named_tuples_as_objects) - parse named tuple columns as JSON objects. Default value - `true`. -- [input_format_json_defaults_for_missing_elements_in_named_tuple](/docs/en/operations/settings/settings.md/#input_format_json_defaults_for_missing_elements_in_named_tuple) - insert default values for missing elements in JSON object while parsing named tuple. Default value - `true`. -- [output_format_json_quote_64bit_integers](/docs/en/operations/settings/settings.md/#output_format_json_quote_64bit_integers) - controls quoting of 64-bit integers in JSON output format. Default value - `true`. -- [output_format_json_quote_64bit_floats](/docs/en/operations/settings/settings.md/#output_format_json_quote_64bit_floats) - controls quoting of 64-bit floats in JSON output format. Default value - `false`. -- [output_format_json_quote_denormals](/docs/en/operations/settings/settings.md/#output_format_json_quote_denormals) - enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format. Default value - `false`. -- [output_format_json_quote_decimals](/docs/en/operations/settings/settings.md/#output_format_json_quote_decimals) - controls quoting of decimals in JSON output format. Default value - `false`. -- [output_format_json_escape_forward_slashes](/docs/en/operations/settings/settings.md/#output_format_json_escape_forward_slashes) - controls escaping forward slashes for string outputs in JSON output format. Default value - `true`. -- [output_format_json_named_tuples_as_objects](/docs/en/operations/settings/settings.md/#output_format_json_named_tuples_as_objects) - serialize named tuple columns as JSON objects. Default value - `true`. -- [output_format_json_array_of_rows](/docs/en/operations/settings/settings.md/#output_format_json_array_of_rows) - output a JSON array of all rows in JSONEachRow(Compact) format. Default value - `false`. 
-- [output_format_json_validate_utf8](/docs/en/operations/settings/settings.md/#output_format_json_validate_utf8) - enables validation of UTF-8 sequences in JSON output formats (note that it doesn't impact formats JSON/JSONCompact/JSONColumnsWithMetadata, they always validate utf8). Default value - `false`. +- [input_format_import_nested_json](/docs/en/operations/settings/settings-formats.md/#input_format_import_nested_json) - map nested JSON data to nested tables (it works for JSONEachRow format). Default value - `false`. +- [input_format_json_read_bools_as_numbers](/docs/en/operations/settings/settings-formats.md/#input_format_json_read_bools_as_numbers) - allow to parse bools as numbers in JSON input formats. Default value - `true`. +- [input_format_json_read_numbers_as_strings](/docs/en/operations/settings/settings-formats.md/#input_format_json_read_numbers_as_strings) - allow to parse numbers as strings in JSON input formats. Default value - `false`. +- [input_format_json_read_objects_as_strings](/docs/en/operations/settings/settings-formats.md/#input_format_json_read_objects_as_strings) - allow to parse JSON objects as strings in JSON input formats. Default value - `false`. +- [input_format_json_named_tuples_as_objects](/docs/en/operations/settings/settings-formats.md/#input_format_json_named_tuples_as_objects) - parse named tuple columns as JSON objects. Default value - `true`. +- [input_format_json_defaults_for_missing_elements_in_named_tuple](/docs/en/operations/settings/settings-formats.md/#input_format_json_defaults_for_missing_elements_in_named_tuple) - insert default values for missing elements in JSON object while parsing named tuple. Default value - `true`. +- [input_format_json_ignore_unknown_keys_in_named_tuple](/docs/en/operations/settings/settings-formats.md/#input_format_json_ignore_unknown_keys_in_named_tuple) - Ignore unknown keys in json object for named tuples. Default value - `false`. +- [output_format_json_quote_64bit_integers](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_64bit_integers) - controls quoting of 64-bit integers in JSON output format. Default value - `true`. +- [output_format_json_quote_64bit_floats](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_64bit_floats) - controls quoting of 64-bit floats in JSON output format. Default value - `false`. +- [output_format_json_quote_denormals](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_denormals) - enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format. Default value - `false`. +- [output_format_json_quote_decimals](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_decimals) - controls quoting of decimals in JSON output format. Default value - `false`. +- [output_format_json_escape_forward_slashes](/docs/en/operations/settings/settings-formats.md/#output_format_json_escape_forward_slashes) - controls escaping forward slashes for string outputs in JSON output format. Default value - `true`. +- [output_format_json_named_tuples_as_objects](/docs/en/operations/settings/settings-formats.md/#output_format_json_named_tuples_as_objects) - serialize named tuple columns as JSON objects. Default value - `true`. +- [output_format_json_array_of_rows](/docs/en/operations/settings/settings-formats.md/#output_format_json_array_of_rows) - output a JSON array of all rows in JSONEachRow(Compact) format. Default value - `false`. 
+- [output_format_json_validate_utf8](/docs/en/operations/settings/settings-formats.md/#output_format_json_validate_utf8) - enables validation of UTF-8 sequences in JSON output formats (note that it doesn't impact formats JSON/JSONCompact/JSONColumnsWithMetadata, they always validate utf8). Default value - `false`. ## BSONEachRow {#bsoneachrow} @@ -1274,8 +1280,8 @@ Note: this format don't work properly on Big-Endian platforms. ### BSON format settings {#bson-format-settings} -- [output_format_bson_string_as_string](/docs/en/operations/settings/settings.md/#output_format_bson_string_as_string) - use BSON String type instead of Binary for String columns. Default value - `false`. -- [input_format_bson_skip_fields_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings.md/#input_format_bson_skip_fields_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for format BSONEachRow. Default value - `false`. +- [output_format_bson_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_bson_string_as_string) - use BSON String type instead of Binary for String columns. Default value - `false`. +- [input_format_bson_skip_fields_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_bson_skip_fields_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for format BSONEachRow. Default value - `false`. ## Native {#native} @@ -1408,12 +1414,12 @@ Differs from [PrettySpaceNoEscapes](#prettyspacenoescapes) in that up to 10,000 ## Pretty formats settings {#pretty-formats-settings} -- [output_format_pretty_max_rows](/docs/en/operations/settings/settings.md/#output_format_pretty_max_rows) - rows limit for Pretty formats. Default value - `10000`. -- [output_format_pretty_max_column_pad_width](/docs/en/operations/settings/settings.md/#output_format_pretty_max_column_pad_width) - maximum width to pad all values in a column in Pretty formats. Default value - `250`. -- [output_format_pretty_max_value_width](/docs/en/operations/settings/settings.md/#output_format_pretty_max_value_width) - Maximum width of value to display in Pretty formats. If greater - it will be cut. Default value - `10000`. -- [output_format_pretty_color](/docs/en/operations/settings/settings.md/#output_format_pretty_color) - use ANSI escape sequences to paint colors in Pretty formats. Default value - `true`. -- [output_format_pretty_grid_charset](/docs/en/operations/settings/settings.md/#output_format_pretty_grid_charset) - Charset for printing grid borders. Available charsets: ASCII, UTF-8. Default value - `UTF-8`. -- [output_format_pretty_row_numbers](/docs/en/operations/settings/settings.md/#output_format_pretty_row_numbers) - Add row numbers before each row for pretty output format. Default value - `false`. +- [output_format_pretty_max_rows](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_max_rows) - rows limit for Pretty formats. Default value - `10000`. +- [output_format_pretty_max_column_pad_width](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_max_column_pad_width) - maximum width to pad all values in a column in Pretty formats. Default value - `250`. +- [output_format_pretty_max_value_width](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_max_value_width) - Maximum width of value to display in Pretty formats. If greater - it will be cut. 
Default value - `10000`. +- [output_format_pretty_color](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_color) - use ANSI escape sequences to paint colors in Pretty formats. Default value - `true`. +- [output_format_pretty_grid_charset](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_grid_charset) - Charset for printing grid borders. Available charsets: ASCII, UTF-8. Default value - `UTF-8`. +- [output_format_pretty_row_numbers](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_row_numbers) - Add row numbers before each row for pretty output format. Default value - `false`. ## RowBinary {#rowbinary} @@ -1438,8 +1444,8 @@ Similar to [RowBinary](#rowbinary), but with added header: - N `String`s specifying column names :::warning -If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, -the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. +If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, +the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. ::: @@ -1452,16 +1458,16 @@ Similar to [RowBinary](#rowbinary), but with added header: - N `String`s specifying column types :::warning -If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, -the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. +If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, +the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. -If setting [input_format_with_types_use_header](/docs/en/operations/settings/settings.md/#input_format_with_types_use_header) is set to 1, +If setting [input_format_with_types_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_types_use_header) is set to 1, the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped. ::: ## RowBinary format settings {#row-binary-format-settings} -- [format_binary_max_string_size](/docs/en/operations/settings/settings.md/#format_binary_max_string_size) - The maximum allowed size for String in RowBinary format. Default value - `1GiB`. +- [format_binary_max_string_size](/docs/en/operations/settings/settings-formats.md/#format_binary_max_string_size) - The maximum allowed size for String in RowBinary format. 
Default value - `1GiB`. ## Values {#data-format-values} @@ -1473,9 +1479,9 @@ This is the format that is used in `INSERT INTO t VALUES ...`, but you can also ## Values format settings {#values-format-settings} -- [input_format_values_interpret_expressions](/docs/en/operations/settings/settings.md/#input_format_values_interpret_expressions) - if the field could not be parsed by streaming parser, run SQL parser and try to interpret it as SQL expression. Default value - `true`. -- [input_format_values_deduce_templates_of_expressions](/docs/en/operations/settings/settings.md/#input_format_values_deduce_templates_of_expressions) -if the field could not be parsed by streaming parser, run SQL parser, deduce template of the SQL expression, try to parse all rows using template and then interpret expression for all rows. Default value - `true`. -- [input_format_values_accurate_types_of_literals](/docs/en/operations/settings/settings.md/#input_format_values_accurate_types_of_literals) - when parsing and interpreting expressions using template, check actual type of literal to avoid possible overflow and precision issues. Default value - `true`. +- [input_format_values_interpret_expressions](/docs/en/operations/settings/settings-formats.md/#input_format_values_interpret_expressions) - if the field could not be parsed by streaming parser, run SQL parser and try to interpret it as SQL expression. Default value - `true`. +- [input_format_values_deduce_templates_of_expressions](/docs/en/operations/settings/settings-formats.md/#input_format_values_deduce_templates_of_expressions) -if the field could not be parsed by streaming parser, run SQL parser, deduce template of the SQL expression, try to parse all rows using template and then interpret expression for all rows. Default value - `true`. +- [input_format_values_accurate_types_of_literals](/docs/en/operations/settings/settings-formats.md/#input_format_values_accurate_types_of_literals) - when parsing and interpreting expressions using template, check actual type of literal to avoid possible overflow and precision issues. Default value - `true`. ## Vertical {#vertical} @@ -1615,7 +1621,7 @@ The table below shows supported data types and how they match ClickHouse [data t | `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` | | `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` | -For working with `Enum` in CapnProto format use the [format_capn_proto_enum_comparising_mode](/docs/en/operations/settings/settings.md/#format_capn_proto_enum_comparising_mode) setting. +For working with `Enum` in CapnProto format use the [format_capn_proto_enum_comparising_mode](/docs/en/operations/settings/settings-formats.md/#format_capn_proto_enum_comparising_mode) setting. Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` type also can be nested. @@ -1714,7 +1720,7 @@ something_weird{problem="division by zero"} +Inf -3982045 ## Protobuf {#protobuf} -Protobuf - is a [Protocol Buffers](https://developers.google.com/protocol-buffers/) format. +Protobuf - is a [Protocol Buffers](https://protobuf.dev/) format. This format requires an external format schema. The schema is cached between queries. ClickHouse supports both `proto2` and `proto3` syntaxes. Repeated/optional/required fields are supported. 
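As a sketch of how the external schema is supplied in practice (the schema file `schemafile.proto`, its message type `MessageType`, and the table `test.table` are placeholder names used only for this sketch):

``` bash
# Insert Protobuf-encoded messages described by MessageType in schemafile.proto
$ cat protobuf_messages.bin | clickhouse-client --query "INSERT INTO test.table SETTINGS format_schema='schemafile:MessageType' FORMAT Protobuf"

# Export rows using the same schema
$ clickhouse-client --query "SELECT * FROM test.table FORMAT Protobuf SETTINGS format_schema='schemafile:MessageType'" > result.bin
```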
@@ -1809,7 +1815,7 @@ The table below shows supported data types and how they match ClickHouse [data t | `long (timestamp-millis)` \** | [DateTime64(3)](/docs/en/sql-reference/data-types/datetime.md) | `long (timestamp-millis)` \* | | `long (timestamp-micros)` \** | [DateTime64(6)](/docs/en/sql-reference/data-types/datetime.md) | `long (timestamp-micros)` \* | -\* `bytes` is default, controlled by [output_format_avro_string_column_pattern](/docs/en/operations/settings/settings.md/#output_format_avro_string_column_pattern) +\* `bytes` is default, controlled by [output_format_avro_string_column_pattern](/docs/en/operations/settings/settings-formats.md/#output_format_avro_string_column_pattern) \** [Avro logical types](https://avro.apache.org/docs/current/spec.html#Logical+Types) Unsupported Avro data types: `record` (non-root), `map` @@ -1831,7 +1837,7 @@ Unused fields are skipped. Data types of ClickHouse table columns can differ from the corresponding fields of the Avro data inserted. When inserting data, ClickHouse interprets data types according to the table above and then [casts](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_function-cast) the data to corresponding column type. -While importing data, when field is not found in schema and setting [input_format_avro_allow_missing_fields](/docs/en/operations/settings/settings.md/#input_format_avro_allow_missing_fields) is enabled, default value will be used instead of error. +While importing data, when field is not found in schema and setting [input_format_avro_allow_missing_fields](/docs/en/operations/settings/settings-formats.md/#input_format_avro_allow_missing_fields) is enabled, default value will be used instead of error. ### Selecting Data {#selecting-data-1} @@ -1846,7 +1852,7 @@ Column names must: - start with `[A-Za-z_]` - subsequently contain only `[A-Za-z0-9_]` -Output Avro file compression and sync interval can be configured with [output_format_avro_codec](/docs/en/operations/settings/settings.md/#output_format_avro_codec) and [output_format_avro_sync_interval](/docs/en/operations/settings/settings.md/#output_format_avro_sync_interval) respectively. +Output Avro file compression and sync interval can be configured with [output_format_avro_codec](/docs/en/operations/settings/settings-formats.md/#output_format_avro_codec) and [output_format_avro_sync_interval](/docs/en/operations/settings/settings-formats.md/#output_format_avro_sync_interval) respectively. ## AvroConfluent {#data-format-avro-confluent} @@ -1856,7 +1862,7 @@ Each Avro message embeds a schema id that can be resolved to the actual schema w Schemas are cached once resolved. -Schema Registry URL is configured with [format_avro_schema_registry_url](/docs/en/operations/settings/settings.md/#format_avro_schema_registry_url). +Schema Registry URL is configured with [format_avro_schema_registry_url](/docs/en/operations/settings/settings-formats.md/#format_avro_schema_registry_url). ### Data Types Matching {#data_types-matching-1} @@ -1954,12 +1960,12 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t ### Parquet format settings {#parquet-format-settings} -- [output_format_parquet_row_group_size](/docs/en/operations/settings/settings.md/#output_format_parquet_row_group_size) - row group size in rows while data output. Default value - `1000000`. 
-- [output_format_parquet_string_as_string](/docs/en/operations/settings/settings.md/#output_format_parquet_string_as_string) - use Parquet String type instead of Binary for String columns. Default value - `false`. -- [input_format_parquet_import_nested](/docs/en/operations/settings/settings.md/#input_format_parquet_import_nested) - allow inserting array of structs into [Nested](/docs/en/sql-reference/data-types/nested-data-structures/nested.md) table in Parquet input format. Default value - `false`. -- [input_format_parquet_case_insensitive_column_matching](/docs/en/operations/settings/settings.md/#input_format_parquet_case_insensitive_column_matching) - ignore case when matching Parquet columns with ClickHouse columns. Default value - `false`. -- [input_format_parquet_allow_missing_columns](/docs/en/operations/settings/settings.md/#input_format_parquet_allow_missing_columns) - allow missing columns while reading Parquet data. Default value - `false`. -- [input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings.md/#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Parquet format. Default value - `false`. +- [output_format_parquet_row_group_size](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_row_group_size) - row group size in rows while data output. Default value - `1000000`. +- [output_format_parquet_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_string_as_string) - use Parquet String type instead of Binary for String columns. Default value - `false`. +- [input_format_parquet_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_import_nested) - allow inserting array of structs into [Nested](/docs/en/sql-reference/data-types/nested-data-structures/nested.md) table in Parquet input format. Default value - `false`. +- [input_format_parquet_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_case_insensitive_column_matching) - ignore case when matching Parquet columns with ClickHouse columns. Default value - `false`. +- [input_format_parquet_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_allow_missing_columns) - allow missing columns while reading Parquet data. Default value - `false`. +- [input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Parquet format. Default value - `false`. ## Arrow {#data-format-arrow} @@ -1997,7 +2003,7 @@ The table below shows supported data types and how they match ClickHouse [data t Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types also can be nested. -The `DICTIONARY` type is supported for `INSERT` queries, and for `SELECT` queries there is an [output_format_arrow_low_cardinality_as_dictionary](/docs/en/operations/settings/settings.md/#output-format-arrow-low-cardinality-as-dictionary) setting that allows to output [LowCardinality](/docs/en/sql-reference/data-types/lowcardinality.md) type as a `DICTIONARY` type. 
+The `DICTIONARY` type is supported for `INSERT` queries, and for `SELECT` queries there is an [output_format_arrow_low_cardinality_as_dictionary](/docs/en/operations/settings/settings-formats.md/#output-format-arrow-low-cardinality-as-dictionary) setting that allows to output [LowCardinality](/docs/en/sql-reference/data-types/lowcardinality.md) type as a `DICTIONARY` type. Unsupported Arrow data types: `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`. @@ -2021,12 +2027,12 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filenam ### Arrow format settings {#parquet-format-settings} -- [output_format_arrow_low_cardinality_as_dictionary](/docs/en/operations/settings/settings.md/#output_format_arrow_low_cardinality_as_dictionary) - enable output ClickHouse LowCardinality type as Dictionary Arrow type. Default value - `false`. -- [output_format_arrow_string_as_string](/docs/en/operations/settings/settings.md/#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`. -- [input_format_arrow_import_nested](/docs/en/operations/settings/settings.md/#input_format_arrow_import_nested) - allow inserting array of structs into Nested table in Arrow input format. Default value - `false`. -- [input_format_arrow_case_insensitive_column_matching](/docs/en/operations/settings/settings.md/#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`. -- [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`. -- [input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings.md/#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Arrow format. Default value - `false`. +- [output_format_arrow_low_cardinality_as_dictionary](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_low_cardinality_as_dictionary) - enable output ClickHouse LowCardinality type as Dictionary Arrow type. Default value - `false`. +- [output_format_arrow_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`. +- [input_format_arrow_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_import_nested) - allow inserting array of structs into Nested table in Arrow input format. Default value - `false`. +- [input_format_arrow_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`. +- [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`. +- [input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Arrow format. Default value - `false`. 
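+
+For illustration, the settings above can be combined in a single query. A minimal sketch, assuming a hypothetical file `data.arrow` (the file name and the particular combination of settings are only examples):
+
+```sql
+-- read an Arrow file, tolerating columns that are missing or that differ only in case
+SELECT *
+FROM file('data.arrow', 'Arrow')
+SETTINGS input_format_arrow_case_insensitive_column_matching = 1,
+         input_format_arrow_allow_missing_columns = 1;
+```
+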
## ArrowStream {#data-format-arrow-stream} @@ -2081,11 +2087,11 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT ORC" > {filename. ### Arrow format settings {#parquet-format-settings} -- [output_format_arrow_string_as_string](/docs/en/operations/settings/settings.md/#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`. -- [input_format_arrow_import_nested](/docs/en/operations/settings/settings.md/#input_format_arrow_import_nested) - allow inserting array of structs into Nested table in Arrow input format. Default value - `false`. -- [input_format_arrow_case_insensitive_column_matching](/docs/en/operations/settings/settings.md/#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`. -- [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`. -- [input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings.md/#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Arrow format. Default value - `false`. +- [output_format_arrow_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`. +- [input_format_arrow_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_import_nested) - allow inserting array of structs into Nested table in Arrow input format. Default value - `false`. +- [input_format_arrow_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`. +- [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`. +- [input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Arrow format. Default value - `false`. To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/table-engines/integrations/hdfs.md). @@ -2133,13 +2139,13 @@ When working with the `Regexp` format, you can use the following settings: **Usage** -The regular expression from [format_regexp](/docs/en/operations/settings/settings.md/#format_regexp) setting is applied to every line of imported data. The number of subpatterns in the regular expression must be equal to the number of columns in imported dataset. +The regular expression from [format_regexp](/docs/en/operations/settings/settings-formats.md/#format_regexp) setting is applied to every line of imported data. The number of subpatterns in the regular expression must be equal to the number of columns in imported dataset. Lines of the imported data must be separated by newline character `'\n'` or DOS-style newline `"\r\n"`. 
-The content of every matched subpattern is parsed with the method of corresponding data type, according to [format_regexp_escaping_rule](/docs/en/operations/settings/settings.md/#format_regexp_escaping_rule) setting. +The content of every matched subpattern is parsed with the method of corresponding data type, according to [format_regexp_escaping_rule](/docs/en/operations/settings/settings-formats.md/#format_regexp_escaping_rule) setting. -If the regular expression does not match the line and [format_regexp_skip_unmatched](/docs/en/operations/settings/settings.md/#format_regexp_escaping_rule) is set to 1, the line is silently skipped. Otherwise, exception is thrown. +If the regular expression does not match the line and [format_regexp_skip_unmatched](/docs/en/operations/settings/settings-formats.md/#format_regexp_escaping_rule) is set to 1, the line is silently skipped. Otherwise, exception is thrown. **Example** @@ -2197,8 +2203,8 @@ in the server configuration. ## Skipping Errors {#skippingerrors} -Some formats such as `CSV`, `TabSeparated`, `TSKV`, `JSONEachRow`, `Template`, `CustomSeparated` and `Protobuf` can skip broken row if parsing error occurred and continue parsing from the beginning of next row. See [input_format_allow_errors_num](/docs/en/operations/settings/settings.md/#input_format_allow_errors_num) and -[input_format_allow_errors_ratio](/docs/en/operations/settings/settings.md/#input_format_allow_errors_ratio) settings. +Some formats such as `CSV`, `TabSeparated`, `TSKV`, `JSONEachRow`, `Template`, `CustomSeparated` and `Protobuf` can skip broken row if parsing error occurred and continue parsing from the beginning of next row. See [input_format_allow_errors_num](/docs/en/operations/settings/settings-formats.md/#input_format_allow_errors_num) and +[input_format_allow_errors_ratio](/docs/en/operations/settings/settings-formats.md/#input_format_allow_errors_ratio) settings. Limitations: - In case of parsing error `JSONEachRow` skips all data until the new line (or EOF), so rows must be delimited by `\n` to count errors correctly. - `Template` and `CustomSeparated` use delimiter after the last column and delimiter between rows to find the beginning of next row, so skipping errors works only if at least one of them is not empty. @@ -2277,17 +2283,17 @@ $ clickhouse-client --query="SELECT * FROM msgpack FORMAT MsgPack" > tmp_msgpack ### MsgPack format settings {#msgpack-format-settings} -- [input_format_msgpack_number_of_columns](/docs/en/operations/settings/settings.md/#input_format_msgpack_number_of_columns) - the number of columns in inserted MsgPack data. Used for automatic schema inference from data. Default value - `0`. -- [output_format_msgpack_uuid_representation](/docs/en/operations/settings/settings.md/#output_format_msgpack_uuid_representation) - the way how to output UUID in MsgPack format. Default value - `EXT`. +- [input_format_msgpack_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_msgpack_number_of_columns) - the number of columns in inserted MsgPack data. Used for automatic schema inference from data. Default value - `0`. +- [output_format_msgpack_uuid_representation](/docs/en/operations/settings/settings-formats.md/#output_format_msgpack_uuid_representation) - the way how to output UUID in MsgPack format. Default value - `EXT`. ## MySQLDump {#mysqldump} ClickHouse supports reading MySQL [dumps](https://dev.mysql.com/doc/refman/8.0/en/mysqldump.html). It reads all data from INSERT queries belonging to one table in dump. 
If there are more than one table, by default it reads data from the first one. -You can specify the name of the table from which to read data from using [input_format_mysql_dump_table_name](/docs/en/operations/settings/settings.md/#input_format_mysql_dump_table_name) settings. -If setting [input_format_mysql_dump_map_columns](/docs/en/operations/settings/settings.md/#input_format_mysql_dump_map_columns) is set to 1 and +You can specify the name of the table from which to read data from using [input_format_mysql_dump_table_name](/docs/en/operations/settings/settings-formats.md/#input_format_mysql_dump_table_name) settings. +If setting [input_format_mysql_dump_map_columns](/docs/en/operations/settings/settings-formats.md/#input_format_mysql_dump_map_columns) is set to 1 and dump contains CREATE query for specified table or column names in INSERT query the columns from input data will be mapped to the columns from the table by their names, -columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. +columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. This format supports schema inference: if the dump contains CREATE query for the specified table, the structure is extracted from it, otherwise schema is inferred from the data of INSERT queries. Examples: @@ -2343,3 +2349,26 @@ FROM file(dump.sql, MySQLDump) │ 3 │ └───┘ ``` + +## Markdown {#markdown} + +You can export results using [Markdown](https://en.wikipedia.org/wiki/Markdown) format to generate output ready to be pasted into your `.md` files: + +```sql +SELECT + number, + number * 2 +FROM numbers(5) +FORMAT Markdown +``` +```results +| number | multiply(number, 2) | +|-:|-:| +| 0 | 0 | +| 1 | 2 | +| 2 | 4 | +| 3 | 6 | +| 4 | 8 | +``` + +Markdown table will be generated automatically and can be used on markdown-enabled platforms, like Github. This format is used only for output. diff --git a/docs/en/interfaces/schema-inference.md b/docs/en/interfaces/schema-inference.md index 9d87bdced1a..728afa73a17 100644 --- a/docs/en/interfaces/schema-inference.md +++ b/docs/en/interfaces/schema-inference.md @@ -558,6 +558,8 @@ and if the value is not a number, ClickHouse treats it as a string. If you don't want ClickHouse to try to determine complex types using some parsers and heuristics, you can disable setting `input_format_csv_use_best_effort_in_schema_inference` and ClickHouse will treat all columns as Strings. +If setting `input_format_csv_detect_header` is enabled, ClickHouse will try to detect the header with column names (and maybe types) while inferring schema. This setting is enabled by default. 
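+
+If the detected header is not wanted for a particular import, the setting can be disabled per query. A minimal sketch with made-up inline data (the comment describes the expected effect based on the description above):
+
+```sql
+SET input_format_csv_detect_header = 0;
+-- with detection disabled, the first row is parsed as ordinary data rather than as column names
+SELECT * FROM format(CSV,
+$$"number","string"
+42,"Hello"
+$$);
+```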
+ **Examples:** Integers, Floats, Bools, Strings: @@ -669,6 +671,61 @@ DESC format(CSV, '"[1,2,3]",42.42,Hello World!') └──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` +Examples of header auto-detection (when `input_format_csv_detect_header` is enabled): + +Only names: +```sql +SELECT * FROM format(CSV, +$$"number","string","array" +42,"Hello","[1, 2, 3]" +43,"World","[4, 5, 6]" +$$) +``` + +```response +┌─number─┬─string─┬─array───┐ +│ 42 │ Hello │ [1,2,3] │ +│ 43 │ World │ [4,5,6] │ +└────────┴────────┴─────────┘ +``` + +Names and types: + +```sql +DESC format(CSV, +$$"number","string","array" +"UInt32","String","Array(UInt16)" +42,"Hello","[1, 2, 3]" +43,"World","[4, 5, 6]" +$$) +``` + +```response +┌─name───┬─type──────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ number │ UInt32 │ │ │ │ │ │ +│ string │ String │ │ │ │ │ │ +│ array │ Array(UInt16) │ │ │ │ │ │ +└────────┴───────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Note that the header can be detected only if there is at least one column with a non-String type. If all columns have String type, the header is not detected: + +```sql +SELECT * FROM format(CSV, +$$"first_column","second_column" +"Hello","World" +"World","Hello" +$$) +``` + +```response +┌─c1───────────┬─c2────────────┐ +│ first_column │ second_column │ +│ Hello │ World │ +│ World │ Hello │ +└──────────────┴───────────────┘ +``` + ## TSV/TSKV {#tsv-tskv} In TSV/TSKV formats ClickHouse extracts column value from the row according to tabular delimiters and then parses extracted value using @@ -677,6 +734,7 @@ the recursive parser to determine the most appropriate type. If the type cannot If you don't want ClickHouse to try to determine complex types using some parsers and heuristics, you can disable setting `input_format_tsv_use_best_effort_in_schema_inference` and ClickHouse will treat all columns as Strings. +If setting `input_format_tsv_detect_header` is enabled, ClickHouse will try to detect the header with column names (and maybe types) while inferring schema. This setting is enabled by default. **Examples:** @@ -799,6 +857,61 @@ DESC format(TSV, '[1,2,3] 42.42 Hello World!') └──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` +Examples of header auto-detection (when `input_format_tsv_detect_header` is enabled): + +Only names: +```sql +SELECT * FROM format(TSV, +$$number string array +42 Hello [1, 2, 3] +43 World [4, 5, 6] +$$); +``` + +```response +┌─number─┬─string─┬─array───┐ +│ 42 │ Hello │ [1,2,3] │ +│ 43 │ World │ [4,5,6] │ +└────────┴────────┴─────────┘ +``` + +Names and types: + +```sql +DESC format(TSV, +$$number string array +UInt32 String Array(UInt16) +42 Hello [1, 2, 3] +43 World [4, 5, 6] +$$) +``` + +```response +┌─name───┬─type──────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ number │ UInt32 │ │ │ │ │ │ +│ string │ String │ │ │ │ │ │ +│ array │ Array(UInt16) │ │ │ │ │ │ +└────────┴───────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Note that the header can be detected only if there is at least one column with a non-String type. 
If all columns have String type, the header is not detected: + +```sql +SELECT * FROM format(TSV, +$$first_column second_column +Hello World +World Hello +$$) +``` + +```response +┌─c1───────────┬─c2────────────┐ +│ first_column │ second_column │ +│ Hello │ World │ +│ World │ Hello │ +└──────────────┴───────────────┘ +``` + ## Values {#values} In Values format ClickHouse extracts column value from the row and then parses it using @@ -911,6 +1024,8 @@ DESC format(TSV, '[1,2,3] 42.42 Hello World!') In CustomSeparated format ClickHouse first extracts all column values from the row according to specified delimiters and then tries to infer the data type for each value according to escaping rule. +If setting `input_format_custom_detect_header` is enabled, ClickHouse will try to detect the header with column names (and maybe types) while inferring schema. This setting is enabled by default. + **Example** ```sql @@ -937,6 +1052,34 @@ $$) └──────┴────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` +Example of header auto-detection (when `input_format_custom_detect_header` is enabled): + +```sql +SET format_custom_row_before_delimiter = '', + format_custom_row_after_delimiter = '\n', + format_custom_row_between_delimiter = '\n', + format_custom_result_before_delimiter = '\n', + format_custom_result_after_delimiter = '\n', + format_custom_field_delimiter = '', + format_custom_escaping_rule = 'Quoted' + +DESC format(CustomSeparated, $$ +'number''string''array' + +42.42'Some string 1'[1, NULL, 3] + +NULL'Some string 3'[1, 2, NULL] + +$$) +``` + +```response +┌─number─┬─string────────┬─array──────┐ +│ 42.42 │ Some string 1 │ [1,NULL,3] │ +│ ᴺᵁᴸᴸ │ Some string 3 │ [1,2,NULL] │ +└────────┴───────────────┴────────────┘ +``` + ## Template {#template} In Template format ClickHouse first extracts all column values from the row according to the specified template and then tries to infer the @@ -1193,7 +1336,7 @@ DESC format(JSONEachRow, $$ └──────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -Note: Parsing datetimes during schema inference respect setting [date_time_input_format](/docs/en/operations/settings/settings.md#date_time_input_format) +Note: Parsing datetimes during schema inference respect setting [date_time_input_format](/docs/en/operations/settings/settings-formats.md#date_time_input_format) ### input_format_try_infer_dates diff --git a/docs/en/operations/_troubleshooting.md b/docs/en/operations/_troubleshooting.md index aed63ec4d0f..a5c07ed18bd 100644 --- a/docs/en/operations/_troubleshooting.md +++ b/docs/en/operations/_troubleshooting.md @@ -56,6 +56,19 @@ sudo apt-get clean sudo apt-get autoclean ``` +### You Can't Get Packages With Yum Because Of Wrong Signature + +Possible issue: the cache is wrong, maybe it's broken after updated GPG key in 2022-09. 
+ +The solution is to clean out the cache and lib directory for yum: + +``` +sudo find /var/lib/yum/repos/ /var/cache/yum/ -name 'clickhouse-*' -type d -exec rm -rf {} + +sudo rm -f /etc/yum.repos.d/clickhouse.repo +``` + +After that follow the [install guide](../getting-started/install.md#from-rpm-packages) + ## Connecting to the Server {#troubleshooting-accepts-no-connections} Possible issues: diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md index 4feb434d762..f1a5649cd4c 100644 --- a/docs/en/operations/backup.md +++ b/docs/en/operations/backup.md @@ -79,7 +79,7 @@ The BACKUP and RESTORE statements take a list of DATABASE and TABLE names, a des - ASYNC: backup or restore asynchronously - PARTITIONS: a list of partitions to restore - SETTINGS: - - [`compression_method`](en/sql-reference/statements/create/table/#column-compression-codecs) and compression_level + - [`compression_method`](/docs/en/sql-reference/statements/create/table.md/#column-compression-codecs) and compression_level - `password` for the file on disk - `base_backup`: the destination of the previous backup of this source. For example, `Disk('backups', '1.zip')` diff --git a/docs/en/operations/caches.md b/docs/en/operations/caches.md index 86760ec245f..0f9156048c4 100644 --- a/docs/en/operations/caches.md +++ b/docs/en/operations/caches.md @@ -22,5 +22,6 @@ Additional cache types: - [Dictionaries](../sql-reference/dictionaries/index.md) data cache. - Schema inference cache. - [Filesystem cache](storing-data.md) over S3, Azure, Local and other disks. +- [(Experimental) Query cache](query-cache.md). To drop one of the caches, use [SYSTEM DROP ... CACHE](../sql-reference/statements/system.md#drop-mark-cache) statements. diff --git a/docs/en/operations/named-collections.md b/docs/en/operations/named-collections.md index cbb8d0a4c02..f3cfa4a5372 100644 --- a/docs/en/operations/named-collections.md +++ b/docs/en/operations/named-collections.md @@ -239,7 +239,7 @@ Example of configuration: - localhost + remote_host 9000 system foo diff --git a/docs/en/operations/query-cache.md b/docs/en/operations/query-cache.md new file mode 100644 index 00000000000..1a486de7904 --- /dev/null +++ b/docs/en/operations/query-cache.md @@ -0,0 +1,112 @@ +--- +slug: /en/operations/query-cache +sidebar_position: 65 +sidebar_label: Query Cache [experimental] +--- + +# Query Cache [experimental] + +The query cache allows to compute `SELECT` queries just once and to serve further executions of the same query directly from the cache. +Depending on the type of the queries, this can dramatically reduce latency and resource consumption of the ClickHouse server. + +## Background, Design and Limitations + +Query caches can generally be viewed as transactionally consistent or inconsistent. + +- In transactionally consistent caches, the database invalidates (discards) cached query results if the result of the `SELECT` query changes + or potentially changes. In ClickHouse, operations which change the data include inserts/updates/deletes in/of/from tables or collapsing + merges. Transactionally consistent caching is especially suitable for OLTP databases, for example + [MySQL](https://dev.mysql.com/doc/refman/5.6/en/query-cache.html) (which removed query cache after v8.0) and + [Oracle](https://docs.oracle.com/database/121/TGDBA/tune_result_cache.htm). 
+- In transactionally inconsistent caches, slight inaccuracies in query results are accepted under the assumption that all cache entries are
+  assigned a validity period after which they expire (e.g. 1 minute) and that the underlying data changes only slightly during this period.
+  This approach is overall more suitable for OLAP databases. As an example where transactionally inconsistent caching is sufficient,
+  consider an hourly sales report in a reporting tool which is simultaneously accessed by multiple users. Sales data typically changes
+  slowly enough that the database only needs to compute the report once (represented by the first `SELECT` query). Further queries can be
+  served directly from the query cache. In this example, a reasonable validity period could be 30 minutes.
+
+Transactionally inconsistent caching is traditionally provided by client tools or proxy packages interacting with the database. As a result,
+the same caching logic and configuration are often duplicated. With ClickHouse's query cache, the caching logic moves to the server side.
+This reduces maintenance effort and avoids redundancy.
+
+:::warning
+The query cache is an experimental feature that should not be used in production. There are known cases (e.g. in distributed query
+processing) where wrong results are returned.
+:::
+
+## Configuration Settings and Usage
+
+As long as the query cache is experimental, it must be activated using the following configuration setting:
+
+```sql
+SET allow_experimental_query_cache = true;
+```
+
+Afterwards, setting [use_query_cache](settings/settings.md#use-query-cache) can be used to control whether a specific query or all queries
+of the current session should utilize the query cache. For example, the first execution of the query
+
+```sql
+SELECT some_expensive_calculation(column_1, column_2)
+FROM table
+SETTINGS use_query_cache = true;
+```
+
+will store the query result in the query cache. Subsequent executions of the same query (also with setting `use_query_cache = true`) will
+read the computed result from the cache and return it immediately.
+
+The way the cache is utilized can be configured in more detail using settings [enable_writes_to_query_cache](settings/settings.md#enable-writes-to-query-cache)
+and [enable_reads_from_query_cache](settings/settings.md#enable-reads-from-query-cache) (both `true` by default). The former setting
+controls whether query results are stored in the cache, whereas the latter setting determines whether the database should try to retrieve
+query results from the cache. For example, the following query will use the cache only passively, i.e. attempt to read from it but not store its
+result in it:
+
+```sql
+SELECT some_expensive_calculation(column_1, column_2)
+FROM table
+SETTINGS use_query_cache = true, enable_writes_to_query_cache = false;
+```
+
+For maximum control, it is generally recommended to provide settings "use_query_cache", "enable_writes_to_query_cache" and
+"enable_reads_from_query_cache" only with specific queries. It is also possible to enable caching at the user or profile level (e.g. via `SET
+use_query_cache = true`), but keep in mind that all `SELECT` queries, including monitoring or debugging queries to system tables, may then
+return cached results.
+
+The query cache can be cleared using the statement `SYSTEM DROP QUERY CACHE`. The content of the query cache is displayed in the system table
+`system.query_cache`. 
The numbers of query cache hits and misses are shown as the events "QueryCacheHits" and "QueryCacheMisses" in the system table
+`system.events`. Both counters are only updated for `SELECT` queries that run with the setting "use_query_cache = true". Other queries do not
+affect the cache miss counter.
+
+The query cache exists once per ClickHouse server process. However, cache results are by default not shared between users. This can be
+changed (see below) but doing so is not recommended for security reasons.
+
+Query results are referenced in the query cache by the [Abstract Syntax Tree (AST)](https://en.wikipedia.org/wiki/Abstract_syntax_tree) of
+their query. This means that caching is agnostic to upper/lowercase; for example, `SELECT 1` and `select 1` are treated as the same query. To
+make the matching more natural, all query-level settings related to the query cache are removed from the AST.
+
+If the query was aborted due to an exception or user cancellation, no entry is written into the query cache.
+
+The size of the query cache, the maximum number of cache entries and the maximum size of cache entries (in bytes and in records) can
+be configured using different [server configuration options](server-configuration-parameters/settings.md#server_configuration_parameters_query-cache).
+
+To define the minimum amount of time a query must run for its result to be cached, use the setting
+[query_cache_min_query_duration](settings/settings.md#query-cache-min-query-duration). For example, the result of the query
+
+``` sql
+SELECT some_expensive_calculation(column_1, column_2)
+FROM table
+SETTINGS use_query_cache = true, query_cache_min_query_duration = 5000;
+```
+
+is only cached if the query runs longer than 5 seconds. It is also possible to specify how often a query needs to run until its result is
+cached; for that, use the setting [query_cache_min_query_runs](settings/settings.md#query-cache-min-query-runs).
+
+Entries in the query cache become stale after a certain time period (time-to-live). By default, this period is 60 seconds, but a different
+value can be specified at the session, profile or query level using the setting [query_cache_ttl](settings/settings.md#query-cache-ttl).
+
+Also, results of queries with non-deterministic functions such as `rand()` and `now()` are not cached. This can be overridden using the
+setting [query_cache_store_results_of_queries_with_nondeterministic_functions](settings/settings.md#query-cache-store-results-of-queries-with-nondeterministic-functions).
+
+Finally, entries in the query cache are not shared between users for security reasons. For example, user A must not be able to bypass a
+row policy on a table by running the same query as another user B for whom no such policy exists. However, if necessary, cache entries can
+be marked accessible by other users (i.e. shared) by supplying the setting
+[query_cache_share_between_users](settings/settings.md#query-cache-share-between-users).
diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md
index 13873827722..761d27a889f 100644
--- a/docs/en/operations/server-configuration-parameters/settings.md
+++ b/docs/en/operations/server-configuration-parameters/settings.md
@@ -1270,6 +1270,32 @@ If the table does not exist, ClickHouse will create it. If the structure of the
 ```
+
+## query_cache {#server_configuration_parameters_query-cache}
+
+[Query cache](../query-cache.md) configuration. 
+ +The following settings are available: + +- `size`: The maximum cache size in bytes. 0 means the query cache is disabled. Default value: `1073741824` (1 GiB). +- `max_entries`: The maximum number of `SELECT` query results stored in the cache. Default value: `1024`. +- `max_entry_size`: The maximum size in bytes `SELECT` query results may have to be saved in the cache. Default value: `1048576` (1 MiB). +- `max_entry_records`: The maximum number of records `SELECT` query results may have to be saved in the cache. Default value: `30000000` (30 mil). + +:::warning +Data for the query cache is allocated in DRAM. If memory is scarce, make sure to set a small value for `size` or disable the query cache altogether. +::: + +**Example** + +```xml + + 1073741824 + 1024 + 1048576 + 30000000 + +``` + ## query_thread_log {#server_configuration_parameters-query_thread_log} Setting for logging threads of queries received with the [log_query_threads=1](../../operations/settings/settings.md#settings-log-query-threads) setting. diff --git a/docs/en/operations/settings/index.md b/docs/en/operations/settings/index.md index f254d57ec7d..fae282c861f 100644 --- a/docs/en/operations/settings/index.md +++ b/docs/en/operations/settings/index.md @@ -1,6 +1,6 @@ --- sidebar_label: Settings Overview -sidebar_position: 51 +sidebar_position: 1 slug: /en/operations/settings/ pagination_next: en/operations/settings/settings --- diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index d1f94cf183c..5bc174727ad 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -106,14 +106,20 @@ Possible values: Default value: 1. The delay (in milliseconds) for `INSERT` is calculated by the formula: - ```code max_k = parts_to_throw_insert - parts_to_delay_insert k = 1 + parts_count_in_partition - parts_to_delay_insert delay_milliseconds = pow(max_delay_to_insert * 1000, k / max_k) ``` +For example, if a partition has 299 active parts and parts_to_throw_insert = 300, parts_to_delay_insert = 150, max_delay_to_insert = 1, `INSERT` is delayed for `pow( 1 * 1000, (1 + 299 - 150) / (300 - 150) ) = 1000` milliseconds. -For example if a partition has 299 active parts and parts_to_throw_insert = 300, parts_to_delay_insert = 150, max_delay_to_insert = 1, `INSERT` is delayed for `pow( 1 * 1000, (1 + 299 - 150) / (300 - 150) ) = 1000` milliseconds. +Starting from version 23.1 formula has been changed to: +```code +allowed_parts_over_threshold = parts_to_throw_insert - parts_to_delay_insert +parts_over_threshold = parts_count_in_partition - parts_to_delay_insert + 1 +delay_milliseconds = max(min_delay_to_insert_ms, (max_delay_to_insert * 1000) * parts_over_threshold / allowed_parts_over_threshold) +``` +For example, if a partition has 224 active parts and parts_to_throw_insert = 300, parts_to_delay_insert = 150, max_delay_to_insert = 1, min_delay_to_insert_ms = 10, `INSERT` is delayed for `max( 10, 1 * 1000 * (224 - 150 + 1) / (300 - 150) ) = 500` milliseconds. ## max_parts_in_total {#max-parts-in-total} @@ -227,7 +233,7 @@ Possible values: Default value: 100. -Normally, the `use_async_block_ids_cache` updates as soon as there are updates in the watching keeper path. However, the cache updates might be too frequent and become a heavy burden. This minimum interval prevents the cache from updating too fast. Note that if we set this value too long, the block with duplicated inserts will have a longer retry time. 
+Normally, the `use_async_block_ids_cache` updates as soon as there are updates in the watching keeper path. However, the cache updates might be too frequent and become a heavy burden. This minimum interval prevents the cache from updating too fast. Note that if we set this value too long, the block with duplicated inserts will have a longer retry time. ## max_replicated_logs_to_keep diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md new file mode 100644 index 00000000000..fd727704710 --- /dev/null +++ b/docs/en/operations/settings/settings-formats.md @@ -0,0 +1,1492 @@ +--- +sidebar_label: Format Settings +sidebar_position: 52 +slug: /en/operations/settings/formats +toc_max_heading_level: 2 +--- + +# Format settings {#format-settings} + +## input_format_skip_unknown_fields {#input_format_skip_unknown_fields} + +Enables or disables skipping insertion of extra data. + +When writing data, ClickHouse throws an exception if input data contain columns that do not exist in the target table. If skipping is enabled, ClickHouse does not insert extra data and does not throw an exception. + +Supported formats: + +- [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) +- [TSKV](../../interfaces/formats.md/#tskv) +- All formats with suffixes WithNames/WithNamesAndTypes +- [JSONColumns](../../interfaces/formats.md/#jsoncolumns) +- [MySQLDump](../../interfaces/formats.md/#mysqldump) + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: 1. + +## input_format_with_names_use_header {#input_format_with_names_use_header} + +Enables or disables checking the column order when inserting data. + +To improve insert performance, we recommend disabling this check if you are sure that the column order of the input data is the same as in the target table. + +Supported formats: + +- [CSVWithNames](../../interfaces/formats.md/#csvwithnames) +- [CSVWithNamesAndTypes](../../interfaces/formats.md/#csvwithnamesandtypes) +- [TabSeparatedWithNames](../../interfaces/formats.md/#tabseparatedwithnames) +- [TabSeparatedWithNamesAndTypes](../../interfaces/formats.md/#tabseparatedwithnamesandtypes) +- [JSONCompactEachRowWithNames](../../interfaces/formats.md/#jsoncompacteachrowwithnames) +- [JSONCompactEachRowWithNamesAndTypes](../../interfaces/formats.md/#jsoncompacteachrowwithnamesandtypes) +- [JSONCompactStringsEachRowWithNames](../../interfaces/formats.md/#jsoncompactstringseachrowwithnames) +- [JSONCompactStringsEachRowWithNamesAndTypes](../../interfaces/formats.md/#jsoncompactstringseachrowwithnamesandtypes) +- [RowBinaryWithNames](../../interfaces/formats.md/#rowbinarywithnames) +- [RowBinaryWithNamesAndTypes](../../interfaces/formats.md/#rowbinarywithnamesandtypes) +- [CustomSeparatedWithNames](../../interfaces/formats.md/#customseparatedwithnames) +- [CustomSeparatedWithNamesAndTypes](../../interfaces/formats.md/#customseparatedwithnamesandtypes) + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: 1. + +## input_format_with_types_use_header {#input_format_with_types_use_header} + +Controls whether format parser should check if data types from the input data match data types from the target table. 
+ +Supported formats: + +- [CSVWithNamesAndTypes](../../interfaces/formats.md/#csvwithnamesandtypes) +- [TabSeparatedWithNamesAndTypes](../../interfaces/formats.md/#tabseparatedwithnamesandtypes) +- [JSONCompactEachRowWithNamesAndTypes](../../interfaces/formats.md/#jsoncompacteachrowwithnamesandtypes) +- [JSONCompactStringsEachRowWithNamesAndTypes](../../interfaces/formats.md/#jsoncompactstringseachrowwithnamesandtypes) +- [RowBinaryWithNamesAndTypes](../../interfaces/formats.md/#rowbinarywithnamesandtypes-rowbinarywithnamesandtypes) +- [CustomSeparatedWithNamesAndTypes](../../interfaces/formats.md/#customseparatedwithnamesandtypes) + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: 1. + +## input_format_defaults_for_omitted_fields {#input_format_defaults_for_omitted_fields} + +When performing `INSERT` queries, replace omitted input column values with default values of the respective columns. This option only applies to [JSONEachRow](../../interfaces/formats.md/#jsoneachrow), [CSV](../../interfaces/formats.md/#csv), [TabSeparated](../../interfaces/formats.md/#tabseparated) formats and formats with `WithNames`/`WithNamesAndTypes` suffixes. + +:::note +When this option is enabled, extended table metadata are sent from server to client. It consumes additional computing resources on the server and can reduce performance. +::: + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: 1. + +## input_format_null_as_default {#input_format_null_as_default} + +Enables or disables the initialization of [NULL](../../sql-reference/syntax.md/#null-literal) fields with [default values](../../sql-reference/statements/create/table.md/#create-default-values), if data type of these fields is not [nullable](../../sql-reference/data-types/nullable.md/#data_type-nullable). +If column type is not nullable and this setting is disabled, then inserting `NULL` causes an exception. If column type is nullable, then `NULL` values are inserted as is, regardless of this setting. + +This setting is applicable to [INSERT ... VALUES](../../sql-reference/statements/insert-into.md) queries for text input formats. + +Possible values: + +- 0 — Inserting `NULL` into a not nullable column causes an exception. +- 1 — `NULL` fields are initialized with default column values. + +Default value: `1`. + +## input_format_allow_seeks {#input_format_allow_seeks} + +Allow seeks while reading in ORC/Parquet/Arrow input formats. + +Enabled by default. + +## input_format_max_rows_to_read_for_schema_inference {#input_format_max_rows_to_read_for_schema_inference} + +The maximum rows of data to read for automatic schema inference. + +Default value: `25'000`. + +## column_names_for_schema_inference {#column_names_for_schema_inference} + +The list of column names to use in schema inference for formats without column names. The format: 'column1,column2,column3,...' + +## schema_inference_hints {#schema_inference_hints} + +The list of column names and types to use as hints in schema inference for formats without schema. + +Example: + +Query: +```sql +desc format(JSONEachRow, '{"x" : 1, "y" : "String", "z" : "0.0.0.0" }') settings schema_inference_hints='x UInt8, z IPv4'; +``` + +Result: +```sql +x UInt8 +y Nullable(String) +z IPv4 +``` + +## schema_inference_make_columns_nullable {#schema_inference_make_columns_nullable} + +Controls making inferred types `Nullable` in schema inference for formats without information about nullability. 
+If the setting is enabled, the inferred type will be `Nullable` only if column contains `NULL` in a sample that is parsed during schema inference. + +Default value: `true`. + +## input_format_try_infer_integers {#input_format_try_infer_integers} + +If enabled, ClickHouse will try to infer integers instead of floats in schema inference for text formats. If all numbers in the column from input data are integers, the result type will be `Int64`, if at least one number is float, the result type will be `Float64`. + +Enabled by default. + +## input_format_try_infer_dates {#input_format_try_infer_dates} + +If enabled, ClickHouse will try to infer type `Date` from string fields in schema inference for text formats. If all fields from a column in input data were successfully parsed as dates, the result type will be `Date`, if at least one field was not parsed as date, the result type will be `String`. + +Enabled by default. + +## input_format_try_infer_datetimes {#input_format_try_infer_datetimes} + +If enabled, ClickHouse will try to infer type `DateTime64` from string fields in schema inference for text formats. If all fields from a column in input data were successfully parsed as datetimes, the result type will be `DateTime64`, if at least one field was not parsed as datetime, the result type will be `String`. + +Enabled by default. + +## date_time_input_format {#date_time_input_format} + +Allows choosing a parser of the text representation of date and time. + +The setting does not apply to [date and time functions](../../sql-reference/functions/date-time-functions.md). + +Possible values: + +- `'best_effort'` — Enables extended parsing. + + ClickHouse can parse the basic `YYYY-MM-DD HH:MM:SS` format and all [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) date and time formats. For example, `'2018-06-08T01:02:03.000Z'`. + +- `'basic'` — Use basic parser. + + ClickHouse can parse only the basic `YYYY-MM-DD HH:MM:SS` or `YYYY-MM-DD` format. For example, `2019-08-20 10:18:56` or `2019-08-20`. + +Default value: `'basic'`. + +See also: + +- [DateTime data type.](../../sql-reference/data-types/datetime.md) +- [Functions for working with dates and times.](../../sql-reference/functions/date-time-functions.md) + +## date_time_output_format {#date_time_output_format} + +Allows choosing different output formats of the text representation of date and time. + +Possible values: + +- `simple` - Simple output format. + + ClickHouse output date and time `YYYY-MM-DD hh:mm:ss` format. For example, `2019-08-20 10:18:56`. The calculation is performed according to the data type's time zone (if present) or server time zone. + +- `iso` - ISO output format. + + ClickHouse output date and time in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `YYYY-MM-DDThh:mm:ssZ` format. For example, `2019-08-20T10:18:56Z`. Note that output is in UTC (`Z` means UTC). + +- `unix_timestamp` - Unix timestamp output format. + + ClickHouse output date and time in [Unix timestamp](https://en.wikipedia.org/wiki/Unix_time) format. For example `1566285536`. + +Default value: `simple`. + +See also: + +- [DateTime data type.](../../sql-reference/data-types/datetime.md) +- [Functions for working with dates and times.](../../sql-reference/functions/date-time-functions.md) + +## input_format_ipv4_default_on_conversion_error {#input_format_ipv4_default_on_conversion_error} + +Deserialization of IPv4 will use default values instead of throwing exception on conversion error. + +Disabled by default. 
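+
+As a rough illustration of the IPv4 variant (the table name and data are hypothetical), an unparsable address should then fall back to the type's default value instead of aborting the insert:
+
+```sql
+CREATE TABLE ip_log (addr IPv4) ENGINE = Memory;
+
+SET input_format_ipv4_default_on_conversion_error = 1;
+-- 'not-an-ip' cannot be parsed as IPv4, so the default value (0.0.0.0) should be stored instead of an exception being thrown
+INSERT INTO ip_log FORMAT CSV "not-an-ip"
+```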
+ +## input_format_ipv6_default_on_conversion_error {#input_format_ipv6_default_on_conversion_error} + +Deserialization of IPV6 will use default values instead of throwing exception on conversion error. + +Disabled by default. + +## bool_true_representation {#bool_true_representation} + +Text to represent true bool value in TSV/CSV/Vertical/Pretty formats. + +Default value: `true` + +## bool_false_representation {#bool_false_representation} + +Text to represent false bool value in TSV/CSV/Vertical/Pretty formats. + +Default value: `false` + +## output_format_decimal_trailing_zeros {#output_format_decimal_trailing_zeros} + +Output trailing zeros when printing Decimal values. E.g. 1.230000 instead of 1.23. + +Disabled by default. + +## input_format_allow_errors_num {#input_format_allow_errors_num} + +Sets the maximum number of acceptable errors when reading from text formats (CSV, TSV, etc.). + +The default value is 0. + +Always pair it with `input_format_allow_errors_ratio`. + +If an error occurred while reading rows but the error counter is still less than `input_format_allow_errors_num`, ClickHouse ignores the row and moves on to the next one. + +If both `input_format_allow_errors_num` and `input_format_allow_errors_ratio` are exceeded, ClickHouse throws an exception. + +## input_format_allow_errors_ratio {#input_format_allow_errors_ratio} + +Sets the maximum percentage of errors allowed when reading from text formats (CSV, TSV, etc.). +The percentage of errors is set as a floating-point number between 0 and 1. + +The default value is 0. + +Always pair it with `input_format_allow_errors_num`. + +If an error occurred while reading rows but the error counter is still less than `input_format_allow_errors_ratio`, ClickHouse ignores the row and moves on to the next one. + +If both `input_format_allow_errors_num` and `input_format_allow_errors_ratio` are exceeded, ClickHouse throws an exception. + +## format_schema {#format-schema} + +This parameter is useful when you are using formats that require a schema definition, such as [Cap’n Proto](https://capnproto.org/) or [Protobuf](https://developers.google.com/protocol-buffers/). The value depends on the format. + +## output_format_enable_streaming {#output_format_enable_streaming} + +Enable streaming in output formats that support it. + +Disabled by default. + +## output_format_write_statistics {#output_format_write_statistics} + +Write statistics about read rows, bytes, time elapsed in suitable output formats. + +Enabled by default + +## insert_distributed_one_random_shard {#insert_distributed_one_random_shard} + +Enables or disables random shard insertion into a [Distributed](../../engines/table-engines/special/distributed.md/#distributed) table when there is no distributed key. + +By default, when inserting data into a `Distributed` table with more than one shard, the ClickHouse server will reject any insertion request if there is no distributed key. When `insert_distributed_one_random_shard = 1`, insertions are allowed and data is forwarded randomly among all shards. + +Possible values: + +- 0 — Insertion is rejected if there are multiple shards and no distributed key is given. +- 1 — Insertion is done randomly among all available shards when no distributed key is given. + +Default value: `0`. + +## JSON formats settings {#json-formats-settings} + +## input_format_import_nested_json {#input_format_import_nested_json} + +Enables or disables the insertion of JSON data with nested objects. 
+ +Supported formats: + +- [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: 0. + +See also: + +- [Usage of Nested Structures](../../interfaces/formats.md/#jsoneachrow-nested) with the `JSONEachRow` format. + +## input_format_json_read_bools_as_numbers {#input_format_json_read_bools_as_numbers} + +Allow parsing bools as numbers in JSON input formats. + +Enabled by default. + +## input_format_json_read_numbers_as_strings {#input_format_json_read_numbers_as_strings} + +Allow parsing numbers as strings in JSON input formats. + +Disabled by default. + +## input_format_json_read_objects_as_strings {#input_format_json_read_objects_as_strings} + +Allow parsing JSON objects as strings in JSON input formats. + +Example: + +```sql +SET input_format_json_read_objects_as_strings = 1; +CREATE TABLE test (id UInt64, obj String, date Date) ENGINE=Memory(); +INSERT INTO test FORMAT JSONEachRow {"id" : 1, "obj" : {"a" : 1, "b" : "Hello"}, "date" : "2020-01-01"}; +SELECT * FROM test; +``` + +Result: + +``` +┌─id─┬─obj──────────────────────┬───────date─┐ +│ 1 │ {"a" : 1, "b" : "Hello"} │ 2020-01-01 │ +└────┴──────────────────────────┴────────────┘ +``` + +Disabled by default. + +## input_format_json_validate_types_from_metadata {#input_format_json_validate_types_from_metadata} + +For JSON/JSONCompact/JSONColumnsWithMetadata input formats, if this setting is set to 1, +the types from metadata in input data will be compared with the types of the corresponding columns from the table. + +Enabled by default. + +## output_format_json_quote_64bit_integers {#output_format_json_quote_64bit_integers} + +Controls quoting of 64-bit or bigger [integers](../../sql-reference/data-types/int-uint.md) (like `UInt64` or `Int128`) when they are output in a [JSON](../../interfaces/formats.md/#json) format. +Such integers are enclosed in quotes by default. This behavior is compatible with most JavaScript implementations. + +Possible values: + +- 0 — Integers are output without quotes. +- 1 — Integers are enclosed in quotes. + +Default value: 1. + +## output_format_json_quote_64bit_floats {#output_format_json_quote_64bit_floats} + +Controls quoting of 64-bit [floats](../../sql-reference/data-types/float.md) when they are output in JSON* formats. + +Disabled by default. + +## output_format_json_quote_denormals {#output_format_json_quote_denormals} + +Enables `+nan`, `-nan`, `+inf`, `-inf` outputs in [JSON](../../interfaces/formats.md/#json) output format. + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: 0. 
+ +**Example** + +Consider the following table `account_orders`: + +```text +┌─id─┬─name───┬─duration─┬─period─┬─area─┐ +│ 1 │ Andrew │ 20 │ 0 │ 400 │ +│ 2 │ John │ 40 │ 0 │ 0 │ +│ 3 │ Bob │ 15 │ 0 │ -100 │ +└────┴────────┴──────────┴────────┴──────┘ +``` + +When `output_format_json_quote_denormals = 0`, the query returns `null` values in output: + +```sql +SELECT area/period FROM account_orders FORMAT JSON; +``` + +```json +{ + "meta": + [ + { + "name": "divide(area, period)", + "type": "Float64" + } + ], + + "data": + [ + { + "divide(area, period)": null + }, + { + "divide(area, period)": null + }, + { + "divide(area, period)": null + } + ], + + "rows": 3, + + "statistics": + { + "elapsed": 0.003648093, + "rows_read": 3, + "bytes_read": 24 + } +} +``` + +When `output_format_json_quote_denormals = 1`, the query returns: + +```json +{ + "meta": + [ + { + "name": "divide(area, period)", + "type": "Float64" + } + ], + + "data": + [ + { + "divide(area, period)": "inf" + }, + { + "divide(area, period)": "-nan" + }, + { + "divide(area, period)": "-inf" + } + ], + + "rows": 3, + + "statistics": + { + "elapsed": 0.000070241, + "rows_read": 3, + "bytes_read": 24 + } +} +``` + +## output_format_json_quote_decimals {#output_format_json_quote_decimals} + +Controls quoting of decimals in JSON output formats. + +Disabled by default. + +## output_format_json_escape_forward_slashes {#output_format_json_escape_forward_slashes} + +Controls escaping forward slashes for string outputs in JSON output format. This is intended for compatibility with JavaScript. Don't confuse with backslashes that are always escaped. + +Enabled by default. + +## output_format_json_named_tuples_as_objects {#output_format_json_named_tuples_as_objects} + +Serialize named tuple columns as JSON objects. + +Enabled by default. + +## input_format_json_named_tuples_as_objects {#input_format_json_named_tuples_as_objects} + +Parse named tuple columns as JSON objects. + +Enabled by default. + +## input_format_json_ignore_unknown_keys_in_named_tuple {#input_format_json_ignore_unknown_keys_in_named_tuple} + +Ignore unknown keys in json object for named tuples. + +Disabled by default. + +## input_format_json_defaults_for_missing_elements_in_named_tuple {#input_format_json_defaults_for_missing_elements_in_named_tuple} + +Insert default values for missing elements in JSON object while parsing named tuple. +This setting works only when setting `input_format_json_named_tuples_as_objects` is enabled. + +Enabled by default. + +## output_format_json_array_of_rows {#output_format_json_array_of_rows} + +Enables the ability to output all rows as a JSON array in the [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) format. + +Possible values: + +- 1 — ClickHouse outputs all rows as an array, each row in the `JSONEachRow` format. +- 0 — ClickHouse outputs each row separately in the `JSONEachRow` format. + +Default value: `0`. 
+ +**Example of a query with the enabled setting** + +Query: + +```sql +SET output_format_json_array_of_rows = 1; +SELECT number FROM numbers(3) FORMAT JSONEachRow; +``` + +Result: + +```text +[ +{"number":"0"}, +{"number":"1"}, +{"number":"2"} +] +``` + +**Example of a query with the disabled setting** + +Query: + +```sql +SET output_format_json_array_of_rows = 0; +SELECT number FROM numbers(3) FORMAT JSONEachRow; +``` + +Result: + +```text +{"number":"0"} +{"number":"1"} +{"number":"2"} +``` + +## output_format_json_validate_utf8 {#output_format_json_validate_utf8} + +Controls validation of UTF-8 sequences in JSON output formats, doesn't impact formats JSON/JSONCompact/JSONColumnsWithMetadata, they always validate UTF-8. + +Disabled by default. + +## format_json_object_each_row_column_for_object_name {#format_json_object_each_row_column_for_object_name} + +The name of column that will be used for storing/writing object names in [JSONObjectEachRow](../../interfaces/formats.md/#jsonobjecteachrow) format. +Column type should be String. If value is empty, default names `row_{i}`will be used for object names. + +Default value: ''. + +## TSV format settings {#tsv-format-settings} + +### input_format_tsv_empty_as_default {#input_format_tsv_empty_as_default} + +When enabled, replace empty input fields in TSV with default values. For complex default expressions `input_format_defaults_for_omitted_fields` must be enabled too. + +Disabled by default. + +### input_format_tsv_enum_as_number {#input_format_tsv_enum_as_number} + +When enabled, always treat enum values as enum ids for TSV input format. It's recommended to enable this setting if data contains only enum ids to optimize enum parsing. + +Possible values: + +- 0 — Enum values are parsed as values or as enum IDs. +- 1 — Enum values are parsed only as enum IDs. + +Default value: 0. + +**Example** + +Consider the table: + +```sql +CREATE TABLE table_with_enum_column_for_tsv_insert (Id Int32,Value Enum('first' = 1, 'second' = 2)) ENGINE=Memory(); +``` + +When the `input_format_tsv_enum_as_number` setting is enabled: + +Query: + +```sql +SET input_format_tsv_enum_as_number = 1; +INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2; +SELECT * FROM table_with_enum_column_for_tsv_insert; +``` + +Result: + +```text +┌──Id─┬─Value──┐ +│ 102 │ second │ +└─────┴────────┘ +``` + +Query: + +```sql +SET input_format_tsv_enum_as_number = 1; +INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 103 'first'; +``` + +throws an exception. + +When the `input_format_tsv_enum_as_number` setting is disabled: + +Query: + +```sql +SET input_format_tsv_enum_as_number = 0; +INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2; +INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 103 'first'; +SELECT * FROM table_with_enum_column_for_tsv_insert; +``` + +Result: + +```text +┌──Id─┬─Value──┐ +│ 102 │ second │ +└─────┴────────┘ +┌──Id─┬─Value──┐ +│ 103 │ first │ +└─────┴────────┘ +``` + +### input_format_tsv_use_best_effort_in_schema_inference {#input_format_tsv_use_best_effort_in_schema_inference} + +Use some tweaks and heuristics to infer schema in TSV format. If disabled, all fields will be treated as String. + +Enabled by default. + +### input_format_tsv_skip_first_lines {#input_format_tsv_skip_first_lines} + +The number of lines to skip at the beginning of data in TSV input format. + +Default value: `0`. 
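+
+For example, a minimal sketch with made-up inline data: the leading banner line is skipped before parsing and schema inference (the remaining rows are tab-separated):
+
+```sql
+SET input_format_tsv_skip_first_lines = 1;
+SELECT * FROM format(TSV,
+$$# exported by some tool
+1	Hello
+2	World
+$$);
+```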
+ +### output_format_tsv_crlf_end_of_line {#output_format_tsv_crlf_end_of_line} + +Use DOC/Windows-style line separator (CRLF) in TSV instead of Unix style (LF). + +Disabled by default. + +### format_tsv_null_representation {#format_tsv_null_representation} + +Defines the representation of `NULL` for [TSV](../../interfaces/formats.md/#tabseparated) output and input formats. User can set any string as a value, for example, `My NULL`. + +Default value: `\N`. + +**Examples** + +Query + +```sql +SELECT * FROM tsv_custom_null FORMAT TSV; +``` + +Result + +```text +788 +\N +\N +``` + +Query + +```sql +SET format_tsv_null_representation = 'My NULL'; +SELECT * FROM tsv_custom_null FORMAT TSV; +``` + +Result + +```text +788 +My NULL +My NULL +``` + +## CSV format settings {#csv-format-settings} + +### format_csv_delimiter {#format_csv_delimiter} + +The character is interpreted as a delimiter in the CSV data. + +Default value: `,`. + +### format_csv_allow_single_quotes {#format_csv_allow_single_quotes} + +If it is set to true, allow strings in single quotes. + +Enabled by default. + +### format_csv_allow_double_quotes {#format_csv_allow_double_quotes} + +If it is set to true, allow strings in double quotes. + +Enabled by default. + +### output_format_csv_crlf_end_of_line {#output_format_csv_crlf_end_of_line} + +Use DOS/Windows-style line separator (CRLF) in CSV instead of Unix style (LF). + +Disabled by default. + +### input_format_csv_enum_as_number {#input_format_csv_enum_as_number} + +When enabled, always treat enum values as enum ids for CSV input format. It's recommended to enable this setting if data contains only enum ids to optimize enum parsing. + +Possible values: + +- 0 — Enum values are parsed as values or as enum IDs. +- 1 — Enum values are parsed only as enum IDs. + +Default value: 0. + +**Examples** + +Consider the table: + +```sql +CREATE TABLE table_with_enum_column_for_csv_insert (Id Int32,Value Enum('first' = 1, 'second' = 2)) ENGINE=Memory(); +``` + +When the `input_format_csv_enum_as_number` setting is enabled: + +Query: + +```sql +SET input_format_csv_enum_as_number = 1; +INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2 +``` + +Result: + +```text +┌──Id─┬─Value──┐ +│ 102 │ second │ +└─────┴────────┘ +``` + +Query: + +```sql +SET input_format_csv_enum_as_number = 1; +INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 103,'first' +``` + +throws an exception. + +When the `input_format_csv_enum_as_number` setting is disabled: + +Query: + +```sql +SET input_format_csv_enum_as_number = 0; +INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2 +INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 103,'first' +SELECT * FROM table_with_enum_column_for_csv_insert; +``` + +Result: + +```text +┌──Id─┬─Value──┐ +│ 102 │ second │ +└─────┴────────┘ +┌──Id─┬─Value─┐ +│ 103 │ first │ +└─────┴───────┘ +``` + +### input_format_csv_arrays_as_nested_csv {#input_format_csv_arrays_as_nested_csv} + +When reading Array from CSV, expect that its elements were serialized in nested CSV and then put into string. Example: "[""Hello"", ""world"", ""42"""" TV""]". Braces around array can be omitted. + +Disabled by default. + +### input_format_csv_empty_as_default {#input_format_csv_empty_as_default} + +When enabled, replace empty input fields in CSV with default values. For complex default expressions `input_format_defaults_for_omitted_fields` must be enabled too. + +Enabled by default. 
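+
+A minimal sketch of this behaviour, assuming a hypothetical table with a literal column default (the setting is already enabled by default and is set explicitly here only for clarity):
+
+```sql
+CREATE TABLE csv_defaults (id UInt32, score UInt32 DEFAULT 42) ENGINE = Memory;
+
+SET input_format_csv_empty_as_default = 1;
+-- the empty second field should be replaced by the column default (42)
+INSERT INTO csv_defaults FORMAT CSV 1,
+SELECT * FROM csv_defaults;
+```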
+ +### input_format_csv_use_best_effort_in_schema_inference {#input_format_csv_use_best_effort_in_schema_inference} + +Use some tweaks and heuristics to infer schema in CSV format. If disabled, all fields will be treated as String. + +Enabled by default. + +### input_format_csv_skip_first_lines {#input_format_csv_skip_first_lines} + +The number of lines to skip at the beginning of data in CSV input format. + +Default value: `0`. + +### format_csv_null_representation {#format_csv_null_representation} + +Defines the representation of `NULL` for [CSV](../../interfaces/formats.md/#csv) output and input formats. User can set any string as a value, for example, `My NULL`. + +Default value: `\N`. + +**Examples** + +Query + +```sql +SELECT * from csv_custom_null FORMAT CSV; +``` + +Result + +```text +788 +\N +\N +``` + +Query + +```sql +SET format_csv_null_representation = 'My NULL'; +SELECT * FROM csv_custom_null FORMAT CSV; +``` + +Result + +```text +788 +My NULL +My NULL +``` + +## Values format settings {#values-format-settings} + +### input_format_values_interpret_expressions {#input_format_values_interpret_expressions} + +Enables or disables the full SQL parser if the fast stream parser can’t parse the data. This setting is used only for the [Values](../../interfaces/formats.md/#data-format-values) format at the data insertion. For more information about syntax parsing, see the [Syntax](../../sql-reference/syntax.md) section. + +Possible values: + +- 0 — Disabled. + + In this case, you must provide formatted data. See the [Formats](../../interfaces/formats.md) section. + +- 1 — Enabled. + + In this case, you can use an SQL expression as a value, but data insertion is much slower this way. If you insert only formatted data, then ClickHouse behaves as if the setting value is 0. + +Default value: 1. + +Example of Use + +Insert the [DateTime](../../sql-reference/data-types/datetime.md) type value with the different settings. + +``` sql +SET input_format_values_interpret_expressions = 0; +INSERT INTO datetime_t VALUES (now()) +``` + +``` text +Exception on client: +Code: 27. DB::Exception: Cannot parse input: expected ) before: now()): (at row 1) +``` + +``` sql +SET input_format_values_interpret_expressions = 1; +INSERT INTO datetime_t VALUES (now()) +``` + +``` text +Ok. +``` + +The last query is equivalent to the following: + +``` sql +SET input_format_values_interpret_expressions = 0; +INSERT INTO datetime_t SELECT now() +``` + +``` text +Ok. +``` + +### input_format_values_deduce_templates_of_expressions {#input_format_values_deduce_templates_of_expressions} + +Enables or disables template deduction for SQL expressions in [Values](../../interfaces/formats.md/#data-format-values) format. It allows parsing and interpreting expressions in `Values` much faster if expressions in consecutive rows have the same structure. ClickHouse tries to deduce the template of an expression, parse the following rows using this template and evaluate the expression on a batch of successfully parsed rows. + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: 1. + +For the following query: + +``` sql +INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), (upper('Values')), ... +``` + +- If `input_format_values_interpret_expressions=1` and `format_values_deduce_templates_of_expressions=0`, expressions are interpreted separately for each row (this is very slow for large number of rows). 
+- If `input_format_values_interpret_expressions=0` and `format_values_deduce_templates_of_expressions=1`, expressions in the first, second and third rows are parsed using template `lower(String)` and interpreted together, expression in the forth row is parsed with another template (`upper(String)`). +- If `input_format_values_interpret_expressions=1` and `format_values_deduce_templates_of_expressions=1`, the same as in previous case, but also allows fallback to interpreting expressions separately if it’s not possible to deduce template. + +### input_format_values_accurate_types_of_literals {#input_format_values_accurate_types_of_literals} + +This setting is used only when `input_format_values_deduce_templates_of_expressions = 1`. Expressions for some column may have the same structure, but contain numeric literals of different types, e.g. + +``` sql +(..., abs(0), ...), -- UInt64 literal +(..., abs(3.141592654), ...), -- Float64 literal +(..., abs(-1), ...), -- Int64 literal +``` + +Possible values: + +- 0 — Disabled. + + In this case, ClickHouse may use a more general type for some literals (e.g., `Float64` or `Int64` instead of `UInt64` for `42`), but it may cause overflow and precision issues. + +- 1 — Enabled. + + In this case, ClickHouse checks the actual type of literal and uses an expression template of the corresponding type. In some cases, it may significantly slow down expression evaluation in `Values`. + +Default value: 1. + +## Arrow format settings {#arrow-format-settings} + +### input_format_arrow_import_nested {#input_format_arrow_import_nested} + +Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [Arrow](../../interfaces/formats.md/#data_types-matching-arrow) input format. + +Possible values: + +- 0 — Data can not be inserted into `Nested` columns as an array of structs. +- 1 — Data can be inserted into `Nested` columns as an array of structs. + +Default value: `0`. + +### input_format_arrow_case_insensitive_column_matching {#input_format_arrow_case_insensitive_column_matching} + +Ignore case when matching Arrow column names with ClickHouse column names. + +Disabled by default. + +### input_format_arrow_allow_missing_columns {#input_format_arrow_allow_missing_columns} + +While importing data, when column is not found in schema default value will be used instead of error. + +Disabled by default. + +### input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference {#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference} + +Allow skipping columns with unsupported types while schema inference for format Arrow. + +Disabled by default. + +### output_format_arrow_low_cardinality_as_dictionary {#output_format_arrow_low_cardinality_as_dictionary} + +Allows to convert the [LowCardinality](../../sql-reference/data-types/lowcardinality.md) type to the `DICTIONARY` type of the [Arrow](../../interfaces/formats.md/#data-format-arrow) format for `SELECT` queries. + +Possible values: + +- 0 — The `LowCardinality` type is not converted to the `DICTIONARY` type. +- 1 — The `LowCardinality` type is converted to the `DICTIONARY` type. + +Default value: `0`. + +### output_format_arrow_string_as_string {#output_format_arrow_string_as_string} + +Use Arrow String type instead of Binary for String columns. + +Disabled by default. 
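+
+**Example**
+
+A sketch of exporting with this setting enabled; the output file name is arbitrary:
+
+```sql
+SET output_format_arrow_string_as_string = 1;
+-- String columns are written using the Arrow String type instead of Binary.
+SELECT 'hello' AS greeting, 42 AS answer
+INTO OUTFILE 'data.arrow'
+FORMAT Arrow;
+```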
+ +## ORC format settings {#orc-format-settings} + +### input_format_orc_import_nested {#input_format_orc_import_nested} + +Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [ORC](../../interfaces/formats.md/#data-format-orc) input format. + +Possible values: + +- 0 — Data can not be inserted into `Nested` columns as an array of structs. +- 1 — Data can be inserted into `Nested` columns as an array of structs. + +Default value: `0`. + +### input_format_orc_row_batch_size {#input_format_orc_row_batch_size} + +Batch size when reading ORC stripes. + +Default value: `100'000` + +### input_format_orc_case_insensitive_column_matching {#input_format_orc_case_insensitive_column_matching} + +Ignore case when matching ORC column names with ClickHouse column names. + +Disabled by default. + +### input_format_orc_allow_missing_columns {#input_format_orc_allow_missing_columns} + +While importing data, when a column is not found in the schema, the default value will be used instead of throwing an error. + +Disabled by default. + +### input_format_orc_skip_columns_with_unsupported_types_in_schema_inference {#input_format_orc_skip_columns_with_unsupported_types_in_schema_inference} + +Allow skipping columns with unsupported types during schema inference for the ORC format. + +Disabled by default. + +### output_format_orc_string_as_string {#output_format_orc_string_as_string} + +Use ORC String type instead of Binary for String columns. + +Disabled by default. + +## Parquet format settings {#parquet-format-settings} + +### input_format_parquet_import_nested {#input_format_parquet_import_nested} + +Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [Parquet](../../interfaces/formats.md/#data-format-parquet) input format. + +Possible values: + +- 0 — Data can not be inserted into `Nested` columns as an array of structs. +- 1 — Data can be inserted into `Nested` columns as an array of structs. + +Default value: `0`. + +### input_format_parquet_case_insensitive_column_matching {#input_format_parquet_case_insensitive_column_matching} + +Ignore case when matching Parquet column names with ClickHouse column names. + +Disabled by default. + +### output_format_parquet_row_group_size {#output_format_parquet_row_group_size} + +Row group size in rows. + +Default value: `1'000'000`. + +### input_format_parquet_allow_missing_columns {#input_format_parquet_allow_missing_columns} + +While importing data, when a column is not found in the schema, the default value will be used instead of throwing an error. + +Disabled by default. + +### input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference {#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference} + +Allow skipping columns with unsupported types during schema inference for the Parquet format. + +Disabled by default. + +### output_format_parquet_string_as_string {#output_format_parquet_string_as_string} + +Use Parquet String type instead of Binary for String columns. + +Disabled by default. + +## Hive format settings {#hive-format-settings} + +### input_format_hive_text_fields_delimiter {#input_format_hive_text_fields_delimiter} + +Delimiter between fields in Hive Text File. + +Default value: `\x01`.
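+
+**Example**
+
+A sketch of reading a Hive text file whose fields are separated by `|` instead of the default `\x01`; the file name is arbitrary and the `HiveText` input format is assumed to be available in your build:
+
+```sql
+SET input_format_hive_text_fields_delimiter = '|';
+SELECT * FROM file('data.txt', 'HiveText', 'id UInt32, name String');
+```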
+ +### input_format_hive_text_collection_items_delimiter {#input_format_hive_text_collection_items_delimiter} + +Delimiter between collection(array or map) items in Hive Text File. + +Default value: `\x02`. + +### input_format_hive_text_map_keys_delimiter {#input_format_hive_text_map_keys_delimiter} + +Delimiter between a pair of map key/values in Hive Text File. + +Default value: `\x03`. + +## MsgPack format settings {#msgpack-format-settings} + +### input_format_msgpack_number_of_columns {#input_format_msgpack_number_of_columns} + +The number of columns in inserted MsgPack data. Used for automatic schema inference from data. + +Default value: `0`. + +### output_format_msgpack_uuid_representation {#output_format_msgpack_uuid_representation} + +The way how to output UUID in MsgPack format. +Possible values: + +- `bin` - as 16-bytes binary. +- `str` - as a string of 36 bytes. +- `ext` - as extention with ExtType = 2. + +Default value: `ext`. + + +## Protobuf format settings {#protobuf-format-settings} + +### input_format_protobuf_flatten_google_wrappers {#input_format_protobuf_flatten_google_wrappers} + +Enable Google wrappers for regular non-nested columns, e.g. google.protobuf.StringValue 'str' for String column 'str'. For Nullable columns empty wrappers are recognized as defaults, and missing as nulls. + +Disabled by default. + +### output_format_protobuf_nullables_with_google_wrappers {#output_format_protobuf_nullables_with_google_wrappers} + +When serializing Nullable columns with Google wrappers, serialize default values as empty wrappers. If turned off, default and null values are not serialized. + +Disabled by default. + +## Avro format settings {#avro-format-settings} + +### input_format_avro_allow_missing_fields {#input_format_avro_allow_missing_fields} + +Enables using fields that are not specified in [Avro](../../interfaces/formats.md/#data-format-avro) or [AvroConfluent](../../interfaces/formats.md/#data-format-avro-confluent) format schema. When a field is not found in the schema, ClickHouse uses the default value instead of throwing an exception. + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: 0. + +### format_avro_schema_registry_url {#format_avro_schema_registry_url} + +Sets [Confluent Schema Registry](https://docs.confluent.io/current/schema-registry/index.html) URL to use with [AvroConfluent](../../interfaces/formats.md/#data-format-avro-confluent) format. + +Default value: `Empty`. + +### output_format_avro_codec {#output_format_avro_codec} + +Sets the compression codec used for output Avro file. + +Type: string + +Possible values: + +- `null` — No compression +- `deflate` — Compress with Deflate (zlib) +- `snappy` — Compress with [Snappy](https://google.github.io/snappy/) + +Default value: `snappy` (if available) or `deflate`. + +### output_format_avro_sync_interval {#output_format_avro_sync_interval} + +Sets minimum data size (in bytes) between synchronization markers for output Avro file. + +Type: unsigned int + +Possible values: 32 (32 bytes) - 1073741824 (1 GiB) + +Default value: 32768 (32 KiB) + +### output_format_avro_string_column_pattern {#output_format_avro_string_column_pattern} + +Regexp of column names of type String to output as Avro `string` (default is `bytes`). +RE2 syntax is supported. + +Type: string + +### output_format_avro_rows_in_file {#output_format_avro_rows_in_file} + +Max rows in a file (if permitted by storage). + +Default value: `1`. 
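+
+**Example**
+
+A sketch combining several of the Avro output settings above; the output file name is arbitrary, and the `snappy` codec requires a build with Snappy support:
+
+```sql
+SET output_format_avro_codec = 'snappy';
+-- Write the String column `name` as Avro `string` rather than the default `bytes`.
+SET output_format_avro_string_column_pattern = '^name$';
+SELECT 1 AS id, 'Alice' AS name
+INTO OUTFILE 'data.avro'
+FORMAT Avro;
+```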
+ +## Pretty formats settings {#pretty-formats-settings} + +### output_format_pretty_max_rows {#output_format_pretty_max_rows} + +Rows limit for Pretty formats. + +Default value: `10'000`. + +### output_format_pretty_max_column_pad_width {#output_format_pretty_max_column_pad_width} + +Maximum width to pad all values in a column in Pretty formats. + +Default value: `250`. + +### output_format_pretty_max_value_width {#output_format_pretty_max_value_width} + +Limits the width of value displayed in [Pretty](../../interfaces/formats.md/#pretty) formats. If the value width exceeds the limit, the value is cut. + +Possible values: + +- Positive integer. +- 0 — The value is cut completely. + +Default value: `10000` symbols. + +**Examples** + +Query: +```sql +SET output_format_pretty_max_value_width = 10; +SELECT range(number) FROM system.numbers LIMIT 10 FORMAT PrettyCompactNoEscapes; +``` +Result: +```text +┌─range(number)─┐ +│ [] │ +│ [0] │ +│ [0,1] │ +│ [0,1,2] │ +│ [0,1,2,3] │ +│ [0,1,2,3,4⋯ │ +│ [0,1,2,3,4⋯ │ +│ [0,1,2,3,4⋯ │ +│ [0,1,2,3,4⋯ │ +│ [0,1,2,3,4⋯ │ +└───────────────┘ +``` + +Query with zero width: +```sql +SET output_format_pretty_max_value_width = 0; +SELECT range(number) FROM system.numbers LIMIT 5 FORMAT PrettyCompactNoEscapes; +``` +Result: +```text +┌─range(number)─┐ +│ ⋯ │ +│ ⋯ │ +│ ⋯ │ +│ ⋯ │ +│ ⋯ │ +└───────────────┘ +``` + +### output_format_pretty_color {#output_format_pretty_color} + +Use ANSI escape sequences to paint colors in Pretty formats. + +Enabled by default. + +### output_format_pretty_grid_charset {#output_format_pretty_grid_charset} + +Allows changing a charset which is used for printing grids borders. Available charsets are UTF-8, ASCII. + +**Example** + +``` text +SET output_format_pretty_grid_charset = 'UTF-8'; +SELECT * FROM a; +┌─a─┐ +│ 1 │ +└───┘ + +SET output_format_pretty_grid_charset = 'ASCII'; +SELECT * FROM a; ++-a-+ +| 1 | ++---+ +``` + +### output_format_pretty_row_numbers {#output_format_pretty_row_numbers} + +Adds row numbers to output in the [Pretty](../../interfaces/formats.md/#pretty) format. + +Possible values: + +- 0 — Output without row numbers. +- 1 — Output with row numbers. + +Default value: `0`. + +**Example** + +Query: + +```sql +SET output_format_pretty_row_numbers = 1; +SELECT TOP 3 name, value FROM system.settings; +``` + +Result: +```text + ┌─name────────────────────┬─value───┐ +1. │ min_compress_block_size │ 65536 │ +2. │ max_compress_block_size │ 1048576 │ +3. │ max_block_size │ 65505 │ + └─────────────────────────┴─────────┘ +``` + +## Template format settings {#template-format-settings} + +### format_template_resultset {#format_template_resultset} + +Path to file which contains format string for result set (for Template format). + +### format_template_row {#format_template_row} + +Path to file which contains format string for rows (for Template format). + +### format_template_rows_between_delimiter {#format_template_rows_between_delimiter} + +Delimiter between rows (for Template format). + +## CustomSeparated format settings {custom-separated-format-settings} + +### format_custom_escaping_rule {#format_custom_escaping_rule} + +Sets the field escaping rule for [CustomSeparated](../../interfaces/formats.md/#format-customseparated) data format. + +Possible values: + +- `'Escaped'` — Similarly to [TSV](../../interfaces/formats.md/#tabseparated). +- `'Quoted'` — Similarly to [Values](../../interfaces/formats.md/#data-format-values). +- `'CSV'` — Similarly to [CSV](../../interfaces/formats.md/#csv). 
+- `'JSON'` — Similarly to [JSONEachRow](../../interfaces/formats.md/#jsoneachrow). +- `'XML'` — Similarly to [XML](../../interfaces/formats.md/#xml). +- `'Raw'` — Extracts subpatterns as a whole, no escaping rules, similarly to [TSVRaw](../../interfaces/formats.md/#tabseparatedraw). + +Default value: `'Escaped'`. + +### format_custom_field_delimiter {#format_custom_field_delimiter} + +Sets the character that is interpreted as a delimiter between the fields for [CustomSeparated](../../interfaces/formats.md/#format-customseparated) data format. + +Default value: `'\t'`. + +### format_custom_row_before_delimiter {#format_custom_row_before_delimiter} + +Sets the character that is interpreted as a delimiter before the field of the first column for [CustomSeparated](../../interfaces/formats.md/#format-customseparated) data format. + +Default value: `''`. + +### format_custom_row_after_delimiter {#format_custom_row_after_delimiter} + +Sets the character that is interpreted as a delimiter after the field of the last column for [CustomSeparated](../../interfaces/formats.md/#format-customseparated) data format. + +Default value: `'\n'`. + +### format_custom_row_between_delimiter {#format_custom_row_between_delimiter} + +Sets the character that is interpreted as a delimiter between the rows for [CustomSeparated](../../interfaces/formats.md/#format-customseparated) data format. + +Default value: `''`. + +### format_custom_result_before_delimiter {#format_custom_result_before_delimiter} + +Sets the character that is interpreted as a prefix before the result set for [CustomSeparated](../../interfaces/formats.md/#format-customseparated) data format. + +Default value: `''`. + +### format_custom_result_after_delimiter {#format_custom_result_after_delimiter} + +Sets the character that is interpreted as a suffix after the result set for [CustomSeparated](../../interfaces/formats.md/#format-customseparated) data format. + +Default value: `''`. + +## Regexp format settings {#regexp-format-settings} + +### format_regexp_escaping_rule {#format_regexp_escaping_rule} + +Field escaping rule. + +Possible values: + +- `'Escaped'` — Similarly to [TSV](../../interfaces/formats.md/#tabseparated). +- `'Quoted'` — Similarly to [Values](../../interfaces/formats.md/#data-format-values). +- `'CSV'` — Similarly to [CSV](../../interfaces/formats.md/#csv). +- `'JSON'` — Similarly to [JSONEachRow](../../interfaces/formats.md/#jsoneachrow). +- `'XML'` — Similarly to [XML](../../interfaces/formats.md/#xml). +- `'Raw'` — Extracts subpatterns as a whole, no escaping rules, similarly to [TSVRaw](../../interfaces/formats.md/#tabseparatedraw). + +Default value: `Raw`. + +### format_regexp_skip_unmatched {#format_regexp_skip_unmatched} + +Skip lines unmatched by regular expression. + +Disabled by default. + +## CapnProto format settings {#capn-proto-format-settings} + +### format_capn_proto_enum_comparising_mode {#format_capn_proto_enum_comparising_mode} + +Determines how to map ClickHouse `Enum` data type and [CapnProto](../../interfaces/formats.md/#capnproto) `Enum` data type from schema. + +Possible values: + +- `'by_values'` — Values in enums should be the same, names can be different. +- `'by_names'` — Names in enums should be the same, values can be different. +- `'by_name_case_insensitive'` — Names in enums should be the same case-insensitive, values can be different. + +Default value: `'by_values'`. 
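+
+**Example**
+
+A sketch of matching enums by name rather than by value; the schema file `events.capnp`, its `Event` struct, and the input file `events.bin` are placeholders and must declare an enum whose names match the ClickHouse column:
+
+```sql
+SET format_capn_proto_enum_comparising_mode = 'by_names';
+-- 'events' refers to an events.capnp schema file that defines struct Event.
+SET format_schema = 'events:Event';
+SELECT * FROM file('events.bin', 'CapnProto', 'status Enum8(\'ok\' = 1, \'error\' = 2)');
+```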
+ +## MySQLDump format settings {#mysqldump-format-settings} + +### input_format_mysql_dump_table_name {#input_format_mysql_dump_table_name} + +The name of the table from which to read data in MySQLDump input format. + +### input_format_mysql_dump_map_columns {#input_format_mysql_dump_map_columns} + +Enables matching columns from the table in the MySQL dump with columns from the ClickHouse table by name in MySQLDump input format. + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: 1. + +## SQLInsert format settings {#sqlinsert-format-settings} + +### output_format_sql_insert_max_batch_size {#output_format_sql_insert_max_batch_size} + +The maximum number of rows in one INSERT statement. + +Default value: `65505`. + +### output_format_sql_insert_table_name {#output_format_sql_insert_table_name} + +The name of the table that will be used in the output INSERT statement. + +Default value: `'table'`. + +### output_format_sql_insert_include_column_names {#output_format_sql_insert_include_column_names} + +Include column names in INSERT statement. + +Default value: `true`. + +### output_format_sql_insert_use_replace {#output_format_sql_insert_use_replace} + +Use REPLACE keyword instead of INSERT. + +Default value: `false`. + +### output_format_sql_insert_quote_names {#output_format_sql_insert_quote_names} + +Quote column names with "`" characters. + +Default value: `true`. + +## BSONEachRow format settings {#bson-each-row-format-settings} + +### output_format_bson_string_as_string {#output_format_bson_string_as_string} + +Use BSON String type instead of Binary for String columns. + +Disabled by default. + +### input_format_bson_skip_fields_with_unsupported_types_in_schema_inference {#input_format_bson_skip_fields_with_unsupported_types_in_schema_inference} + +Allow skipping columns with unsupported types during schema inference for the BSONEachRow format. + +Disabled by default. + +## RowBinary format settings {#row-binary-format-settings} + +### format_binary_max_string_size {#format_binary_max_string_size} + +The maximum allowed size for String in RowBinary format. It prevents allocating a large amount of memory in case of corrupted data. 0 means there is no limit. + +Default value: `1GiB` diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index e3f7bc11ddf..32224056114 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1,10 +1,11 @@ --- -sidebar_label: Settings -sidebar_position: 52 +sidebar_label: Core Settings +sidebar_position: 2 slug: /en/operations/settings/settings +toc_max_heading_level: 2 --- -# Settings +# Core Settings ## additional_table_filters @@ -408,51 +409,51 @@ Several algorithms can be specified, and an available one would be chosen for a Possible values: -### `default` +- default -This is the equivalent of `hash` or `direct`, if possible (same as `direct,hash`) + This is the equivalent of `hash` or `direct`, if possible (same as `direct,hash`) -### `grace_hash` +- grace_hash -[Grace hash join](https://en.wikipedia.org/wiki/Hash_join#Grace_hash_join) is used. Grace hash provides an algorithm option that provides performant complex joins while limiting memory use. + [Grace hash join](https://en.wikipedia.org/wiki/Hash_join#Grace_hash_join) is used. Grace hash provides an algorithm option that provides performant complex joins while limiting memory use.
-The first phase of a grace join reads the right table and splits it into N buckets depending on the hash value of key columns (initially, N is `grace_hash_join_initial_buckets`). This is done in a way to ensure that each bucket can be processed independently. Rows from the first bucket are added to an in-memory hash table while the others are saved to disk. If the hash table grows beyond the memory limit (e.g., as set by [`max_bytes_in_join`](/docs/en/operations/settings/query-complexity.md/#settings-max_bytes_in_join)), the number of buckets is increased and the assigned bucket for each row. Any rows which don’t belong to the current bucket are flushed and reassigned. + The first phase of a grace join reads the right table and splits it into N buckets depending on the hash value of key columns (initially, N is `grace_hash_join_initial_buckets`). This is done in a way to ensure that each bucket can be processed independently. Rows from the first bucket are added to an in-memory hash table while the others are saved to disk. If the hash table grows beyond the memory limit (e.g., as set by [`max_bytes_in_join`](/docs/en/operations/settings/query-complexity.md/#settings-max_bytes_in_join)), the number of buckets is increased and the assigned bucket for each row. Any rows which don’t belong to the current bucket are flushed and reassigned. -### `hash` +- hash -[Hash join algorithm](https://en.wikipedia.org/wiki/Hash_join) is used. The most generic implementation that supports all combinations of kind and strictness and multiple join keys that are combined with `OR` in the `JOIN ON` section. + [Hash join algorithm](https://en.wikipedia.org/wiki/Hash_join) is used. The most generic implementation that supports all combinations of kind and strictness and multiple join keys that are combined with `OR` in the `JOIN ON` section. -### `parallel_hash` +- parallel_hash -A variation of `hash` join that splits the data into buckets and builds several hashtables instead of one concurrently to speed up this process. + A variation of `hash` join that splits the data into buckets and builds several hashtables instead of one concurrently to speed up this process. -When using the `hash` algorithm, the right part of `JOIN` is uploaded into RAM. + When using the `hash` algorithm, the right part of `JOIN` is uploaded into RAM. -### `partial_merge` +- partial_merge -A variation of the [sort-merge algorithm](https://en.wikipedia.org/wiki/Sort-merge_join), where only the right table is fully sorted. + A variation of the [sort-merge algorithm](https://en.wikipedia.org/wiki/Sort-merge_join), where only the right table is fully sorted. -The `RIGHT JOIN` and `FULL JOIN` are supported only with `ALL` strictness (`SEMI`, `ANTI`, `ANY`, and `ASOF` are not supported). + The `RIGHT JOIN` and `FULL JOIN` are supported only with `ALL` strictness (`SEMI`, `ANTI`, `ANY`, and `ASOF` are not supported). -When using the `partial_merge` algorithm, ClickHouse sorts the data and dumps it to the disk. The `partial_merge` algorithm in ClickHouse differs slightly from the classic realization. First, ClickHouse sorts the right table by joining keys in blocks and creates a min-max index for sorted blocks. Then it sorts parts of the left table by the `join key` and joins them over the right table. The min-max index is also used to skip unneeded right table blocks. + When using the `partial_merge` algorithm, ClickHouse sorts the data and dumps it to the disk. 
The `partial_merge` algorithm in ClickHouse differs slightly from the classic realization. First, ClickHouse sorts the right table by joining keys in blocks and creates a min-max index for sorted blocks. Then it sorts parts of the left table by the `join key` and joins them over the right table. The min-max index is also used to skip unneeded right table blocks. -### `direct` +- direct -This algorithm can be applied when the storage for the right table supports key-value requests. + This algorithm can be applied when the storage for the right table supports key-value requests. -The `direct` algorithm performs a lookup in the right table using rows from the left table as keys. It's supported only by special storage such as [Dictionary](../../engines/table-engines/special/dictionary.md/#dictionary) or [EmbeddedRocksDB](../../engines/table-engines/integrations/embedded-rocksdb.md) and only the `LEFT` and `INNER` JOINs. + The `direct` algorithm performs a lookup in the right table using rows from the left table as keys. It's supported only by special storage such as [Dictionary](../../engines/table-engines/special/dictionary.md/#dictionary) or [EmbeddedRocksDB](../../engines/table-engines/integrations/embedded-rocksdb.md) and only the `LEFT` and `INNER` JOINs. -### `auto` +- auto -When set to `auto`, `hash` join is tried first, and the algorithm is switched on the fly to another algorithm if the memory limit is violated. + When set to `auto`, `hash` join is tried first, and the algorithm is switched on the fly to another algorithm if the memory limit is violated. -### `full_sorting_merge` +- full_sorting_merge -[Sort-merge algorithm](https://en.wikipedia.org/wiki/Sort-merge_join) with full sorting joined tables before joining. + [Sort-merge algorithm](https://en.wikipedia.org/wiki/Sort-merge_join) with full sorting joined tables before joining. -### `prefer_partial_merge` +- prefer_partial_merge -ClickHouse always tries to use `partial_merge` join if possible, otherwise, it uses `hash`. *Deprecated*, same as `partial_merge,hash`. + ClickHouse always tries to use `partial_merge` join if possible, otherwise, it uses `hash`. *Deprecated*, same as `partial_merge,hash`. ## join_any_take_last_row {#settings-join_any_take_last_row} @@ -1300,6 +1301,93 @@ Possible values: Default value: `3`. +## use_query_cache {#use-query-cache} + +If turned on, `SELECT` queries may utilize the [query cache](../query-cache.md). Parameters [enable_reads_from_query_cache](#enable-reads-from-query-cache) +and [enable_writes_to_query_cache](#enable-writes-to-query-cache) control in more detail how the cache is used. + +Possible values: + +- 0 - Yes +- 1 - No + +Default value: `0`. + +## enable_reads_from_query_cache {#enable-reads-from-query-cache} + +If turned on, results of `SELECT` queries are retrieved from the [query cache](../query-cache.md). + +Possible values: + +- 0 - Disabled +- 1 - Enabled + +Default value: `1`. + +## enable_writes_to_query_cache {#enable-writes-to-query-cache} + +If turned on, results of `SELECT` queries are stored in the [query cache](../query-cache.md). + +Possible values: + +- 0 - Disabled +- 1 - Enabled + +Default value: `1`. + +## query_cache_store_results_of_queries_with_nondeterministic_functions {#query--store-results-of-queries-with-nondeterministic-functions} + +If turned on, then results of `SELECT` queries with non-deterministic functions (e.g. `rand()`, `now()`) can be cached in the [query cache](../query-cache.md). 
+ +Possible values: + +- 0 - Disabled +- 1 - Enabled + +Default value: `0`. + +## query_cache_min_query_runs {#query-cache-min-query-runs} + +Minimum number of times a `SELECT` query must run before its result is stored in the [query cache](../query-cache.md). + +Possible values: + +- Positive integer >= 0. + +Default value: `0` + +## query_cache_min_query_duration {#query-cache-min-query-duration} + +Minimum duration in milliseconds a query needs to run for its result to be stored in the [query cache](../query-cache.md). + +Possible values: + +- Positive integer >= 0. + +Default value: `0` + +## query_cache_ttl {#query-cache-ttl} + +After this time in seconds entries in the [query cache](../query-cache.md) become stale. + +Possible values: + +- Positive integer >= 0. + +Default value: `60` + +## query_cache_share_between_users {#query-cache-share-between-users} + +If turned on, the result of `SELECT` queries cached in the [query cache](../query-cache.md) can be read by other users. +It is not recommended to enable this setting due to security reasons. + +Possible values: + +- 0 - Disabled +- 1 - Enabled + +Default value: `0`. + ## insert_quorum {#settings-insert_quorum} Enables the quorum writes. @@ -1394,7 +1482,90 @@ By default, blocks inserted into replicated tables by the `INSERT` statement are For the replicated tables by default the only 100 of the most recent blocks for each partition are deduplicated (see [replicated_deduplication_window](merge-tree-settings.md/#replicated-deduplication-window), [replicated_deduplication_window_seconds](merge-tree-settings.md/#replicated-deduplication-window-seconds)). For not replicated tables see [non_replicated_deduplication_window](merge-tree-settings.md/#non-replicated-deduplication-window). -## async_insert_deduplicate {#settings-async-insert-deduplicate} +## Asynchronous Insert settings +### async_insert {#async-insert} + +Enables or disables asynchronous inserts. This makes sense only for insertion over HTTP protocol. Note that deduplication isn't working for such inserts. + +If enabled, the data is combined into batches before the insertion into tables, so it is possible to do small and frequent insertions into ClickHouse (up to 15000 queries per second) without buffer tables. + +The data is inserted either after the [async_insert_max_data_size](#async-insert-max-data-size) is exceeded or after [async_insert_busy_timeout_ms](#async-insert-busy-timeout-ms) milliseconds since the first `INSERT` query. If the [async_insert_stale_timeout_ms](#async-insert-stale-timeout-ms) is set to a non-zero value, the data is inserted after `async_insert_stale_timeout_ms` milliseconds since the last query. + +If [wait_for_async_insert](#wait-for-async-insert) is enabled, every client will wait for the data to be processed and flushed to the table. Otherwise, the query would be processed almost instantly, even if the data is not inserted. + +Possible values: + +- 0 — Insertions are made synchronously, one after another. +- 1 — Multiple asynchronous insertions enabled. + +Default value: `0`. + +### async_insert_threads {#async-insert-threads} + +The maximum number of threads for background data parsing and insertion. + +Possible values: + +- Positive integer. +- 0 — Asynchronous insertions are disabled. + +Default value: `16`. + +### wait_for_async_insert {#wait-for-async-insert} + +Enables or disables waiting for processing of asynchronous insertion. If enabled, server will return `OK` only after the data is inserted. 
Otherwise, it will return `OK` even if the data wasn't inserted. + +Possible values: + +- 0 — Server returns `OK` even if the data is not yet inserted. +- 1 — Server returns `OK` only after the data is inserted. + +Default value: `1`. + +### wait_for_async_insert_timeout {#wait-for-async-insert-timeout} + +The timeout in seconds for waiting for processing of asynchronous insertion. + +Possible values: + +- Positive integer. +- 0 — Disabled. + +Default value: [lock_acquire_timeout](#lock_acquire_timeout). + +### async_insert_max_data_size {#async-insert-max-data-size} + +The maximum size of the unparsed data in bytes collected per query before being inserted. + +Possible values: + +- Positive integer. +- 0 — Asynchronous insertions are disabled. + +Default value: `100000`. + +### async_insert_busy_timeout_ms {#async-insert-busy-timeout-ms} + +The maximum timeout in milliseconds since the first `INSERT` query before inserting collected data. + +Possible values: + +- Positive integer. +- 0 — Timeout disabled. + +Default value: `200`. + +### async_insert_stale_timeout_ms {#async-insert-stale-timeout-ms} + +The maximum timeout in milliseconds since the last `INSERT` query before dumping collected data. If enabled, the settings prolongs the [async_insert_busy_timeout_ms](#async-insert-busy-timeout-ms) with every `INSERT` query as long as [async_insert_max_data_size](#async-insert-max-data-size) is not exceeded. + +Possible values: + +- Positive integer. +- 0 — Timeout disabled. + +Default value: `0`. +### async_insert_deduplicate {#settings-async-insert-deduplicate} Enables or disables insert deduplication of `ASYNC INSERT` (for Replicated\* tables). @@ -1434,7 +1605,7 @@ The setting allows a user to provide own deduplication semantic in MergeTree/Rep For example, by providing a unique value for the setting in each INSERT statement, user can avoid the same inserted data being deduplicated. -Possilbe values: +Possible values: - Any string @@ -1473,6 +1644,49 @@ SELECT * FROM test_table └───┘ ``` +## insert_keeper_max_retries + +The setting sets the maximum number of retries for ClickHouse Keeper (or ZooKeeper) requests during insert into replicated MergeTree. Only Keeper requests which failed due to network error, Keeper session timeout, or request timeout are considered for retries. + +Possible values: + +- Positive integer. +- 0 — Retries are disabled + +Default value: 0 + +Keeper request retries are done after some timeout. The timeout is controlled by the following settings: `insert_keeper_retry_initial_backoff_ms`, `insert_keeper_retry_max_backoff_ms`. +The first retry is done after `insert_keeper_retry_initial_backoff_ms` timeout. The consequent timeouts will be calculated as follows: +``` +timeout = min(insert_keeper_retry_max_backoff_ms, latest_timeout * 2) +``` + +For example, if `insert_keeper_retry_initial_backoff_ms=100`, `insert_keeper_retry_max_backoff_ms=10000` and `insert_keeper_max_retries=8` then timeouts will be `100, 200, 400, 800, 1600, 3200, 6400, 10000`. + +Apart from fault tolerance, the retries aim to provide a better user experience - they allow to avoid returning an error during INSERT execution if Keeper is restarted, for example, due to an upgrade. + +## insert_keeper_retry_initial_backoff_ms {#insert_keeper_retry_initial_backoff_ms} + +Initial timeout(in milliseconds) to retry a failed Keeper request during INSERT query execution + +Possible values: + +- Positive integer. 
+- 0 — No timeout + +Default value: 100 + +## insert_keeper_retry_max_backoff_ms {#insert_keeper_retry_max_backoff_ms} + +Maximum timeout (in milliseconds) to retry a failed Keeper request during INSERT query execution + +Possible values: + +- Positive integer. +- 0 — Maximum timeout is not limited + +Default value: 10000 + ## max_network_bytes {#settings-max-network-bytes} Limits the data volume (in bytes) that is received or transmitted over the network when executing a query. This setting applies to every individual query. @@ -1756,6 +1970,21 @@ Possible values: Default value: 0. +## optimize_skip_merged_partitions {#optimize-skip-merged-partitions} + +Enables or disables optimization for [OPTIMIZE TABLE ... FINAL](../../sql-reference/statements/optimize.md) query if there is only one part with level > 0 and it doesn't have expired TTL. + +- `OPTIMIZE TABLE ... FINAL SETTINGS optimize_skip_merged_partitions=1` + +By default, `OPTIMIZE TABLE ... FINAL` query rewrites the one part even if there is only a single part. + +Possible values: + +- 1 - Enable optimization. +- 0 - Disable optimization. + +Default value: 0. + ## optimize_functions_to_subcolumns {#optimize-functions-to-subcolumns} Enables or disables optimization by transforming some functions to reading subcolumns. This reduces the amount of data to read. @@ -2791,7 +3020,63 @@ Enables or disables truncate before insert in [File](../../engines/table-engines Possible values: - 0 — `INSERT` query appends new data to the end of the file. -- 1 — `INSERT` replaces existing content of the file with the new data. +- 1 — `INSERT` query replaces existing content of the file with the new data. + +Default value: `0`. + +## s3_truncate_on_insert + +Enables or disables truncate before inserts in s3 engine tables. If disabled, an exception will be thrown on insert attempts if an S3 object already exists. + +Possible values: +- 0 — `INSERT` query appends new data to the end of the file. +- 1 — `INSERT` query replaces existing content of the file with the new data. + +Default value: `0`. + +## hdfs_truncate_on_insert + +Enables or disables truncation before an insert in hdfs engine tables. If disabled, an exception will be thrown on an attempt to insert if a file in HDFS already exists. + +Possible values: +- 0 — `INSERT` query appends new data to the end of the file. +- 1 — `INSERT` query replaces existing content of the file with the new data. + +Default value: `0`. + +## engine_file_allow_create_multiple_files + +Enables or disables creating a new file on each insert in file engine tables if the format has the suffix (`JSON`, `ORC`, `Parquet`, etc.). If enabled, on each insert a new file will be created with a name following this pattern: + +`data.Parquet` -> `data.1.Parquet` -> `data.2.Parquet`, etc. + +Possible values: +- 0 — `INSERT` query appends new data to the end of the file. +- 1 — `INSERT` query replaces existing content of the file with the new data. + +Default value: `0`. + +## s3_create_new_file_on_insert + +Enables or disables creating a new file on each insert in s3 engine tables. If enabled, on each insert a new S3 object will be created with the key, similar to this pattern: + +initial: `data.Parquet.gz` -> `data.1.Parquet.gz` -> `data.2.Parquet.gz`, etc. + +Possible values: +- 0 — `INSERT` query appends new data to the end of the file. +- 1 — `INSERT` query replaces existing content of the file with the new data. + +Default value: `0`. 
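+
+**Example**
+
+A sketch of how repeated inserts produce new objects when this setting is enabled; the bucket URL and table name are placeholders:
+
+```sql
+CREATE TABLE s3_example (id UInt32, name String)
+ENGINE = S3('https://my-bucket.s3.amazonaws.com/data/data.Parquet.gz', 'Parquet', 'gzip');
+
+SET s3_create_new_file_on_insert = 1;
+INSERT INTO s3_example VALUES (1, 'first');  -- written to data.Parquet.gz
+INSERT INTO s3_example VALUES (2, 'second'); -- written to data.1.Parquet.gz
+```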
+ +## hdfs_create_new_file_on_insert + +Enables or disables creating a new file on each insert in HDFS engine tables. If enabled, on each insert a new HDFS file will be created with the name, similar to this pattern: + +initial: `data.Parquet.gz` -> `data.1.Parquet.gz` -> `data.2.Parquet.gz`, etc. + +Possible values: +- 0 — `INSERT` query appends new data to the end of the file. +- 1 — `INSERT` query replaces existing content of the file with the new data. Default value: `0`. @@ -3404,6 +3689,30 @@ Default value: `0`. - [optimize_move_to_prewhere](#optimize_move_to_prewhere) setting +## optimize_using_constraints + +Use [constraints](../../sql-reference/statements/create/table#constraints) for query optimization. The default is `false`. + +Possible values: + +- true, false + +## optimize_append_index + +Use [constraints](../../sql-reference/statements/create/table#constraints) in order to append index condition. The default is `false`. + +Possible values: + +- true, false + +## optimize_substitute_columns + +Use [constraints](../../sql-reference/statements/create/table#constraints) for column substitution. The default is `false`. + +Possible values: + +- true, false + ## describe_include_subcolumns {#describe_include_subcolumns} Enables describing subcolumns for a [DESCRIBE](../../sql-reference/statements/describe-table.md) query. For example, members of a [Tuple](../../sql-reference/data-types/tuple.md) or subcolumns of a [Map](../../sql-reference/data-types/map.md/#map-subcolumns), [Nullable](../../sql-reference/data-types/nullable.md/#finding-null) or an [Array](../../sql-reference/data-types/array.md/#array-size) data type. @@ -3419,88 +3728,6 @@ Default value: `0`. See an example for the [DESCRIBE](../../sql-reference/statements/describe-table.md) statement. -## async_insert {#async-insert} - -Enables or disables asynchronous inserts. This makes sense only for insertion over HTTP protocol. Note that deduplication isn't working for such inserts. - -If enabled, the data is combined into batches before the insertion into tables, so it is possible to do small and frequent insertions into ClickHouse (up to 15000 queries per second) without buffer tables. - -The data is inserted either after the [async_insert_max_data_size](#async-insert-max-data-size) is exceeded or after [async_insert_busy_timeout_ms](#async-insert-busy-timeout-ms) milliseconds since the first `INSERT` query. If the [async_insert_stale_timeout_ms](#async-insert-stale-timeout-ms) is set to a non-zero value, the data is inserted after `async_insert_stale_timeout_ms` milliseconds since the last query. - -If [wait_for_async_insert](#wait-for-async-insert) is enabled, every client will wait for the data to be processed and flushed to the table. Otherwise, the query would be processed almost instantly, even if the data is not inserted. - -Possible values: - -- 0 — Insertions are made synchronously, one after another. -- 1 — Multiple asynchronous insertions enabled. - -Default value: `0`. - -## async_insert_threads {#async-insert-threads} - -The maximum number of threads for background data parsing and insertion. - -Possible values: - -- Positive integer. -- 0 — Asynchronous insertions are disabled. - -Default value: `16`. - -## wait_for_async_insert {#wait-for-async-insert} - -Enables or disables waiting for processing of asynchronous insertion. If enabled, server will return `OK` only after the data is inserted. Otherwise, it will return `OK` even if the data wasn't inserted. 
- -Possible values: - -- 0 — Server returns `OK` even if the data is not yet inserted. -- 1 — Server returns `OK` only after the data is inserted. - -Default value: `1`. - -## wait_for_async_insert_timeout {#wait-for-async-insert-timeout} - -The timeout in seconds for waiting for processing of asynchronous insertion. - -Possible values: - -- Positive integer. -- 0 — Disabled. - -Default value: [lock_acquire_timeout](#lock_acquire_timeout). - -## async_insert_max_data_size {#async-insert-max-data-size} - -The maximum size of the unparsed data in bytes collected per query before being inserted. - -Possible values: - -- Positive integer. -- 0 — Asynchronous insertions are disabled. - -Default value: `100000`. - -## async_insert_busy_timeout_ms {#async-insert-busy-timeout-ms} - -The maximum timeout in milliseconds since the first `INSERT` query before inserting collected data. - -Possible values: - -- Positive integer. -- 0 — Timeout disabled. - -Default value: `200`. - -## async_insert_stale_timeout_ms {#async-insert-stale-timeout-ms} - -The maximum timeout in milliseconds since the last `INSERT` query before dumping collected data. If enabled, the settings prolongs the [async_insert_busy_timeout_ms](#async-insert-busy-timeout-ms) with every `INSERT` query as long as [async_insert_max_data_size](#async-insert-max-data-size) is not exceeded. - -Possible values: - -- Positive integer. -- 0 — Timeout disabled. - -Default value: `0`. ## alter_partition_verbose_result {#alter-partition-verbose-result} @@ -3591,37 +3818,39 @@ Read more about [memory overcommit](memory-overcommit.md). Default value: `1GiB`. -## schema_inference_use_cache_for_file {schema_inference_use_cache_for_file} +## Schema Inference settings + +### schema_inference_use_cache_for_file {schema_inference_use_cache_for_file} Enable schemas cache for schema inference in `file` table function. Default value: `true`. -## schema_inference_use_cache_for_s3 {schema_inference_use_cache_for_s3} +### schema_inference_use_cache_for_s3 {schema_inference_use_cache_for_s3} Enable schemas cache for schema inference in `s3` table function. Default value: `true`. -## schema_inference_use_cache_for_url {schema_inference_use_cache_for_url} +### schema_inference_use_cache_for_url {schema_inference_use_cache_for_url} Enable schemas cache for schema inference in `url` table function. Default value: `true`. -## schema_inference_use_cache_for_hdfs {schema_inference_use_cache_for_hdfs} +### schema_inference_use_cache_for_hdfs {schema_inference_use_cache_for_hdfs} Enable schemas cache for schema inference in `hdfs` table function. Default value: `true`. -## schema_inference_cache_require_modification_time_for_url {#schema_inference_cache_require_modification_time_for_url} +### schema_inference_cache_require_modification_time_for_url {#schema_inference_cache_require_modification_time_for_url} Use schema from cache for URL with last modification time validation (for urls with Last-Modified header). If this setting is enabled and URL doesn't have Last-Modified header, schema from cache won't be used. Default value: `true`. -## use_structure_from_insertion_table_in_table_functions {use_structure_from_insertion_table_in_table_functions} +### use_structure_from_insertion_table_in_table_functions {use_structure_from_insertion_table_in_table_functions} Use structure from insertion table instead of schema inference from data. @@ -3670,1485 +3899,6 @@ Possible values: Default value: `0`. -!!! 
note "Warning" - Use this setting only for backward compatibility if your use cases depend on old syntax. - -# Format settings {#format-settings} - -## input_format_skip_unknown_fields {#input_format_skip_unknown_fields} - -Enables or disables skipping insertion of extra data. - -When writing data, ClickHouse throws an exception if input data contain columns that do not exist in the target table. If skipping is enabled, ClickHouse does not insert extra data and does not throw an exception. - -Supported formats: - -- [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) -- [TSKV](../../interfaces/formats.md/#tskv) -- All formats with suffixes WithNames/WithNamesAndTypes -- [JSONColumns](../../interfaces/formats.md/#jsoncolumns) -- [MySQLDump](../../interfaces/formats.md/#mysqldump) - -Possible values: - -- 0 — Disabled. -- 1 — Enabled. - -Default value: 1. - -## input_format_with_names_use_header {#input_format_with_names_use_header} - -Enables or disables checking the column order when inserting data. - -To improve insert performance, we recommend disabling this check if you are sure that the column order of the input data is the same as in the target table. - -Supported formats: - -- [CSVWithNames](../../interfaces/formats.md/#csvwithnames) -- [CSVWithNamesAndTypes](../../interfaces/formats.md/#csvwithnamesandtypes) -- [TabSeparatedWithNames](../../interfaces/formats.md/#tabseparatedwithnames) -- [TabSeparatedWithNamesAndTypes](../../interfaces/formats.md/#tabseparatedwithnamesandtypes) -- [JSONCompactEachRowWithNames](../../interfaces/formats.md/#jsoncompacteachrowwithnames) -- [JSONCompactEachRowWithNamesAndTypes](../../interfaces/formats.md/#jsoncompacteachrowwithnamesandtypes) -- [JSONCompactStringsEachRowWithNames](../../interfaces/formats.md/#jsoncompactstringseachrowwithnames) -- [JSONCompactStringsEachRowWithNamesAndTypes](../../interfaces/formats.md/#jsoncompactstringseachrowwithnamesandtypes) -- [RowBinaryWithNames](../../interfaces/formats.md/#rowbinarywithnames) -- [RowBinaryWithNamesAndTypes](../../interfaces/formats.md/#rowbinarywithnamesandtypes) -- [CustomSeparatedWithNames](../../interfaces/formats.md/#customseparatedwithnames) -- [CustomSeparatedWithNamesAndTypes](../../interfaces/formats.md/#customseparatedwithnamesandtypes) - -Possible values: - -- 0 — Disabled. -- 1 — Enabled. - -Default value: 1. - -## input_format_with_types_use_header {#input_format_with_types_use_header} - -Controls whether format parser should check if data types from the input data match data types from the target table. - -Supported formats: - -- [CSVWithNamesAndTypes](../../interfaces/formats.md/#csvwithnamesandtypes) -- [TabSeparatedWithNamesAndTypes](../../interfaces/formats.md/#tabseparatedwithnamesandtypes) -- [JSONCompactEachRowWithNamesAndTypes](../../interfaces/formats.md/#jsoncompacteachrowwithnamesandtypes) -- [JSONCompactStringsEachRowWithNamesAndTypes](../../interfaces/formats.md/#jsoncompactstringseachrowwithnamesandtypes) -- [RowBinaryWithNamesAndTypes](../../interfaces/formats.md/#rowbinarywithnamesandtypes-rowbinarywithnamesandtypes) -- [CustomSeparatedWithNamesAndTypes](../../interfaces/formats.md/#customseparatedwithnamesandtypes) - -Possible values: - -- 0 — Disabled. -- 1 — Enabled. - -Default value: 1. - -## input_format_defaults_for_omitted_fields {#input_format_defaults_for_omitted_fields} - -When performing `INSERT` queries, replace omitted input column values with default values of the respective columns. 
This option only applies to [JSONEachRow](../../interfaces/formats.md/#jsoneachrow), [CSV](../../interfaces/formats.md/#csv), [TabSeparated](../../interfaces/formats.md/#tabseparated) formats and formats with `WithNames`/`WithNamesAndTypes` suffixes. - :::note -When this option is enabled, extended table metadata are sent from server to client. It consumes additional computing resources on the server and can reduce performance. +Use this setting only for backward compatibility if your use cases depend on old syntax. ::: - -Possible values: - -- 0 — Disabled. -- 1 — Enabled. - -Default value: 1. - -## input_format_null_as_default {#input_format_null_as_default} - -Enables or disables the initialization of [NULL](../../sql-reference/syntax.md/#null-literal) fields with [default values](../../sql-reference/statements/create/table.md/#create-default-values), if data type of these fields is not [nullable](../../sql-reference/data-types/nullable.md/#data_type-nullable). -If column type is not nullable and this setting is disabled, then inserting `NULL` causes an exception. If column type is nullable, then `NULL` values are inserted as is, regardless of this setting. - -This setting is applicable to [INSERT ... VALUES](../../sql-reference/statements/insert-into.md) queries for text input formats. - -Possible values: - -- 0 — Inserting `NULL` into a not nullable column causes an exception. -- 1 — `NULL` fields are initialized with default column values. - -Default value: `1`. - -## input_format_allow_seeks {#input_format_allow_seeks} - -Allow seeks while reading in ORC/Parquet/Arrow input formats. - -Enabled by default. - -## input_format_max_rows_to_read_for_schema_inference {#input_format_max_rows_to_read_for_schema_inference} - -The maximum rows of data to read for automatic schema inference. - -Default value: `25'000`. - -## column_names_for_schema_inference {#column_names_for_schema_inference} - -The list of column names to use in schema inference for formats without column names. The format: 'column1,column2,column3,...' - -## schema_inference_hints {#schema_inference_hints} - -The list of column names and types to use as hints in schema inference for formats without schema. - -Example: - -Query: -```sql -desc format(JSONEachRow, '{"x" : 1, "y" : "String", "z" : "0.0.0.0" }') settings schema_inference_hints='x UInt8, z IPv4'; -``` - -Result: -```sql -x UInt8 -y Nullable(String) -z IPv4 -``` - -## schema_inference_make_columns_nullable {#schema_inference_make_columns_nullable} - -Controls making inferred types `Nullable` in schema inference for formats without information about nullability. -If the setting is enabled, the inferred type will be `Nullable` only if column contains `NULL` in a sample that is parsed during schema inference. - -Default value: `true`. - -## input_format_try_infer_integers {#input_format_try_infer_integers} - -If enabled, ClickHouse will try to infer integers instead of floats in schema inference for text formats. If all numbers in the column from input data are integers, the result type will be `Int64`, if at least one number is float, the result type will be `Float64`. - -Enabled by default. - -## input_format_try_infer_dates {#input_format_try_infer_dates} - -If enabled, ClickHouse will try to infer type `Date` from string fields in schema inference for text formats. If all fields from a column in input data were successfully parsed as dates, the result type will be `Date`, if at least one field was not parsed as date, the result type will be `String`. 
- -Enabled by default. - -## input_format_try_infer_datetimes {#input_format_try_infer_datetimes} - -If enabled, ClickHouse will try to infer type `DateTime64` from string fields in schema inference for text formats. If all fields from a column in input data were successfully parsed as datetimes, the result type will be `DateTime64`, if at least one field was not parsed as datetime, the result type will be `String`. - -Enabled by default. - -## date_time_input_format {#date_time_input_format} - -Allows choosing a parser of the text representation of date and time. - -The setting does not apply to [date and time functions](../../sql-reference/functions/date-time-functions.md). - -Possible values: - -- `'best_effort'` — Enables extended parsing. - - ClickHouse can parse the basic `YYYY-MM-DD HH:MM:SS` format and all [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) date and time formats. For example, `'2018-06-08T01:02:03.000Z'`. - -- `'basic'` — Use basic parser. - - ClickHouse can parse only the basic `YYYY-MM-DD HH:MM:SS` or `YYYY-MM-DD` format. For example, `2019-08-20 10:18:56` or `2019-08-20`. - -Default value: `'basic'`. - -See also: - -- [DateTime data type.](../../sql-reference/data-types/datetime.md) -- [Functions for working with dates and times.](../../sql-reference/functions/date-time-functions.md) - -## date_time_output_format {#date_time_output_format} - -Allows choosing different output formats of the text representation of date and time. - -Possible values: - -- `simple` - Simple output format. - - ClickHouse output date and time `YYYY-MM-DD hh:mm:ss` format. For example, `2019-08-20 10:18:56`. The calculation is performed according to the data type's time zone (if present) or server time zone. - -- `iso` - ISO output format. - - ClickHouse output date and time in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `YYYY-MM-DDThh:mm:ssZ` format. For example, `2019-08-20T10:18:56Z`. Note that output is in UTC (`Z` means UTC). - -- `unix_timestamp` - Unix timestamp output format. - - ClickHouse output date and time in [Unix timestamp](https://en.wikipedia.org/wiki/Unix_time) format. For example `1566285536`. - -Default value: `simple`. - -See also: - -- [DateTime data type.](../../sql-reference/data-types/datetime.md) -- [Functions for working with dates and times.](../../sql-reference/functions/date-time-functions.md) - -## input_format_ipv4_default_on_conversion_error {#input_format_ipv4_default_on_conversion_error} - -Deserialization of IPv4 will use default values instead of throwing exception on conversion error. - -Disabled by default. - -## input_format_ipv6_default_on_conversion_error {#input_format_ipv6_default_on_conversion_error} - -Deserialization of IPV6 will use default values instead of throwing exception on conversion error. - -Disabled by default. - -## bool_true_representation {#bool_true_representation} - -Text to represent true bool value in TSV/CSV/Vertical/Pretty formats. - -Default value: `true` - -## bool_false_representation {#bool_false_representation} - -Text to represent false bool value in TSV/CSV/Vertical/Pretty formats. - -Default value: `false` - -## output_format_decimal_trailing_zeros {#output_format_decimal_trailing_zeros} - -Output trailing zeros when printing Decimal values. E.g. 1.230000 instead of 1.23. - -Disabled by default. - -## input_format_allow_errors_num {#input_format_allow_errors_num} - -Sets the maximum number of acceptable errors when reading from text formats (CSV, TSV, etc.). - -The default value is 0. 
- -Always pair it with `input_format_allow_errors_ratio`. - -If an error occurred while reading rows but the error counter is still less than `input_format_allow_errors_num`, ClickHouse ignores the row and moves on to the next one. - -If both `input_format_allow_errors_num` and `input_format_allow_errors_ratio` are exceeded, ClickHouse throws an exception. - -## input_format_allow_errors_ratio {#input_format_allow_errors_ratio} - -Sets the maximum percentage of errors allowed when reading from text formats (CSV, TSV, etc.). -The percentage of errors is set as a floating-point number between 0 and 1. - -The default value is 0. - -Always pair it with `input_format_allow_errors_num`. - -If an error occurred while reading rows but the error counter is still less than `input_format_allow_errors_ratio`, ClickHouse ignores the row and moves on to the next one. - -If both `input_format_allow_errors_num` and `input_format_allow_errors_ratio` are exceeded, ClickHouse throws an exception. - -## format_schema {#format-schema} - -This parameter is useful when you are using formats that require a schema definition, such as [Cap’n Proto](https://capnproto.org/) or [Protobuf](https://developers.google.com/protocol-buffers/). The value depends on the format. - -## output_format_enable_streaming {#output_format_enable_streaming} - -Enable streaming in output formats that support it. - -Disabled by default. - -## output_format_write_statistics {#output_format_write_statistics} - -Write statistics about read rows, bytes, time elapsed in suitable output formats. - -Enabled by default - -## insert_distributed_one_random_shard {#insert_distributed_one_random_shard} - -Enables or disables random shard insertion into a [Distributed](../../engines/table-engines/special/distributed.md/#distributed) table when there is no distributed key. - -By default, when inserting data into a `Distributed` table with more than one shard, the ClickHouse server will reject any insertion request if there is no distributed key. When `insert_distributed_one_random_shard = 1`, insertions are allowed and data is forwarded randomly among all shards. - -Possible values: - -- 0 — Insertion is rejected if there are multiple shards and no distributed key is given. -- 1 — Insertion is done randomly among all available shards when no distributed key is given. - -Default value: `0`. - -## JSON formats settings {#json-formats-settings} - -### input_format_import_nested_json {#input_format_import_nested_json} - -Enables or disables the insertion of JSON data with nested objects. - -Supported formats: - -- [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) - -Possible values: - -- 0 — Disabled. -- 1 — Enabled. - -Default value: 0. - -See also: - -- [Usage of Nested Structures](../../interfaces/formats.md/#jsoneachrow-nested) with the `JSONEachRow` format. - -### input_format_json_read_bools_as_numbers {#input_format_json_read_bools_as_numbers} - -Allow parsing bools as numbers in JSON input formats. - -Enabled by default. - -### input_format_json_read_numbers_as_strings {#input_format_json_read_numbers_as_strings} - -Allow parsing numbers as strings in JSON input formats. - -Disabled by default. - -### input_format_json_read_objects_as_strings {#input_format_json_read_objects_as_strings} - -Allow parsing JSON objects as strings in JSON input formats. 
- -Example: - -```sql -SET input_format_json_read_objects_as_strings = 1; -CREATE TABLE test (id UInt64, obj String, date Date) ENGINE=Memory(); -INSERT INTO test FORMAT JSONEachRow {"id" : 1, "obj" : {"a" : 1, "b" : "Hello"}, "date" : "2020-01-01"}; -SELECT * FROM test; -``` - -Result: - -``` -┌─id─┬─obj──────────────────────┬───────date─┐ -│ 1 │ {"a" : 1, "b" : "Hello"} │ 2020-01-01 │ -└────┴──────────────────────────┴────────────┘ -``` - -Disabled by default. - -### input_format_json_validate_types_from_metadata {#input_format_json_validate_types_from_metadata} - -For JSON/JSONCompact/JSONColumnsWithMetadata input formats, if this setting is set to 1, -the types from metadata in input data will be compared with the types of the corresponding columns from the table. - -Enabled by default. - -### output_format_json_quote_64bit_integers {#output_format_json_quote_64bit_integers} - -Controls quoting of 64-bit or bigger [integers](../../sql-reference/data-types/int-uint.md) (like `UInt64` or `Int128`) when they are output in a [JSON](../../interfaces/formats.md/#json) format. -Such integers are enclosed in quotes by default. This behavior is compatible with most JavaScript implementations. - -Possible values: - -- 0 — Integers are output without quotes. -- 1 — Integers are enclosed in quotes. - -Default value: 1. - -### output_format_json_quote_64bit_floats {#output_format_json_quote_64bit_floats} - -Controls quoting of 64-bit [floats](../../sql-reference/data-types/float.md) when they are output in JSON* formats. - -Disabled by default. - -### output_format_json_quote_denormals {#output_format_json_quote_denormals} - -Enables `+nan`, `-nan`, `+inf`, `-inf` outputs in [JSON](../../interfaces/formats.md/#json) output format. - -Possible values: - -- 0 — Disabled. -- 1 — Enabled. - -Default value: 0. - -**Example** - -Consider the following table `account_orders`: - -```text -┌─id─┬─name───┬─duration─┬─period─┬─area─┐ -│ 1 │ Andrew │ 20 │ 0 │ 400 │ -│ 2 │ John │ 40 │ 0 │ 0 │ -│ 3 │ Bob │ 15 │ 0 │ -100 │ -└────┴────────┴──────────┴────────┴──────┘ -``` - -When `output_format_json_quote_denormals = 0`, the query returns `null` values in output: - -```sql -SELECT area/period FROM account_orders FORMAT JSON; -``` - -```json -{ - "meta": - [ - { - "name": "divide(area, period)", - "type": "Float64" - } - ], - - "data": - [ - { - "divide(area, period)": null - }, - { - "divide(area, period)": null - }, - { - "divide(area, period)": null - } - ], - - "rows": 3, - - "statistics": - { - "elapsed": 0.003648093, - "rows_read": 3, - "bytes_read": 24 - } -} -``` - -When `output_format_json_quote_denormals = 1`, the query returns: - -```json -{ - "meta": - [ - { - "name": "divide(area, period)", - "type": "Float64" - } - ], - - "data": - [ - { - "divide(area, period)": "inf" - }, - { - "divide(area, period)": "-nan" - }, - { - "divide(area, period)": "-inf" - } - ], - - "rows": 3, - - "statistics": - { - "elapsed": 0.000070241, - "rows_read": 3, - "bytes_read": 24 - } -} -``` - -### output_format_json_quote_decimals {#output_format_json_quote_decimals} - -Controls quoting of decimals in JSON output formats. - -Disabled by default. - -### output_format_json_escape_forward_slashes {#output_format_json_escape_forward_slashes} - -Controls escaping forward slashes for string outputs in JSON output format. This is intended for compatibility with JavaScript. Don't confuse with backslashes that are always escaped. - -Enabled by default. 
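**Example**

A quick sketch of the effect (the string literal is arbitrary):

```sql
SET output_format_json_escape_forward_slashes = 1;
SELECT '/usr/bin/clickhouse' AS path FORMAT JSONEachRow;
```

```text
{"path":"\/usr\/bin\/clickhouse"}
```

```sql
SET output_format_json_escape_forward_slashes = 0;
SELECT '/usr/bin/clickhouse' AS path FORMAT JSONEachRow;
```

```text
{"path":"/usr/bin/clickhouse"}
```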
- -### output_format_json_named_tuples_as_objects {#output_format_json_named_tuples_as_objects} - -Serialize named tuple columns as JSON objects. - -Enabled by default. - -### input_format_json_named_tuples_as_objects {#input_format_json_named_tuples_as_objects} - -Parse named tuple columns as JSON objects. - -Enabled by default. - -### input_format_json_defaults_for_missing_elements_in_named_tuple {#input_format_json_defaults_for_missing_elements_in_named_tuple} - -Insert default values for missing elements in JSON object while parsing named tuple. -This setting works only when setting `input_format_json_named_tuples_as_objects` is enabled. - -Enabled by default. - -### output_format_json_array_of_rows {#output_format_json_array_of_rows} - -Enables the ability to output all rows as a JSON array in the [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) format. - -Possible values: - -- 1 — ClickHouse outputs all rows as an array, each row in the `JSONEachRow` format. -- 0 — ClickHouse outputs each row separately in the `JSONEachRow` format. - -Default value: `0`. - -**Example of a query with the enabled setting** - -Query: - -```sql -SET output_format_json_array_of_rows = 1; -SELECT number FROM numbers(3) FORMAT JSONEachRow; -``` - -Result: - -```text -[ -{"number":"0"}, -{"number":"1"}, -{"number":"2"} -] -``` - -**Example of a query with the disabled setting** - -Query: - -```sql -SET output_format_json_array_of_rows = 0; -SELECT number FROM numbers(3) FORMAT JSONEachRow; -``` - -Result: - -```text -{"number":"0"} -{"number":"1"} -{"number":"2"} -``` - -### output_format_json_validate_utf8 {#output_format_json_validate_utf8} - -Controls validation of UTF-8 sequences in JSON output formats, doesn't impact formats JSON/JSONCompact/JSONColumnsWithMetadata, they always validate UTF-8. - -Disabled by default. - -### format_json_object_each_row_column_for_object_name {#format_json_object_each_row_column_for_object_name} - -The name of column that will be used for storing/writing object names in [JSONObjectEachRow](../../interfaces/formats.md/#jsonobjecteachrow) format. -Column type should be String. If value is empty, default names `row_{i}`will be used for object names. - -Default value: ''. - -## TSV format settings {#tsv-format-settings} - -### input_format_tsv_empty_as_default {#input_format_tsv_empty_as_default} - -When enabled, replace empty input fields in TSV with default values. For complex default expressions `input_format_defaults_for_omitted_fields` must be enabled too. - -Disabled by default. - -### input_format_tsv_enum_as_number {#input_format_tsv_enum_as_number} - -When enabled, always treat enum values as enum ids for TSV input format. It's recommended to enable this setting if data contains only enum ids to optimize enum parsing. - -Possible values: - -- 0 — Enum values are parsed as values or as enum IDs. -- 1 — Enum values are parsed only as enum IDs. - -Default value: 0. 
- -**Example** - -Consider the table: - -```sql -CREATE TABLE table_with_enum_column_for_tsv_insert (Id Int32,Value Enum('first' = 1, 'second' = 2)) ENGINE=Memory(); -``` - -When the `input_format_tsv_enum_as_number` setting is enabled: - -Query: - -```sql -SET input_format_tsv_enum_as_number = 1; -INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2; -SELECT * FROM table_with_enum_column_for_tsv_insert; -``` - -Result: - -```text -┌──Id─┬─Value──┐ -│ 102 │ second │ -└─────┴────────┘ -``` - -Query: - -```sql -SET input_format_tsv_enum_as_number = 1; -INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 103 'first'; -``` - -throws an exception. - -When the `input_format_tsv_enum_as_number` setting is disabled: - -Query: - -```sql -SET input_format_tsv_enum_as_number = 0; -INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2; -INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 103 'first'; -SELECT * FROM table_with_enum_column_for_tsv_insert; -``` - -Result: - -```text -┌──Id─┬─Value──┐ -│ 102 │ second │ -└─────┴────────┘ -┌──Id─┬─Value──┐ -│ 103 │ first │ -└─────┴────────┘ -``` - -### input_format_tsv_use_best_effort_in_schema_inference {#input_format_tsv_use_best_effort_in_schema_inference} - -Use some tweaks and heuristics to infer schema in TSV format. If disabled, all fields will be treated as String. - -Enabled by default. - -### input_format_tsv_skip_first_lines {#input_format_tsv_skip_first_lines} - -The number of lines to skip at the beginning of data in TSV input format. - -Default value: `0`. - -### output_format_tsv_crlf_end_of_line {#output_format_tsv_crlf_end_of_line} - -Use DOC/Windows-style line separator (CRLF) in TSV instead of Unix style (LF). - -Disabled by default. - -### format_tsv_null_representation {#format_tsv_null_representation} - -Defines the representation of `NULL` for [TSV](../../interfaces/formats.md/#tabseparated) output and input formats. User can set any string as a value, for example, `My NULL`. - -Default value: `\N`. - -**Examples** - -Query - -```sql -SELECT * FROM tsv_custom_null FORMAT TSV; -``` - -Result - -```text -788 -\N -\N -``` - -Query - -```sql -SET format_tsv_null_representation = 'My NULL'; -SELECT * FROM tsv_custom_null FORMAT TSV; -``` - -Result - -```text -788 -My NULL -My NULL -``` - -## CSV format settings {#csv-format-settings} - -### format_csv_delimiter {#format_csv_delimiter} - -The character is interpreted as a delimiter in the CSV data. - -Default value: `,`. - -### format_csv_allow_single_quotes {#format_csv_allow_single_quotes} - -If it is set to true, allow strings in single quotes. - -Enabled by default. - -### format_csv_allow_double_quotes {#format_csv_allow_double_quotes} - -If it is set to true, allow strings in double quotes. - -Enabled by default. - -### output_format_csv_crlf_end_of_line {#output_format_csv_crlf_end_of_line} - -Use DOS/Windows-style line separator (CRLF) in CSV instead of Unix style (LF). - -Disabled by default. - -### input_format_csv_enum_as_number {#input_format_csv_enum_as_number} - -When enabled, always treat enum values as enum ids for CSV input format. It's recommended to enable this setting if data contains only enum ids to optimize enum parsing. - -Possible values: - -- 0 — Enum values are parsed as values or as enum IDs. -- 1 — Enum values are parsed only as enum IDs. - -Default value: 0. 
- -**Examples** - -Consider the table: - -```sql -CREATE TABLE table_with_enum_column_for_csv_insert (Id Int32,Value Enum('first' = 1, 'second' = 2)) ENGINE=Memory(); -``` - -When the `input_format_csv_enum_as_number` setting is enabled: - -Query: - -```sql -SET input_format_csv_enum_as_number = 1; -INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2 -``` - -Result: - -```text -┌──Id─┬─Value──┐ -│ 102 │ second │ -└─────┴────────┘ -``` - -Query: - -```sql -SET input_format_csv_enum_as_number = 1; -INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 103,'first' -``` - -throws an exception. - -When the `input_format_csv_enum_as_number` setting is disabled: - -Query: - -```sql -SET input_format_csv_enum_as_number = 0; -INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2 -INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 103,'first' -SELECT * FROM table_with_enum_column_for_csv_insert; -``` - -Result: - -```text -┌──Id─┬─Value──┐ -│ 102 │ second │ -└─────┴────────┘ -┌──Id─┬─Value─┐ -│ 103 │ first │ -└─────┴───────┘ -``` - -### input_format_csv_arrays_as_nested_csv {#input_format_csv_arrays_as_nested_csv} - -When reading Array from CSV, expect that its elements were serialized in nested CSV and then put into string. Example: "[""Hello"", ""world"", ""42"""" TV""]". Braces around array can be omitted. - -Disabled by default. - -### input_format_csv_empty_as_default {#input_format_csv_empty_as_default} - -When enabled, replace empty input fields in CSV with default values. For complex default expressions `input_format_defaults_for_omitted_fields` must be enabled too. - -Enabled by default. - -### input_format_csv_use_best_effort_in_schema_inference {#input_format_csv_use_best_effort_in_schema_inference} - -Use some tweaks and heuristics to infer schema in CSV format. If disabled, all fields will be treated as String. - -Enabled by default. - -### input_format_csv_skip_first_lines {#input_format_csv_skip_first_lines} - -The number of lines to skip at the beginning of data in CSV input format. - -Default value: `0`. - -### format_csv_null_representation {#format_csv_null_representation} - -Defines the representation of `NULL` for [CSV](../../interfaces/formats.md/#csv) output and input formats. User can set any string as a value, for example, `My NULL`. - -Default value: `\N`. - -**Examples** - -Query - -```sql -SELECT * from csv_custom_null FORMAT CSV; -``` - -Result - -```text -788 -\N -\N -``` - -Query - -```sql -SET format_csv_null_representation = 'My NULL'; -SELECT * FROM csv_custom_null FORMAT CSV; -``` - -Result - -```text -788 -My NULL -My NULL -``` - -## Values format settings {#values-format-settings} - -### input_format_values_interpret_expressions {#input_format_values_interpret_expressions} - -Enables or disables the full SQL parser if the fast stream parser can’t parse the data. This setting is used only for the [Values](../../interfaces/formats.md/#data-format-values) format at the data insertion. For more information about syntax parsing, see the [Syntax](../../sql-reference/syntax.md) section. - -Possible values: - -- 0 — Disabled. - - In this case, you must provide formatted data. See the [Formats](../../interfaces/formats.md) section. - -- 1 — Enabled. - - In this case, you can use an SQL expression as a value, but data insertion is much slower this way. If you insert only formatted data, then ClickHouse behaves as if the setting value is 0. - -Default value: 1. 
- -Example of Use - -Insert the [DateTime](../../sql-reference/data-types/datetime.md) type value with the different settings. - -``` sql -SET input_format_values_interpret_expressions = 0; -INSERT INTO datetime_t VALUES (now()) -``` - -``` text -Exception on client: -Code: 27. DB::Exception: Cannot parse input: expected ) before: now()): (at row 1) -``` - -``` sql -SET input_format_values_interpret_expressions = 1; -INSERT INTO datetime_t VALUES (now()) -``` - -``` text -Ok. -``` - -The last query is equivalent to the following: - -``` sql -SET input_format_values_interpret_expressions = 0; -INSERT INTO datetime_t SELECT now() -``` - -``` text -Ok. -``` - -### input_format_values_deduce_templates_of_expressions {#input_format_values_deduce_templates_of_expressions} - -Enables or disables template deduction for SQL expressions in [Values](../../interfaces/formats.md/#data-format-values) format. It allows parsing and interpreting expressions in `Values` much faster if expressions in consecutive rows have the same structure. ClickHouse tries to deduce the template of an expression, parse the following rows using this template and evaluate the expression on a batch of successfully parsed rows. - -Possible values: - -- 0 — Disabled. -- 1 — Enabled. - -Default value: 1. - -For the following query: - -``` sql -INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), (upper('Values')), ... -``` - -- If `input_format_values_interpret_expressions=1` and `format_values_deduce_templates_of_expressions=0`, expressions are interpreted separately for each row (this is very slow for large number of rows). -- If `input_format_values_interpret_expressions=0` and `format_values_deduce_templates_of_expressions=1`, expressions in the first, second and third rows are parsed using template `lower(String)` and interpreted together, expression in the forth row is parsed with another template (`upper(String)`). -- If `input_format_values_interpret_expressions=1` and `format_values_deduce_templates_of_expressions=1`, the same as in previous case, but also allows fallback to interpreting expressions separately if it’s not possible to deduce template. - -### input_format_values_accurate_types_of_literals {#input_format_values_accurate_types_of_literals} - -This setting is used only when `input_format_values_deduce_templates_of_expressions = 1`. Expressions for some column may have the same structure, but contain numeric literals of different types, e.g. - -``` sql -(..., abs(0), ...), -- UInt64 literal -(..., abs(3.141592654), ...), -- Float64 literal -(..., abs(-1), ...), -- Int64 literal -``` - -Possible values: - -- 0 — Disabled. - - In this case, ClickHouse may use a more general type for some literals (e.g., `Float64` or `Int64` instead of `UInt64` for `42`), but it may cause overflow and precision issues. - -- 1 — Enabled. - - In this case, ClickHouse checks the actual type of literal and uses an expression template of the corresponding type. In some cases, it may significantly slow down expression evaluation in `Values`. - -Default value: 1. - -## Arrow format settings {#arrow-format-settings} - -### input_format_arrow_import_nested {#input_format_arrow_import_nested} - -Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [Arrow](../../interfaces/formats.md/#data_types-matching-arrow) input format. 
- -Possible values: - -- 0 — Data can not be inserted into `Nested` columns as an array of structs. -- 1 — Data can be inserted into `Nested` columns as an array of structs. - -Default value: `0`. - -### input_format_arrow_case_insensitive_column_matching {#input_format_arrow_case_insensitive_column_matching} - -Ignore case when matching Arrow column names with ClickHouse column names. - -Disabled by default. - -### input_format_arrow_allow_missing_columns {#input_format_arrow_allow_missing_columns} - -While importing data, when column is not found in schema default value will be used instead of error. - -Disabled by default. - -### input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference {#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference} - -Allow skipping columns with unsupported types while schema inference for format Arrow. - -Disabled by default. - -### output_format_arrow_low_cardinality_as_dictionary {#output_format_arrow_low_cardinality_as_dictionary} - -Allows to convert the [LowCardinality](../../sql-reference/data-types/lowcardinality.md) type to the `DICTIONARY` type of the [Arrow](../../interfaces/formats.md/#data-format-arrow) format for `SELECT` queries. - -Possible values: - -- 0 — The `LowCardinality` type is not converted to the `DICTIONARY` type. -- 1 — The `LowCardinality` type is converted to the `DICTIONARY` type. - -Default value: `0`. - -### output_format_arrow_string_as_string {#output_format_arrow_string_as_string} - -Use Arrow String type instead of Binary for String columns. - -Disabled by default. - -## ORC format settings {#orc-format-settings} - -### input_format_orc_import_nested {#input_format_orc_import_nested} - -Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [ORC](../../interfaces/formats.md/#data-format-orc) input format. - -Possible values: - -- 0 — Data can not be inserted into `Nested` columns as an array of structs. -- 1 — Data can be inserted into `Nested` columns as an array of structs. - -Default value: `0`. - -### input_format_orc_row_batch_size {#input_format_orc_row_batch_size} - -Batch size when reading ORC stripes. - -Default value: `100'000` - -### input_format_orc_case_insensitive_column_matching {#input_format_orc_case_insensitive_column_matching} - -Ignore case when matching ORC column names with ClickHouse column names. - -Disabled by default. - -### input_format_orc_allow_missing_columns {#input_format_orc_allow_missing_columns} - -While importing data, when column is not found in schema default value will be used instead of error. - -Disabled by default. - -### input_format_orc_skip_columns_with_unsupported_types_in_schema_inference {#input_format_orc_skip_columns_with_unsupported_types_in_schema_inference} - -Allow skipping columns with unsupported types while schema inference for format Arrow. - -Disabled by default. - -### output_format_orc_string_as_string {#output_format_orc_string_as_string} - -Use ORC String type instead of Binary for String columns. - -Disabled by default. - -## Parquet format settings {#parquet-format-settings} - -## input_format_parquet_import_nested {#input_format_parquet_import_nested} - -Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [Parquet](../../interfaces/formats.md/#data-format-parquet) input format. 
- -Possible values: - -- 0 — Data can not be inserted into `Nested` columns as an array of structs. -- 1 — Data can be inserted into `Nested` columns as an array of structs. - -Default value: `0`. - -### input_format_parquet_case_insensitive_column_matching {#input_format_parquet_case_insensitive_column_matching} - -Ignore case when matching Parquet column names with ClickHouse column names. - -Disabled by default. - -### output_format_parquet_row_group_size {#output_format_parquet_row_group_size} - -Row group size in rows. - -Default value: `1'000'000`. - -### input_format_parquet_allow_missing_columns {#input_format_parquet_allow_missing_columns} - -While importing data, when column is not found in schema default value will be used instead of error. - -Disabled by default. - -### input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference {#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference} - -Allow skipping columns with unsupported types while schema inference for format Parquet. - -Disabled by default. - -### output_format_parquet_string_as_string {#output_format_parquet_string_as_string} - -Use Parquet String type instead of Binary for String columns. - -Disabled by default. - -## Hive format settings {#hive-format-settings} - -### input_format_hive_text_fields_delimiter {#input_format_hive_text_fields_delimiter} - -Delimiter between fields in Hive Text File. - -Default value: `\x01`. - -### input_format_hive_text_collection_items_delimiter {#input_format_hive_text_collection_items_delimiter} - -Delimiter between collection(array or map) items in Hive Text File. - -Default value: `\x02`. - -### input_format_hive_text_map_keys_delimiter {#input_format_hive_text_map_keys_delimiter} - -Delimiter between a pair of map key/values in Hive Text File. - -Default value: `\x03`. - -## MsgPack format settings {#msgpack-format-settings} - -### input_format_msgpack_number_of_columns {#input_format_msgpack_number_of_columns} - -The number of columns in inserted MsgPack data. Used for automatic schema inference from data. - -Default value: `0`. - -### output_format_msgpack_uuid_representation {#output_format_msgpack_uuid_representation} - -The way how to output UUID in MsgPack format. -Possible values: - -- `bin` - as 16-bytes binary. -- `str` - as a string of 36 bytes. -- `ext` - as extention with ExtType = 2. - -Default value: `ext`. - - -## Protobuf format settings {#protobuf-format-settings} - -### input_format_protobuf_flatten_google_wrappers {#input_format_protobuf_flatten_google_wrappers} - -Enable Google wrappers for regular non-nested columns, e.g. google.protobuf.StringValue 'str' for String column 'str'. For Nullable columns empty wrappers are recognized as defaults, and missing as nulls. - -Disabled by default. - -### output_format_protobuf_nullables_with_google_wrappers {#output_format_protobuf_nullables_with_google_wrappers} - -When serializing Nullable columns with Google wrappers, serialize default values as empty wrappers. If turned off, default and null values are not serialized. - -Disabled by default. - -## Avro format settings {#avro-format-settings} - -### input_format_avro_allow_missing_fields {#input_format_avro_allow_missing_fields} - -Enables using fields that are not specified in [Avro](../../interfaces/formats.md/#data-format-avro) or [AvroConfluent](../../interfaces/formats.md/#data-format-avro-confluent) format schema. When a field is not found in the schema, ClickHouse uses the default value instead of throwing an exception. 
- -Possible values: - -- 0 — Disabled. -- 1 — Enabled. - -Default value: 0. - -### format_avro_schema_registry_url {#format_avro_schema_registry_url} - -Sets [Confluent Schema Registry](https://docs.confluent.io/current/schema-registry/index.html) URL to use with [AvroConfluent](../../interfaces/formats.md/#data-format-avro-confluent) format. - -Default value: `Empty`. - -### output_format_avro_codec {#output_format_avro_codec} - -Sets the compression codec used for output Avro file. - -Type: string - -Possible values: - -- `null` — No compression -- `deflate` — Compress with Deflate (zlib) -- `snappy` — Compress with [Snappy](https://google.github.io/snappy/) - -Default value: `snappy` (if available) or `deflate`. - -### output_format_avro_sync_interval {#output_format_avro_sync_interval} - -Sets minimum data size (in bytes) between synchronization markers for output Avro file. - -Type: unsigned int - -Possible values: 32 (32 bytes) - 1073741824 (1 GiB) - -Default value: 32768 (32 KiB) - -### output_format_avro_string_column_pattern {#output_format_avro_string_column_pattern} - -Regexp of column names of type String to output as Avro `string` (default is `bytes`). -RE2 syntax is supported. - -Type: string - -### output_format_avro_rows_in_file {#output_format_avro_rows_in_file} - -Max rows in a file (if permitted by storage). - -Default value: `1`. - -## Pretty formats settings {#pretty-formats-settings} - -### output_format_pretty_max_rows {#output_format_pretty_max_rows} - -Rows limit for Pretty formats. - -Default value: `10'000`. - -### output_format_pretty_max_column_pad_width {#output_format_pretty_max_column_pad_width} - -Maximum width to pad all values in a column in Pretty formats. - -Default value: `250`. - -### output_format_pretty_max_value_width {#output_format_pretty_max_value_width} - -Limits the width of value displayed in [Pretty](../../interfaces/formats.md/#pretty) formats. If the value width exceeds the limit, the value is cut. - -Possible values: - -- Positive integer. -- 0 — The value is cut completely. - -Default value: `10000` symbols. - -**Examples** - -Query: -```sql -SET output_format_pretty_max_value_width = 10; -SELECT range(number) FROM system.numbers LIMIT 10 FORMAT PrettyCompactNoEscapes; -``` -Result: -```text -┌─range(number)─┐ -│ [] │ -│ [0] │ -│ [0,1] │ -│ [0,1,2] │ -│ [0,1,2,3] │ -│ [0,1,2,3,4⋯ │ -│ [0,1,2,3,4⋯ │ -│ [0,1,2,3,4⋯ │ -│ [0,1,2,3,4⋯ │ -│ [0,1,2,3,4⋯ │ -└───────────────┘ -``` - -Query with zero width: -```sql -SET output_format_pretty_max_value_width = 0; -SELECT range(number) FROM system.numbers LIMIT 5 FORMAT PrettyCompactNoEscapes; -``` -Result: -```text -┌─range(number)─┐ -│ ⋯ │ -│ ⋯ │ -│ ⋯ │ -│ ⋯ │ -│ ⋯ │ -└───────────────┘ -``` - -### output_format_pretty_color {#output_format_pretty_color} - -Use ANSI escape sequences to paint colors in Pretty formats. - -Enabled by default. - -### output_format_pretty_grid_charset {#output_format_pretty_grid_charset} - -Allows changing a charset which is used for printing grids borders. Available charsets are UTF-8, ASCII. - -**Example** - -``` text -SET output_format_pretty_grid_charset = 'UTF-8'; -SELECT * FROM a; -┌─a─┐ -│ 1 │ -└───┘ - -SET output_format_pretty_grid_charset = 'ASCII'; -SELECT * FROM a; -+-a-+ -| 1 | -+---+ -``` - -### output_format_pretty_row_numbers {#output_format_pretty_row_numbers} - -Adds row numbers to output in the [Pretty](../../interfaces/formats.md/#pretty) format. - -Possible values: - -- 0 — Output without row numbers. -- 1 — Output with row numbers. 
- -Default value: `0`. - -**Example** - -Query: - -```sql -SET output_format_pretty_row_numbers = 1; -SELECT TOP 3 name, value FROM system.settings; -``` - -Result: -```text - ┌─name────────────────────┬─value───┐ -1. │ min_compress_block_size │ 65536 │ -2. │ max_compress_block_size │ 1048576 │ -3. │ max_block_size │ 65505 │ - └─────────────────────────┴─────────┘ -``` - -## Template format settings {#template-format-settings} - -### format_template_resultset {#format_template_resultset} - -Path to file which contains format string for result set (for Template format). - -### format_template_row {#format_template_row} - -Path to file which contains format string for rows (for Template format). - -### format_template_rows_between_delimiter {#format_template_rows_between_delimiter} - -Delimiter between rows (for Template format). - -## CustomSeparated format settings {custom-separated-format-settings} - -### format_custom_escaping_rule {#format_custom_escaping_rule} - -Sets the field escaping rule for [CustomSeparated](../../interfaces/formats.md/#format-customseparated) data format. - -Possible values: - -- `'Escaped'` — Similarly to [TSV](../../interfaces/formats.md/#tabseparated). -- `'Quoted'` — Similarly to [Values](../../interfaces/formats.md/#data-format-values). -- `'CSV'` — Similarly to [CSV](../../interfaces/formats.md/#csv). -- `'JSON'` — Similarly to [JSONEachRow](../../interfaces/formats.md/#jsoneachrow). -- `'XML'` — Similarly to [XML](../../interfaces/formats.md/#xml). -- `'Raw'` — Extracts subpatterns as a whole, no escaping rules, similarly to [TSVRaw](../../interfaces/formats.md/#tabseparatedraw). - -Default value: `'Escaped'`. - -### format_custom_field_delimiter {#format_custom_field_delimiter} - -Sets the character that is interpreted as a delimiter between the fields for [CustomSeparated](../../interfaces/formats.md/#format-customseparated) data format. - -Default value: `'\t'`. - -### format_custom_row_before_delimiter {#format_custom_row_before_delimiter} - -Sets the character that is interpreted as a delimiter before the field of the first column for [CustomSeparated](../../interfaces/formats.md/#format-customseparated) data format. - -Default value: `''`. - -### format_custom_row_after_delimiter {#format_custom_row_after_delimiter} - -Sets the character that is interpreted as a delimiter after the field of the last column for [CustomSeparated](../../interfaces/formats.md/#format-customseparated) data format. - -Default value: `'\n'`. - -### format_custom_row_between_delimiter {#format_custom_row_between_delimiter} - -Sets the character that is interpreted as a delimiter between the rows for [CustomSeparated](../../interfaces/formats.md/#format-customseparated) data format. - -Default value: `''`. - -### format_custom_result_before_delimiter {#format_custom_result_before_delimiter} - -Sets the character that is interpreted as a prefix before the result set for [CustomSeparated](../../interfaces/formats.md/#format-customseparated) data format. - -Default value: `''`. - -### format_custom_result_after_delimiter {#format_custom_result_after_delimiter} - -Sets the character that is interpreted as a suffix after the result set for [CustomSeparated](../../interfaces/formats.md/#format-customseparated) data format. - -Default value: `''`. - -## Regexp format settings {#regexp-format-settings} - -### format_regexp_escaping_rule {#format_regexp_escaping_rule} - -Field escaping rule. 
- -Possible values: - -- `'Escaped'` — Similarly to [TSV](../../interfaces/formats.md/#tabseparated). -- `'Quoted'` — Similarly to [Values](../../interfaces/formats.md/#data-format-values). -- `'CSV'` — Similarly to [CSV](../../interfaces/formats.md/#csv). -- `'JSON'` — Similarly to [JSONEachRow](../../interfaces/formats.md/#jsoneachrow). -- `'XML'` — Similarly to [XML](../../interfaces/formats.md/#xml). -- `'Raw'` — Extracts subpatterns as a whole, no escaping rules, similarly to [TSVRaw](../../interfaces/formats.md/#tabseparatedraw). - -Default value: `Raw`. - -### format_regexp_skip_unmatched {#format_regexp_skip_unmatched} - -Skip lines unmatched by regular expression. - -Disabled by default. - -## CapnProto format settings {#capn-proto-format-settings} - -### format_capn_proto_enum_comparising_mode {#format_capn_proto_enum_comparising_mode} - -Determines how to map ClickHouse `Enum` data type and [CapnProto](../../interfaces/formats.md/#capnproto) `Enum` data type from schema. - -Possible values: - -- `'by_values'` — Values in enums should be the same, names can be different. -- `'by_names'` — Names in enums should be the same, values can be different. -- `'by_name_case_insensitive'` — Names in enums should be the same case-insensitive, values can be different. - -Default value: `'by_values'`. - -## MySQLDump format settings {#musqldump-format-settings} - -### input_format_mysql_dump_table_name (#input_format_mysql_dump_table_name) - -The name of the table from which to read data from in MySQLDump input format. - -### input_format_mysql_dump_map_columns (#input_format_mysql_dump_map_columns) - -Enables matching columns from table in MySQL dump and columns from ClickHouse table by names in MySQLDump input format. - -Possible values: - -- 0 — Disabled. -- 1 — Enabled. - -Default value: 1. - -## SQLInsert format settings {#sqlinsert-format-settings} - -### output_format_sql_insert_max_batch_size {#output_format_sql_insert_max_batch_size} - -The maximum number of rows in one INSERT statement. - -Default value: `65505`. - -### output_format_sql_insert_table_name {#output_format_sql_insert_table_name} - -The name of table that will be used in the output INSERT statement. - -Default value: `'table''`. - -### output_format_sql_insert_include_column_names {#output_format_sql_insert_include_column_names} - -Include column names in INSERT statement. - -Default value: `true`. - -### output_format_sql_insert_use_replace {#output_format_sql_insert_use_replace} - -Use REPLACE keyword instead of INSERT. - -Default value: `false`. - -### output_format_sql_insert_quote_names {#output_format_sql_insert_quote_names} - -Quote column names with "`" characters - -Default value: `true`. - -## BSONEachRow format settings {#bson-each-row-format-settings} - -### output_format_bson_string_as_string {#output_format_bson_string_as_string} - -Use BSON String type instead of Binary for String columns. - -Disabled by default. - -### input_format_bson_skip_fields_with_unsupported_types_in_schema_inference {#input_format_bson_skip_fields_with_unsupported_types_in_schema_inference} - -Allow skipping columns with unsupported types while schema inference for format BSONEachRow. - -Disabled by default. - -## RowBinary format settings {#row-binary-format-settings} - -### format_binary_max_string_size {#format_binary_max_string_size} - -The maximum allowed size for String in RowBinary format. It prevents allocating large amount of memory in case of corrupted data. 0 means there is no limit. 
- -Default value: `1GiB` diff --git a/docs/en/operations/system-tables/asynchronous_metrics.md b/docs/en/operations/system-tables/asynchronous_metrics.md index 19ff6e33142..551aa771ec9 100644 --- a/docs/en/operations/system-tables/asynchronous_metrics.md +++ b/docs/en/operations/system-tables/asynchronous_metrics.md @@ -9,6 +9,7 @@ Columns: - `metric` ([String](../../sql-reference/data-types/string.md)) — Metric name. - `value` ([Float64](../../sql-reference/data-types/float.md)) — Metric value. +- `description` ([String](../../sql-reference/data-types/string.md) - Metric description) **Example** @@ -17,18 +18,18 @@ SELECT * FROM system.asynchronous_metrics LIMIT 10 ``` ``` text -┌─metric──────────────────────────────────┬──────value─┐ -│ jemalloc.background_thread.run_interval │ 0 │ -│ jemalloc.background_thread.num_runs │ 0 │ -│ jemalloc.background_thread.num_threads │ 0 │ -│ jemalloc.retained │ 422551552 │ -│ jemalloc.mapped │ 1682989056 │ -│ jemalloc.resident │ 1656446976 │ -│ jemalloc.metadata_thp │ 0 │ -│ jemalloc.metadata │ 10226856 │ -│ UncompressedCacheCells │ 0 │ -│ MarkCacheFiles │ 0 │ -└─────────────────────────────────────────┴────────────┘ +┌─metric──────────────────────────────────┬──────value─┬─description────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ AsynchronousMetricsCalculationTimeSpent │ 0.00179053 │ Time in seconds spent for calculation of asynchronous metrics (this is the overhead of asynchronous metrics). │ +│ NumberOfDetachedByUserParts │ 0 │ The total number of parts detached from MergeTree tables by users with the `ALTER TABLE DETACH` query (as opposed to unexpected, broken or ignored parts). The server does not care about detached parts and they can be removed. │ +│ NumberOfDetachedParts │ 0 │ The total number of parts detached from MergeTree tables. A part can be detached by a user with the `ALTER TABLE DETACH` query or by the server itself it the part is broken, unexpected or unneeded. The server does not care about detached parts and they can be removed. │ +│ TotalRowsOfMergeTreeTables │ 2781309 │ Total amount of rows (records) stored in all tables of MergeTree family. │ +│ TotalBytesOfMergeTreeTables │ 7741926 │ Total amount of bytes (compressed, including data and indices) stored in all tables of MergeTree family. │ +│ NumberOfTables │ 93 │ Total number of tables summed across the databases on the server, excluding the databases that cannot contain MergeTree tables. The excluded database engines are those who generate the set of tables on the fly, like `Lazy`, `MySQL`, `PostgreSQL`, `SQlite`. │ +│ NumberOfDatabases │ 6 │ Total number of databases on the server. │ +│ MaxPartCountForPartition │ 6 │ Maximum number of parts per partition across all partitions of all tables of MergeTree family. Values larger than 300 indicates misconfiguration, overload, or massive data loading. │ +│ ReplicasSumMergesInQueue │ 0 │ Sum of merge operations in the queue (still to be applied) across Replicated tables. │ +│ ReplicasSumInsertsInQueue │ 0 │ Sum of INSERT operations in the queue (still to be replicated) across Replicated tables. 
│ +└─────────────────────────────────────────┴────────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ ``` **See Also** diff --git a/docs/en/operations/system-tables/index.md b/docs/en/operations/system-tables/index.md index 14a6b5ea786..284ba866cc8 100644 --- a/docs/en/operations/system-tables/index.md +++ b/docs/en/operations/system-tables/index.md @@ -72,3 +72,10 @@ If procfs is supported and enabled on the system, ClickHouse server collects the - `OSWriteChars` - `OSReadBytes` - `OSWriteBytes` + +## Related content + +- Blog: [System Tables and a window into the internals of ClickHouse](https://clickhouse.com/blog/clickhouse-debugging-issues-with-system-tables) +- Blog: [Essential monitoring queries - part 1 - INSERT queries](https://clickhouse.com/blog/monitoring-troubleshooting-insert-queries-clickhouse) +- Blog: [Essential monitoring queries - part 2 - SELECT queries](https://clickhouse.com/blog/monitoring-troubleshooting-select-queries-clickhouse) + diff --git a/docs/en/sql-reference/aggregate-functions/reference/maxmap.md b/docs/en/sql-reference/aggregate-functions/reference/maxmap.md index ebb9d054476..4d8c67e1b90 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/maxmap.md +++ b/docs/en/sql-reference/aggregate-functions/reference/maxmap.md @@ -19,11 +19,11 @@ Example: ``` sql SELECT maxMap(a, b) -FROM values('a Array(Int32), b Array(Int64)', ([1, 2], [2, 2]), ([2, 3], [1, 1])) +FROM values('a Array(Char), b Array(Int64)', (['x', 'y'], [2, 2]), (['y', 'z'], [3, 1])) ``` ``` text -┌─maxMap(a, b)──────┐ -│ ([1,2,3],[2,2,1]) │ -└───────────────────┘ +┌─maxMap(a, b)───────────┐ +│ [['x','y','z'],[2,3,1]]│ +└────────────────────────┘ ``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/sum.md b/docs/en/sql-reference/aggregate-functions/reference/sum.md index 320bb73f9ac..c6dc4e549dd 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/sum.md +++ b/docs/en/sql-reference/aggregate-functions/reference/sum.md @@ -6,3 +6,7 @@ sidebar_position: 4 # sum Calculates the sum. Only works for numbers. + +``` +SELECT sum(salary) FROM employees; +``` diff --git a/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md b/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md index 133de88a07e..32b234fd6b8 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md +++ b/docs/en/sql-reference/aggregate-functions/reference/topkweighted.md @@ -28,15 +28,16 @@ Returns an array of the values with maximum approximate sum of weights. 
Query: ``` sql -SELECT topKWeighted(10)(number, number) FROM numbers(1000) +SELECT topKWeighted(2)(k, w) FROM +VALUES('k Char, w UInt64', ('y', 1), ('y', 1), ('x', 5), ('y', 1), ('z', 10)) ``` Result: ``` text -┌─topKWeighted(10)(number, number)──────────┐ -│ [999,998,997,996,995,994,993,992,991,990] │ -└───────────────────────────────────────────┘ +┌─topKWeighted(2)(k, w)──┐ +│ ['z','x'] │ +└────────────────────────┘ ``` **See Also** diff --git a/docs/en/sql-reference/data-types/lowcardinality.md b/docs/en/sql-reference/data-types/lowcardinality.md index 6410de8199a..58a99baa09e 100644 --- a/docs/en/sql-reference/data-types/lowcardinality.md +++ b/docs/en/sql-reference/data-types/lowcardinality.md @@ -54,7 +54,9 @@ Functions: - [toLowCardinality](../../sql-reference/functions/type-conversion-functions.md#tolowcardinality) -## See Also +## Related content -- [Reducing ClickHouse Storage Cost with the Low Cardinality Type – Lessons from an Instana Engineer](https://www.instana.com/blog/reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer/). -- [String Optimization (video presentation in Russian)](https://youtu.be/rqf-ILRgBdY?list=PL0Z2YDlm0b3iwXCpEFiOOYmwXzVmjJfEt). [Slides in English](https://github.com/ClickHouse/clickhouse-presentations/raw/master/meetup19/string_optimization.pdf). +- [Reducing ClickHouse Storage Cost with the Low Cardinality Type – Lessons from an Instana Engineer](https://www.instana.com/blog/reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer/) +- [String Optimization (video presentation in Russian)](https://youtu.be/rqf-ILRgBdY?list=PL0Z2YDlm0b3iwXCpEFiOOYmwXzVmjJfEt). [Slides in English](https://github.com/ClickHouse/clickhouse-presentations/raw/master/meetup19/string_optimization.pdf) +- Blog: [Optimizing ClickHouse with Schemas and Codecs](https://clickhouse.com/blog/optimize-clickhouse-codecs-compression-schema) +- Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse) diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md index 3bb0b961072..4dc6fd33849 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md @@ -5,7 +5,7 @@ sidebar_label: Storing Dictionaries in Memory --- import CloudDetails from '@site/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md'; -# Storing Dictionaries in Memory +# Storing Dictionaries in Memory There are a variety of ways to store dictionaries in memory. @@ -25,7 +25,7 @@ ClickHouse generates an exception for errors with dictionaries. Examples of erro You can view the list of dictionaries and their statuses in the [system.dictionaries](../../../operations/system-tables/dictionaries.md) table. 
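For example, a quick status check (the columns shown are standard `system.dictionaries` columns):

```sql
SELECT name, status, element_count, bytes_allocated
FROM system.dictionaries;
```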
- + The configuration looks like this: @@ -290,36 +290,38 @@ This storage method works the same way as hashed and allows using date/time (arb Example: The table contains discounts for each advertiser in the format: ``` text -+---------|-------------|-------------|------+ -| advertiser id | discount start date | discount end date | amount | -+===============+=====================+===================+========+ -| 123 | 2015-01-01 | 2015-01-15 | 0.15 | -+---------|-------------|-------------|------+ -| 123 | 2015-01-16 | 2015-01-31 | 0.25 | -+---------|-------------|-------------|------+ -| 456 | 2015-01-01 | 2015-01-15 | 0.05 | -+---------|-------------|-------------|------+ +┌─advertiser_id─┬─discount_start_date─┬─discount_end_date─┬─amount─┐ +│ 123 │ 2015-01-16 │ 2015-01-31 │ 0.25 │ +│ 123 │ 2015-01-01 │ 2015-01-15 │ 0.15 │ +│ 456 │ 2015-01-01 │ 2015-01-15 │ 0.05 │ +└───────────────┴─────────────────────┴───────────────────┴────────┘ ``` To use a sample for date ranges, define the `range_min` and `range_max` elements in the [structure](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). These elements must contain elements `name` and `type` (if `type` is not specified, the default type will be used - Date). `type` can be any numeric type (Date / DateTime / UInt64 / Int32 / others). -:::warning +:::warning Values of `range_min` and `range_max` should fit in `Int64` type. ::: Example: ``` xml + + + + min + + - Id + advertiser_id - first + discount_start_date Date - last + discount_end_date Date ... @@ -328,17 +330,17 @@ Example: or ``` sql -CREATE DICTIONARY somedict ( - id UInt64, - first Date, - last Date, - advertiser_id UInt64 +CREATE DICTIONARY discounts_dict ( + advertiser_id UInt64, + discount_start_date Date, + discount_end_date Date, + amount Float64 ) PRIMARY KEY id -SOURCE(CLICKHOUSE(TABLE 'date_table')) +SOURCE(CLICKHOUSE(TABLE 'discounts')) LIFETIME(MIN 1 MAX 1000) -LAYOUT(RANGE_HASHED()) -RANGE(MIN first MAX last) +LAYOUT(RANGE_HASHED(range_lookup_strategy 'max')) +RANGE(MIN discount_start_date MAX discount_end_date) ``` To work with these dictionaries, you need to pass an additional argument to the `dictGet` function, for which a range is selected: @@ -349,16 +351,17 @@ dictGet('dict_name', 'attr_name', id, date) Query example: ``` sql -SELECT dictGet('somedict', 'advertiser_id', 1, '2022-10-20 23:20:10.000'::DateTime64::UInt64); +SELECT dictGet('discounts_dict', 'amount', 1, '2022-10-20'::Date); ``` This function returns the value for the specified `id`s and the date range that includes the passed date. Details of the algorithm: -- If the `id` is not found or a range is not found for the `id`, it returns the default value for the dictionary. -- If there are overlapping ranges, it returns value for any (random) range. -- If the range delimiter is `NULL` or an invalid date (such as 1900-01-01), the range is open. The range can be open on both sides. +- If the `id` is not found or a range is not found for the `id`, it returns the default value of the attribute's type. +- If there are overlapping ranges and `range_lookup_strategy=min`, it returns a matching range with minimal `range_min`, if several ranges found, it returns a range with minimal `range_max`, if again several ranges found (several ranges had the same `range_min` and `range_max` it returns a random range of them. 
+- If there are overlapping ranges and `range_lookup_strategy=max`, it returns a matching range with maximal `range_min`, if several ranges found, it returns a range with maximal `range_max`, if again several ranges found (several ranges had the same `range_min` and `range_max` it returns a random range of them. +- If the `range_max` is `NULL`, the range is open. `NULL` is treated as maximal possible value. For the `range_min` `1970-01-01` or `0` (-MAX_INT) can be used as the open value. Configuration example: @@ -407,6 +410,108 @@ PRIMARY KEY Abcdef RANGE(MIN StartTimeStamp MAX EndTimeStamp) ``` +Configuration example with overlapping ranges and open ranges: + +```sql +CREATE TABLE discounts +( + advertiser_id UInt64, + discount_start_date Date, + discount_end_date Nullable(Date), + amount Float64 +) +ENGINE = Memory; + +INSERT INTO discounts VALUES (1, '2015-01-01', Null, 0.1); +INSERT INTO discounts VALUES (1, '2015-01-15', Null, 0.2); +INSERT INTO discounts VALUES (2, '2015-01-01', '2015-01-15', 0.3); +INSERT INTO discounts VALUES (2, '2015-01-04', '2015-01-10', 0.4); +INSERT INTO discounts VALUES (3, '1970-01-01', '2015-01-15', 0.5); +INSERT INTO discounts VALUES (3, '1970-01-01', '2015-01-10', 0.6); + +SELECT * FROM discounts ORDER BY advertiser_id, discount_start_date; +┌─advertiser_id─┬─discount_start_date─┬─discount_end_date─┬─amount─┐ +│ 1 │ 2015-01-01 │ ᴺᵁᴸᴸ │ 0.1 │ +│ 1 │ 2015-01-15 │ ᴺᵁᴸᴸ │ 0.2 │ +│ 2 │ 2015-01-01 │ 2015-01-15 │ 0.3 │ +│ 2 │ 2015-01-04 │ 2015-01-10 │ 0.4 │ +│ 3 │ 1970-01-01 │ 2015-01-15 │ 0.5 │ +│ 3 │ 1970-01-01 │ 2015-01-10 │ 0.6 │ +└───────────────┴─────────────────────┴───────────────────┴────────┘ + +-- RANGE_LOOKUP_STRATEGY 'max' + +CREATE DICTIONARY discounts_dict +( + advertiser_id UInt64, + discount_start_date Date, + discount_end_date Nullable(Date), + amount Float64 +) +PRIMARY KEY advertiser_id +SOURCE(CLICKHOUSE(TABLE discounts)) +LIFETIME(MIN 600 MAX 900) +LAYOUT(RANGE_HASHED(RANGE_LOOKUP_STRATEGY 'max')) +RANGE(MIN discount_start_date MAX discount_end_date); + +select dictGet('discounts_dict', 'amount', 1, toDate('2015-01-14')) res; +┌─res─┐ +│ 0.1 │ -- the only one range is matching: 2015-01-01 - Null +└─────┘ + +select dictGet('discounts_dict', 'amount', 1, toDate('2015-01-16')) res; +┌─res─┐ +│ 0.2 │ -- two ranges are matching, range_min 2015-01-15 (0.2) is bigger than 2015-01-01 (0.1) +└─────┘ + +select dictGet('discounts_dict', 'amount', 2, toDate('2015-01-06')) res; +┌─res─┐ +│ 0.4 │ -- two ranges are matching, range_min 2015-01-04 (0.4) is bigger than 2015-01-01 (0.3) +└─────┘ + +select dictGet('discounts_dict', 'amount', 3, toDate('2015-01-01')) res; +┌─res─┐ +│ 0.5 │ -- two ranges are matching, range_min are equal, 2015-01-15 (0.5) is bigger than 2015-01-10 (0.6) +└─────┘ + +DROP DICTIONARY discounts_dict; + +-- RANGE_LOOKUP_STRATEGY 'min' + +CREATE DICTIONARY discounts_dict +( + advertiser_id UInt64, + discount_start_date Date, + discount_end_date Nullable(Date), + amount Float64 +) +PRIMARY KEY advertiser_id +SOURCE(CLICKHOUSE(TABLE discounts)) +LIFETIME(MIN 600 MAX 900) +LAYOUT(RANGE_HASHED(RANGE_LOOKUP_STRATEGY 'min')) +RANGE(MIN discount_start_date MAX discount_end_date); + +select dictGet('discounts_dict', 'amount', 1, toDate('2015-01-14')) res; +┌─res─┐ +│ 0.1 │ -- the only one range is matching: 2015-01-01 - Null +└─────┘ + +select dictGet('discounts_dict', 'amount', 1, toDate('2015-01-16')) res; +┌─res─┐ +│ 0.1 │ -- two ranges are matching, range_min 2015-01-01 (0.1) is less than 2015-01-15 (0.2) +└─────┘ + +select 
dictGet('discounts_dict', 'amount', 2, toDate('2015-01-06')) res; +┌─res─┐ +│ 0.3 │ -- two ranges are matching, range_min 2015-01-01 (0.3) is less than 2015-01-04 (0.4) +└─────┘ + +select dictGet('discounts_dict', 'amount', 3, toDate('2015-01-01')) res; +┌─res─┐ +│ 0.6 │ -- two ranges are matching, range_min are equal, 2015-01-10 (0.6) is less than 2015-01-15 (0.5) +└─────┘ +``` + ### complex_key_range_hashed The dictionary is stored in memory in the form of a hash table with an ordered array of ranges and their corresponding values (see [range_hashed](#range-hashed)). This type of storage is for use with composite [keys](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md). @@ -483,7 +588,7 @@ Set a large enough cache size. You need to experiment to select the number of ce 3. Assess memory consumption using the `system.dictionaries` table. 4. Increase or decrease the number of cells until the required memory consumption is reached. -:::warning +:::warning Do not use ClickHouse as a source, because it is slow to process queries with random reads. ::: @@ -555,25 +660,30 @@ This type of storage is for use with composite [keys](../../../sql-reference/dic This type of storage is for mapping network prefixes (IP addresses) to metadata such as ASN. -Example: The table contains network prefixes and their corresponding AS number and country code: +**Example** -``` text - +-----------|-----|------+ - | prefix | asn | cca2 | - +=================+=======+========+ - | 202.79.32.0/20 | 17501 | NP | - +-----------|-----|------+ - | 2620:0:870::/48 | 3856 | US | - +-----------|-----|------+ - | 2a02:6b8:1::/48 | 13238 | RU | - +-----------|-----|------+ - | 2001:db8::/32 | 65536 | ZZ | - +-----------|-----|------+ +Suppose we have a table in ClickHouse that contains our IP prefixes and mappings: + +```sql +CREATE TABLE my_ip_addresses ( + prefix String, + asn UInt32, + cca2 String +) +ENGINE = MergeTree +PRIMARY KEY prefix; ``` -When using this type of layout, the structure must have a composite key. +```sql +INSERT INTO my_ip_addresses VALUES + ('202.79.32.0/20', 17501, 'NP'), + ('2620:0:870::/48', 3856, 'US'), + ('2a02:6b8:1::/48', 13238, 'RU'), + ('2001:db8::/32', 65536, 'ZZ') +; +``` -Example: +Let's define an `ip_trie` dictionary for this table. The `ip_trie` layout requires a composite key: ``` xml @@ -607,26 +717,29 @@ Example: or ``` sql -CREATE DICTIONARY somedict ( +CREATE DICTIONARY my_ip_trie_dictionary ( prefix String, asn UInt32, cca2 String DEFAULT '??' ) PRIMARY KEY prefix +SOURCE(CLICKHOUSE(TABLE 'my_ip_addresses')) +LAYOUT(IP_TRIE) +LIFETIME(3600); ``` -The key must have only one String type attribute that contains an allowed IP prefix. Other types are not supported yet. +The key must have only one `String` type attribute that contains an allowed IP prefix. Other types are not supported yet. -For queries, you must use the same functions (`dictGetT` with a tuple) as for dictionaries with composite keys: +For queries, you must use the same functions (`dictGetT` with a tuple) as for dictionaries with composite keys. The syntax is: ``` sql dictGetT('dict_name', 'attr_name', tuple(ip)) ``` -The function takes either `UInt32` for IPv4, or `FixedString(16)` for IPv6: +The function takes either `UInt32` for IPv4, or `FixedString(16)` for IPv6. 
For example: ``` sql -dictGetString('prefix', 'asn', tuple(IPv6StringToNum('2001:db8::1'))) +select dictGet('my_ip_trie_dictionary', 'asn', tuple(IPv6StringToNum('2001:db8::1'))) ``` Other types are not supported yet. The function returns the attribute for the prefix that corresponds to this IP address. If there are overlapping prefixes, the most specific one is returned. diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 225f2b162ab..1dc1b60c149 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -209,10 +209,25 @@ Aliases: `DAYOFMONTH`, `DAY`. ## toDayOfWeek -Converts a date or date with time to a UInt8 number containing the number of the day of the week (Monday is 1, and Sunday is 7). +Converts a date or date with time to a UInt8 number containing the number of the day of the week. + +The two-argument form of `toDayOfWeek()` enables you to specify whether the week starts on Monday or Sunday, and whether the return value should be in the range from 0 to 6 or 1 to 7. If the mode argument is ommited, the default mode is 0. The time zone of the date can be specified as the third argument. + +| Mode | First day of week | Range | +|------|-------------------|------------------------------------------------| +| 0 | Monday | 1-7: Monday = 1, Tuesday = 2, ..., Sunday = 7 | +| 1 | Monday | 0-6: Monday = 0, Tuesday = 1, ..., Sunday = 6 | +| 2 | Sunday | 0-6: Sunday = 0, Monday = 1, ..., Saturday = 6 | +| 3 | Sunday | 1-7: Sunday = 1, Monday = 2, ..., Saturday = 7 | Alias: `DAYOFWEEK`. +**Syntax** + +``` sql +toDayOfWeek(t[, mode[, timezone]]) +``` + ## toHour Converts a date with time to a UInt8 number containing the number of the hour in 24-hour time (0-23). @@ -316,11 +331,17 @@ If `toLastDayOfMonth` is called with an argument of type `Date` greater then 214 Rounds down a date, or date with time, to the nearest Monday. Returns the date. -## toStartOfWeek(t\[,mode\]) +## toStartOfWeek -Rounds down a date, or date with time, to the nearest Sunday or Monday by mode. +Rounds a date or date with time down to the nearest Sunday or Monday. Returns the date. -The mode argument works exactly like the mode argument to toWeek(). For the single-argument syntax, a mode value of 0 is used. +The mode argument works exactly like the mode argument in function `toWeek()`. If no mode is specified, mode is assumed as 0. + +**Syntax** + +``` sql +toStartOfWeek(t[, mode[, timezone]]) +``` ## toStartOfDay @@ -455,10 +476,12 @@ Converts a date, or date with time, to a UInt16 number containing the ISO Year n Converts a date, or date with time, to a UInt8 number containing the ISO Week number. -## toWeek(date\[,mode\]) +## toWeek + +This function returns the week number for date or datetime. The two-argument form of `toWeek()` enables you to specify whether the week starts on Sunday or Monday and whether the return value should be in the range from 0 to 53 or from 1 to 53. If the mode argument is omitted, the default mode is 0. + +`toISOWeek()` is a compatibility function that is equivalent to `toWeek(date,3)`. -This function returns the week number for date or datetime. The two-argument form of toWeek() enables you to specify whether the week starts on Sunday or Monday and whether the return value should be in the range from 0 to 53 or from 1 to 53. If the mode argument is omitted, the default mode is 0. 
-`toISOWeek()`is a compatibility function that is equivalent to `toWeek(date,3)`. The following table describes how the mode argument works. | Mode | First day of week | Range | Week 1 is the first week … | @@ -482,13 +505,15 @@ For mode values with a meaning of “with 4 or more days this year,” weeks are For mode values with a meaning of “contains January 1”, the week contains January 1 is week 1. It does not matter how many days in the new year the week contained, even if it contained only one day. +**Syntax** + ``` sql -toWeek(date, [, mode][, Timezone]) +toWeek(t[, mode[, time_zone]]) ``` **Arguments** -- `date` – Date or DateTime. +- `t` – Date or DateTime. - `mode` – Optional parameter, Range of values is \[0,9\], default is 0. - `Timezone` – Optional parameter, it behaves like any other conversion function. @@ -504,13 +529,19 @@ SELECT toDate('2016-12-27') AS date, toWeek(date) AS week0, toWeek(date,1) AS we └────────────┴───────┴───────┴───────┘ ``` -## toYearWeek(date\[,mode\]) +## toYearWeek Returns year and week for a date. The year in the result may be different from the year in the date argument for the first and the last week of the year. -The mode argument works exactly like the mode argument to toWeek(). For the single-argument syntax, a mode value of 0 is used. +The mode argument works exactly like the mode argument to `toWeek()`. For the single-argument syntax, a mode value of 0 is used. -`toISOYear()`is a compatibility function that is equivalent to `intDiv(toYearWeek(date,3),100)`. +`toISOYear()` is a compatibility function that is equivalent to `intDiv(toYearWeek(date,3),100)`. + +**Syntax** + +``` sql +toYearWeek(t[, mode[, timezone]]) +``` **Example** @@ -529,6 +560,7 @@ SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(d Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 second. E.g. the difference between `2021-12-29` and `2022-01-01` is 3 days for `day` unit, 0 months for `month` unit, 0 years for `year` unit. +For an alternative to `age`, see function `date\_diff`. **Syntax** @@ -600,8 +632,12 @@ Result: ## date\_diff -Returns the count of the specified `unit` boundaries crossed between the `startdate` and `enddate`. -The difference is calculated using relative units, e.g. the difference between `2021-12-29` and `2022-01-01` is 3 days for day unit (see [toRelativeDayNum](#torelativedaynum)), 1 month for month unit (see [toRelativeMonthNum](#torelativemonthnum)), 1 year for year unit (see [toRelativeYearNum](#torelativeyearnum)). +Returns the count of the specified `unit` boundaries crossed between the `startdate` and the `enddate`. +The difference is calculated using relative units, e.g. the difference between `2021-12-29` and `2022-01-01` is 3 days for unit `day` (see [toRelativeDayNum](#torelativedaynum)), 1 month for unit `month` (see [toRelativeMonthNum](#torelativemonthnum)) and 1 year for unit `year` (see [toRelativeYearNum](#torelativeyearnum)). + +If unit `week` was specified, `date\_diff` assumes that weeks start on Monday. Note that this behavior is different from that of function `toWeek()` in which weeks start by default on Sunday. + +For an alternative to `date\_diff`, see function `age`. **Syntax** @@ -1257,31 +1293,31 @@ Similar to formatDateTime, except that it formats datetime in Joda style instead Using replacement fields, you can define a pattern for the resulting string. 
-| Placeholder | Description | Presentation | Examples | -| ----------- | ----------- | ------------- | -------- | -| G | era | text | AD | -| C | century of era (>=0) | number | 20 | -| Y | year of era (>=0) | year | 1996 | -| x | weekyear(not supported yet) | year | 1996 | -| w | week of weekyear(not supported yet) | number | 27 | -| e | day of week | number | 2 | -| E | day of week | text | Tuesday; Tue | -| y | year | year | 1996 | -| D | day of year | number | 189 | -| M | month of year | month | July; Jul; 07 | -| d | day of month | number | 10 | -| a | halfday of day | text | PM | -| K | hour of halfday (0~11) | number | 0 | -| h | clockhour of halfday (1~12) | number | 12 | -| H | hour of day (0~23) | number | 0 | -| k | clockhour of day (1~24) | number | 24 | -| m | minute of hour | number | 30 | -| s | second of minute | number | 55 | -| S | fraction of second(not supported yet) | number | 978 | -| z | time zone(short name not supported yet) | text | Pacific Standard Time; PST | -| Z | time zone offset/id(not supported yet) | zone | -0800; -08:00; America/Los_Angeles | -| ' | escape for text | delimiter| | -| '' | single quote | literal | ' | +| Placeholder | Description | Presentation | Examples | +| ----------- | ---------------------------------------- | ------------- | ---------------------------------- | +| G | era | text | AD | +| C | century of era (>=0) | number | 20 | +| Y | year of era (>=0) | year | 1996 | +| x | weekyear (not supported yet) | year | 1996 | +| w | week of weekyear (not supported yet) | number | 27 | +| e | day of week | number | 2 | +| E | day of week | text | Tuesday; Tue | +| y | year | year | 1996 | +| D | day of year | number | 189 | +| M | month of year | month | July; Jul; 07 | +| d | day of month | number | 10 | +| a | halfday of day | text | PM | +| K | hour of halfday (0~11) | number | 0 | +| h | clockhour of halfday (1~12) | number | 12 | +| H | hour of day (0~23) | number | 0 | +| k | clockhour of day (1~24) | number | 24 | +| m | minute of hour | number | 30 | +| s | second of minute | number | 55 | +| S | fraction of second (not supported yet) | number | 978 | +| z | time zone (short name not supported yet) | text | Pacific Standard Time; PST | +| Z | time zone offset/id (not supported yet) | zone | -0800; -08:00; America/Los_Angeles | +| ' | escape for text | delimiter | | +| '' | single quote | literal | ' | **Example** @@ -1582,3 +1618,8 @@ Result: │ 2020-01-01 │ └────────────────────────────────────┘ ``` + +## Related content + +- Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse) + diff --git a/docs/en/sql-reference/functions/geo/s2.md b/docs/en/sql-reference/functions/geo/s2.md index ed3c66a0f6f..3cd66cfaaeb 100644 --- a/docs/en/sql-reference/functions/geo/s2.md +++ b/docs/en/sql-reference/functions/geo/s2.md @@ -304,7 +304,7 @@ Result: └──────────────┘ ``` -## s2RectUinion +## s2RectUnion Returns the smallest rectangle containing the union of this rectangle and the given rectangle. In the S2 system, a rectangle is represented by a type of S2Region called a `S2LatLngRect` that represents a rectangle in latitude-longitude space. 
diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md
index 730b494fcb5..ae6cdb7052d 100644
--- a/docs/en/sql-reference/functions/hash-functions.md
+++ b/docs/en/sql-reference/functions/hash-functions.md
@@ -45,37 +45,38 @@ SELECT halfMD5(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')
 
 Calculates the MD4 from a string and returns the resulting set of bytes as FixedString(16).
 
-## MD5
+## MD5 {#hash_functions-md5}
 
 Calculates the MD5 from a string and returns the resulting set of bytes as FixedString(16).
 If you do not need MD5 in particular, but you need a decent cryptographic 128-bit hash, use the ‘sipHash128’ function instead.
 If you want to get the same result as output by the md5sum utility, use lower(hex(MD5(s))).
 
-## sipHash64
+## sipHash64 {#hash_functions-siphash64}
 
-Produces a 64-bit [SipHash](https://131002.net/siphash/) hash value.
+Produces a 64-bit [SipHash](https://en.wikipedia.org/wiki/SipHash) hash value.
 
 ```sql
 sipHash64(par1,...)
 ```
 
-This is a cryptographic hash function. It works at least three times faster than the [MD5](#hash_functions-md5) function.
+This is a cryptographic hash function. It works at least three times faster than the [MD5](#hash_functions-md5) hash function.
 
-Function [interprets](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. Then combines hashes by the following algorithm:
+The function [interprets](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. It then combines the hashes by the following algorithm:
 
-1. After hashing all the input parameters, the function gets the array of hashes.
-2. Function takes the first and the second elements and calculates a hash for the array of them.
-3. Then the function takes the hash value, calculated at the previous step, and the third element of the initial hash array, and calculates a hash for the array of them.
-4. The previous step is repeated for all the remaining elements of the initial hash array.
+1. The first and the second hash values are concatenated to an array which is hashed.
+2. The previously calculated hash value and the hash of the third input parameter are hashed in a similar way.
+3. This calculation is repeated for all remaining hash values of the original input.
 
 **Arguments**
 
-The function takes a variable number of input parameters. Arguments can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data).
+The function takes a variable number of input parameters of any of the [supported data types](/docs/en/sql-reference/data-types/index.md).
 
 **Returned Value**
 
 A [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type hash value.
 
+Note that the calculated hash values may be equal for the same input values of different argument types. This affects for example integer types of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data.
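+
+For example, the hashes of the same small integer passed with different integer types are expected to be equal, because both values reinterpret to the same byte string (the query below is only an illustration of this note):
+
+```sql
+-- both arguments reinterpret to the single byte 0x01, so the two hashes should match
+SELECT sipHash64(toUInt8(1)) = sipHash64(toUInt64(1)) AS same_hash;
+```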
+
 **Example**
 
 ```sql
@@ -84,13 +85,45 @@ SELECT sipHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00
 ```
 
 ```response
 ┌──────────────SipHash─┬─type───┐
-│ 13726873534472839665 │ UInt64 │
+│ 11400366955626497465 │ UInt64 │
 └──────────────────────┴────────┘
 ```
 
+## sipHash64Keyed
+
+Same as [sipHash64](#hash_functions-siphash64) but additionally takes an explicit key argument instead of using a fixed key.
+
+**Syntax**
+
+```sql
+sipHash64Keyed((k0, k1), par1,...)
+```
+
+**Arguments**
+
+Same as [sipHash64](#hash_functions-siphash64), but the first argument is a tuple of two UInt64 values representing the key.
+
+**Returned value**
+
+A [UInt64](/docs/en/sql-reference/data-types/int-uint.md) data type hash value.
+
+**Example**
+
+Query:
+
+```sql
+SELECT sipHash64Keyed((506097522914230528, 1084818905618843912), array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00')) AS SipHash, toTypeName(SipHash) AS type;
+```
+
+```response
+┌─────────────SipHash─┬─type───┐
+│ 8017656310194184311 │ UInt64 │
+└─────────────────────┴────────┘
+```
+
 ## sipHash128
 
-Produces a 128-bit [SipHash](https://131002.net/siphash/) hash value. Differs from [sipHash64](#hash_functions-siphash64) in that the final xor-folding state is done up to 128 bits.
+Like [sipHash64](#hash_functions-siphash64) but produces a 128-bit hash value, i.e. the final xor-folding state is done up to 128 bits.
 
 **Syntax**
 
@@ -100,13 +133,11 @@ sipHash128(par1,...)
 
 **Arguments**
 
-The function takes a variable number of input parameters. Arguments can be any of the [supported data types](/docs/en/sql-reference/data-types/index.md). For some data types calculated value of hash function may be the same for the same values even if types of arguments differ (integers of different size, named and unnamed `Tuple` with the same data, `Map` and the corresponding `Array(Tuple(key, value))` type with the same data).
+Same as for [sipHash64](#hash_functions-siphash64).
 
 **Returned value**
 
-A 128-bit `SipHash` hash value.
-
-Type: [FixedString(16)](/docs/en/sql-reference/data-types/fixedstring.md).
+A 128-bit `SipHash` hash value of type [FixedString(16)](/docs/en/sql-reference/data-types/fixedstring.md).
 
 **Example**
 
@@ -124,6 +155,40 @@ Result:
 └──────────────────────────────────┘
 ```
 
+## sipHash128Keyed
+
+Same as [sipHash128](#hash_functions-siphash128) but additionally takes an explicit key argument instead of using a fixed key.
+
+**Syntax**
+
+```sql
+sipHash128Keyed((k0, k1), par1,...)
+```
+
+**Arguments**
+
+Same as [sipHash128](#hash_functions-siphash128), but the first argument is a tuple of two UInt64 values representing the key.
+
+**Returned value**
+
+A 128-bit `SipHash` hash value of type [FixedString(16)](/docs/en/sql-reference/data-types/fixedstring.md).
+
+**Example**
+
+Query:
+
+```sql
+SELECT hex(sipHash128Keyed((506097522914230528, 1084818905618843912),'foo', '\x01', 3));
+```
+
+Result:
+
+```response
+┌─hex(sipHash128Keyed((506097522914230528, 1084818905618843912), 'foo', '', 3))─┐
+│ B8467F65C8B4CFD9A5F8BD733917D9BF │
+└───────────────────────────────────────────────────────────────────────────────┘
+```
+
 ## cityHash64
 
 Produces a 64-bit [CityHash](https://github.com/google/cityhash) hash value.
diff --git a/docs/en/sql-reference/functions/random-functions.md b/docs/en/sql-reference/functions/random-functions.md index c30893032b3..eb6866d28ea 100644 --- a/docs/en/sql-reference/functions/random-functions.md +++ b/docs/en/sql-reference/functions/random-functions.md @@ -588,3 +588,6 @@ Result: │ aeca2A │ └───────────────────────────────────────┘ ``` + +## Related content +- Blog: [Generating random data in ClickHouse](https://clickhouse.com/blog/generating-random-test-distribution-data-for-clickhouse) diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md index ca192b685bd..7b14b0e96de 100644 --- a/docs/en/sql-reference/functions/string-search-functions.md +++ b/docs/en/sql-reference/functions/string-search-functions.md @@ -95,6 +95,32 @@ Result: └───────────────────────────────┘ ``` +If argument `needle` is empty the following rules apply: +- if no `start_pos` was specified: return `1` +- if `start_pos = 0`: return `1` +- if `start_pos >= 1` and `start_pos <= length(haystack) + 1`: return `start_pos` +- otherwise: return `0` + +The same rules also apply to functions `positionCaseInsensitive`, `positionUTF8` and `positionCaseInsensitiveUTF8` + +``` sql +SELECT + position('abc', ''), + position('abc', '', 0), + position('abc', '', 1), + position('abc', '', 2), + position('abc', '', 3), + position('abc', '', 4), + position('abc', '', 5) +``` + +``` text +┌─position('abc', '')─┬─position('abc', '', 0)─┬─position('abc', '', 1)─┬─position('abc', '', 2)─┬─position('abc', '', 3)─┬─position('abc', '', 4)─┬─position('abc', '', 5)─┐ +│ 1 │ 1 │ 1 │ 2 │ 3 │ 4 │ 0 │ +└─────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┘ +``` + + **Examples for POSITION(needle IN haystack) syntax** Query: diff --git a/docs/en/sql-reference/functions/time-window-functions.md b/docs/en/sql-reference/functions/time-window-functions.md index f03a206da07..c0eed01cccd 100644 --- a/docs/en/sql-reference/functions/time-window-functions.md +++ b/docs/en/sql-reference/functions/time-window-functions.md @@ -115,3 +115,7 @@ Returns the exclusive upper bound of the corresponding hopping window. hopEnd(bounds_tuple); hopEnd(time_attr, hop_interval, window_interval [, timezone]); ``` + +## Related content + +- Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse) \ No newline at end of file diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index bb72ab7cfc3..e587e56b20e 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -1402,6 +1402,8 @@ The output value is a timestamp in UTC, not in the timezone of `DateTime64`. 
```sql toUnixTimestamp64Milli(value) +toUnixTimestamp64Micro(value) +toUnixTimestamp64Nano(value) ``` **Arguments** @@ -1455,7 +1457,9 @@ Converts an `Int64` to a `DateTime64` value with fixed sub-second precision and **Syntax** ``` sql -fromUnixTimestamp64Milli(value [, ti]) +fromUnixTimestamp64Milli(value [, timezone]) +fromUnixTimestamp64Micro(value [, timezone]) +fromUnixTimestamp64Nano(value [, timezone]) ``` **Arguments** diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md index ae8671ffa9d..d580efa4992 100644 --- a/docs/en/sql-reference/statements/alter/column.md +++ b/docs/en/sql-reference/statements/alter/column.md @@ -158,8 +158,6 @@ For examples of columns TTL modifying, see [Column TTL](/docs/en/engines/table-e If the `IF EXISTS` clause is specified, the query won’t return an error if the column does not exist. -The query also can change the order of the columns using `FIRST | AFTER` clause, see [ADD COLUMN](#alter_add-column) description. - When changing the type, values are converted as if the [toType](/docs/en/sql-reference/functions/type-conversion-functions.md) functions were applied to them. If only the default expression is changed, the query does not do anything complex, and is completed almost instantly. Example: @@ -170,6 +168,40 @@ ALTER TABLE visits MODIFY COLUMN browser Array(String) Changing the column type is the only complex action – it changes the contents of files with data. For large tables, this may take a long time. +The query also can change the order of the columns using `FIRST | AFTER` clause, see [ADD COLUMN](#alter_add-column) description, but column type is mandatory in this case. + +Example: + +```sql +CREATE TABLE users ( + c1 Int16, + c2 String +) ENGINE = MergeTree +ORDER BY c1; + +DESCRIBE users; +┌─name─┬─type───┬ +│ c1 │ Int16 │ +│ c2 │ String │ +└──────┴────────┴ + +ALTER TABLE users MODIFY COLUMN c2 String FIRST; + +DESCRIBE users; +┌─name─┬─type───┬ +│ c2 │ String │ +│ c1 │ Int16 │ +└──────┴────────┴ + +ALTER TABLE users ALTER COLUMN c2 TYPE String AFTER c1; + +DESCRIBE users; +┌─name─┬─type───┬ +│ c1 │ Int16 │ +│ c2 │ String │ +└──────┴────────┴ +``` + The `ALTER` query is atomic. For MergeTree tables it is also lock-free. The `ALTER` query for changing columns is replicated. The instructions are saved in ZooKeeper, then each replica applies them. All `ALTER` queries are run in the same order. The query waits for the appropriate actions to be completed on the other replicas. However, a query to change columns in a replicated table can be interrupted, and all actions will be performed asynchronously. diff --git a/docs/en/sql-reference/statements/alter/skipping-index.md b/docs/en/sql-reference/statements/alter/skipping-index.md index 037e4bc38c5..87124c86eac 100644 --- a/docs/en/sql-reference/statements/alter/skipping-index.md +++ b/docs/en/sql-reference/statements/alter/skipping-index.md @@ -12,7 +12,7 @@ The following operations are available: - `ALTER TABLE [db].table_name [ON CLUSTER cluster] ADD INDEX name expression TYPE type GRANULARITY value [FIRST|AFTER name]` - Adds index description to tables metadata. -- `ALTER TABLE [db].table_name [ON CLUSTER cluster] DROP INDEX name` - Removes index description from tables metadata and deletes index files from disk. +- `ALTER TABLE [db].table_name [ON CLUSTER cluster] DROP INDEX name` - Removes index description from tables metadata and deletes index files from disk. 
Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). - `ALTER TABLE [db.]table_name [ON CLUSTER cluster] MATERIALIZE INDEX name [IN PARTITION partition_name]` - Rebuilds the secondary index `name` for the specified `partition_name`. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). If `IN PARTITION` part is omitted then it rebuilds the index for the whole table data. diff --git a/docs/en/sql-reference/statements/alter/ttl.md b/docs/en/sql-reference/statements/alter/ttl.md index a312e8cad91..14865e7bce0 100644 --- a/docs/en/sql-reference/statements/alter/ttl.md +++ b/docs/en/sql-reference/statements/alter/ttl.md @@ -6,6 +6,10 @@ sidebar_label: TTL # Manipulations with Table TTL +:::note +If you are looking for details on using TTL for managing old data, check out the [Manage Data with TTL](/docs/en/guides/developer/ttl.md) user guide. The docs below demonstrate how to alter or remove an existing TTL rule. +::: + ## MODIFY TTL You can change [table TTL](../../../engines/table-engines/mergetree-family/mergetree.md#mergetree-table-ttl) with a request of the following form: diff --git a/docs/en/sql-reference/statements/create/dictionary.md b/docs/en/sql-reference/statements/create/dictionary.md index a470b071971..e789dd9257f 100644 --- a/docs/en/sql-reference/statements/create/dictionary.md +++ b/docs/en/sql-reference/statements/create/dictionary.md @@ -110,7 +110,7 @@ LIFETIME(MIN 0 MAX 1000) ### Create a dictionary from a file available by HTTP(S) ```sql -statement: CREATE DICTIONARY default.taxi_zone_dictionary +CREATE DICTIONARY default.taxi_zone_dictionary ( `LocationID` UInt16 DEFAULT 0, `Borough` String, diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index 68fb968c609..119f25d6d00 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -3,6 +3,7 @@ slug: /en/sql-reference/statements/create/table sidebar_position: 36 sidebar_label: TABLE title: "CREATE TABLE" +keywords: [compression, codec, schema, DDL] --- Creates a new table. This query can have various syntax forms depending on a use case. @@ -293,7 +294,7 @@ These codecs are designed to make compression more effective by using specific f #### Gorilla -`Gorilla` — Calculates XOR between current and previous value and writes it in compact binary form. Efficient when storing a series of floating point values that change slowly, because the best compression rate is achieved when neighboring values are binary equal. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. For additional information, see Compressing Values in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf). +`Gorilla` — Calculates XOR between current and previous floating point value and writes it in compact binary form. The smaller the difference between consecutive values is, i.e. the slower the values of the series changes, the better the compression rate. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. For additional information, see section 4.1 in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](https://doi.org/10.14778/2824032.2824078). 
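+
+For example, a table that stores slowly changing gauge readings could apply the codec to its `Float64` column as sketched below (the table and column names are illustrative; chaining `Gorilla` with a general-purpose codec such as `ZSTD` is optional):
+
+```sql
+CREATE TABLE sensor_readings
+(
+    timestamp DateTime,
+    -- Gorilla is only applicable to Float32/Float64 columns
+    value Float64 CODEC(Gorilla, ZSTD)
+)
+ENGINE = MergeTree
+ORDER BY timestamp;
+```
+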
#### FPC @@ -502,3 +503,9 @@ Result: │ t1 │ The temporary table │ └──────┴─────────────────────┘ ``` + + +## Related content + +- Blog: [Optimizing ClickHouse with Schemas and Codecs](https://clickhouse.com/blog/optimize-clickhouse-codecs-compression-schema) +- Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse) diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index 91f542be285..527b31b36a4 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -350,3 +350,7 @@ The window view is useful in the following scenarios: * **Monitoring**: Aggregate and calculate the metrics logs by time, and output the results to a target table. The dashboard can use the target table as a source table. * **Analyzing**: Automatically aggregate and preprocess data in the time window. This can be useful when analyzing a large number of logs. The preprocessing eliminates repeated calculations in multiple queries and reduces query latency. + +## Related Content + +- Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse) diff --git a/docs/en/sql-reference/statements/delete.md b/docs/en/sql-reference/statements/delete.md index 0dc6cc0d09a..0acb6637ea6 100644 --- a/docs/en/sql-reference/statements/delete.md +++ b/docs/en/sql-reference/statements/delete.md @@ -7,7 +7,7 @@ sidebar_label: DELETE # DELETE Statement ``` sql -DELETE FROM [db.]table [WHERE expr] +DELETE FROM [db.]table [ON CLUSTER cluster] [WHERE expr] ``` `DELETE FROM` removes rows from table `[db.]table` that match expression `expr`. The deleted rows are marked as deleted immediately and will be automatically filtered out of all subsequent queries. Cleanup of data happens asynchronously in background. This feature is only available for MergeTree table engine family. diff --git a/docs/en/sql-reference/statements/explain.md b/docs/en/sql-reference/statements/explain.md index 59af48b79ab..5081abf2fb8 100644 --- a/docs/en/sql-reference/statements/explain.md +++ b/docs/en/sql-reference/statements/explain.md @@ -276,14 +276,12 @@ EXPLAIN json = 1, description = 0, header = 1 SELECT 1, 2 + dummy; With `indexes` = 1, the `Indexes` key is added. It contains an array of used indexes. Each index is described as JSON with `Type` key (a string `MinMax`, `Partition`, `PrimaryKey` or `Skip`) and optional keys: -- `Name` — An index name (for now, is used only for `Skip` index). -- `Keys` — An array of columns used by the index. -- `Condition` — A string with condition used. -- `Description` — An index (for now, is used only for `Skip` index). -- `Initial Parts` — A number of parts before the index is applied. -- `Selected Parts` — A number of parts after the index is applied. -- `Initial Granules` — A number of granules before the index is applied. -- `Selected Granulesis` — A number of granules after the index is applied. +- `Name` — The index name (currently only used for `Skip` indexes). +- `Keys` — The array of columns used by the index. +- `Condition` — The used condition. +- `Description` — The index description (currently only used for `Skip` indexes). +- `Parts` — The number of parts before/after the index is applied. +- `Granules` — The number of granules before/after the index is applied. 
 Example:
 
@@ -294,46 +292,36 @@
           "Type": "MinMax",
           "Keys": ["y"],
           "Condition": "(y in [1, +inf))",
-          "Initial Parts": 5,
-          "Selected Parts": 4,
-          "Initial Granules": 12,
-          "Selected Granules": 11
+          "Parts": 5/4,
+          "Granules": 12/11
         },
         {
           "Type": "Partition",
           "Keys": ["y", "bitAnd(z, 3)"],
           "Condition": "and((bitAnd(z, 3) not in [1, 1]), and((y in [1, +inf)), (bitAnd(z, 3) not in [1, 1])))",
-          "Initial Parts": 4,
-          "Selected Parts": 3,
-          "Initial Granules": 11,
-          "Selected Granules": 10
+          "Parts": 4/3,
+          "Granules": 11/10
        },
        {
          "Type": "PrimaryKey",
          "Keys": ["x", "y"],
          "Condition": "and((x in [11, +inf)), (y in [1, +inf)))",
-          "Initial Parts": 3,
-          "Selected Parts": 2,
-          "Initial Granules": 10,
-          "Selected Granules": 6
+          "Parts": 3/2,
+          "Granules": 10/6
        },
        {
          "Type": "Skip",
          "Name": "t_minmax",
          "Description": "minmax GRANULARITY 2",
-          "Initial Parts": 2,
-          "Selected Parts": 1,
-          "Initial Granules": 6,
-          "Selected Granules": 2
+          "Parts": 2/1,
+          "Granules": 6/2
        },
        {
          "Type": "Skip",
          "Name": "t_set",
          "Description": "set GRANULARITY 2",
-          "Initial Parts": 1,
-          "Selected Parts": 1,
-          "Initial Granules": 2,
-          "Selected Granules": 1
+          "Parts": 1/1,
+          "Granules": 2/1
        }
      ]
 ```
diff --git a/docs/en/sql-reference/statements/optimize.md b/docs/en/sql-reference/statements/optimize.md
index 036d3f0599a..93657c75cbf 100644
--- a/docs/en/sql-reference/statements/optimize.md
+++ b/docs/en/sql-reference/statements/optimize.md
@@ -23,7 +23,7 @@ When `OPTIMIZE` is used with the [ReplicatedMergeTree](../../engines/table-engin
 
 - If `OPTIMIZE` does not perform a merge for any reason, it does not notify the client. To enable notifications, use the [optimize_throw_if_noop](../../operations/settings/settings.md#setting-optimize_throw_if_noop) setting.
 - If you specify a `PARTITION`, only the specified partition is optimized. [How to set partition expression](alter/partition.md#how-to-set-partition-expression).
-- If you specify `FINAL`, optimization is performed even when all the data is already in one part. Also merge is forced even if concurrent merges are performed.
+- If you specify `FINAL`, optimization is performed even when all the data is already in one part. You can control this behaviour with [optimize_skip_merged_partitions](../../operations/settings/settings.md#optimize-skip-merged-partitions). Also, the merge is forced even if concurrent merges are performed.
 - If you specify `DEDUPLICATE`, then completely identical rows (unless by-clause is specified) will be deduplicated (all columns are compared), it makes sense only for the MergeTree engine.
 
 You can specify how long (in seconds) to wait for inactive replicas to execute `OPTIMIZE` queries by the [replication_wait_for_inactive_replica_timeout](../../operations/settings/settings.md#replication-wait-for-inactive-replica-timeout) setting.
diff --git a/docs/en/sql-reference/statements/select/array-join.md b/docs/en/sql-reference/statements/select/array-join.md
index 4bed43a3301..a1b5e0cdb36 100644
--- a/docs/en/sql-reference/statements/select/array-join.md
+++ b/docs/en/sql-reference/statements/select/array-join.md
@@ -299,3 +299,8 @@ ARRAY JOIN nest AS n, arrayEnumerate(`nest.x`) AS num;
 ## Implementation Details
 
 The query execution order is optimized when running `ARRAY JOIN`. 
Although `ARRAY JOIN` must always be specified before the [WHERE](../../../sql-reference/statements/select/where.md)/[PREWHERE](../../../sql-reference/statements/select/prewhere.md) clause in a query, technically they can be performed in any order, unless result of `ARRAY JOIN` is used for filtering. The processing order is controlled by the query optimizer. + + +## Related content + +- Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse) diff --git a/docs/en/sql-reference/statements/select/except.md b/docs/en/sql-reference/statements/select/except.md index 83bf0879213..f73cbff9819 100644 --- a/docs/en/sql-reference/statements/select/except.md +++ b/docs/en/sql-reference/statements/select/except.md @@ -23,7 +23,9 @@ FROM table2 ``` The condition could be any expression based on your requirements. -**Examples** +## Examples + +Here is a simple example that returns the numbers 1 to 10 that are _not_ a part of the numbers 3 to 8: Query: @@ -33,7 +35,7 @@ SELECT number FROM numbers(1,10) EXCEPT SELECT number FROM numbers(3,6); Result: -``` text +```response ┌─number─┐ │ 1 │ │ 2 │ @@ -42,28 +44,109 @@ Result: └────────┘ ``` -Query: +`EXCEPT` and `INTERSECT` can often be used interchangeably with different Boolean logic, and they are both useful if you have two tables that share a common column (or columns). For example, suppose we have a few million rows of historical cryptocurrency data that contains trade prices and volume: -``` sql -CREATE TABLE t1(one String, two String, three String) ENGINE=Memory(); -CREATE TABLE t2(four String, five String, six String) ENGINE=Memory(); +```sql +CREATE TABLE crypto_prices +( + trade_date Date, + crypto_name String, + volume Float32, + price Float32, + market_cap Float32, + change_1_day Float32 +) +ENGINE = MergeTree +PRIMARY KEY (crypto_name, trade_date); -INSERT INTO t1 VALUES ('q', 'm', 'b'), ('s', 'd', 'f'), ('l', 'p', 'o'), ('s', 'd', 'f'), ('s', 'd', 'f'), ('k', 't', 'd'), ('l', 'p', 'o'); -INSERT INTO t2 VALUES ('q', 'm', 'b'), ('b', 'd', 'k'), ('s', 'y', 't'), ('s', 'd', 'f'), ('m', 'f', 'o'), ('k', 'k', 'd'); +INSERT INTO crypto_prices + SELECT * + FROM s3( + 'https://learn-clickhouse.s3.us-east-2.amazonaws.com/crypto_prices.csv', + 'CSVWithNames' +); -SELECT * FROM t1 EXCEPT SELECT * FROM t2; +SELECT * FROM crypto_prices +WHERE crypto_name = 'Bitcoin' +ORDER BY trade_date DESC +LIMIT 10; +``` + +```response +┌─trade_date─┬─crypto_name─┬──────volume─┬────price─┬───market_cap─┬──change_1_day─┐ +│ 2020-11-02 │ Bitcoin │ 30771456000 │ 13550.49 │ 251119860000 │ -0.013585099 │ +│ 2020-11-01 │ Bitcoin │ 24453857000 │ 13737.11 │ 254569760000 │ -0.0031840964 │ +│ 2020-10-31 │ Bitcoin │ 30306464000 │ 13780.99 │ 255372070000 │ 0.017308505 │ +│ 2020-10-30 │ Bitcoin │ 30581486000 │ 13546.52 │ 251018150000 │ 0.008084608 │ +│ 2020-10-29 │ Bitcoin │ 56499500000 │ 13437.88 │ 248995320000 │ 0.012552661 │ +│ 2020-10-28 │ Bitcoin │ 35867320000 │ 13271.29 │ 245899820000 │ -0.02804481 │ +│ 2020-10-27 │ Bitcoin │ 33749879000 │ 13654.22 │ 252985950000 │ 0.04427984 │ +│ 2020-10-26 │ Bitcoin │ 29461459000 │ 13075.25 │ 242251000000 │ 0.0033826586 │ +│ 2020-10-25 │ Bitcoin │ 24406921000 │ 13031.17 │ 241425220000 │ -0.0058658565 │ +│ 2020-10-24 │ Bitcoin │ 24542319000 │ 13108.06 │ 242839880000 │ 0.013650347 │ +└────────────┴─────────────┴─────────────┴──────────┴──────────────┴───────────────┘ +``` + +Now suppose we have a table named `holdings` that contains a list of cryptocurrencies that we 
own, along with the number of coins: + +```sql +CREATE TABLE holdings +( + crypto_name String, + quantity UInt64 +) +ENGINE = MergeTree +PRIMARY KEY (crypto_name); + +INSERT INTO holdings VALUES + ('Bitcoin', 1000), + ('Bitcoin', 200), + ('Ethereum', 250), + ('Ethereum', 5000), + ('DOGEFI', 10); + ('Bitcoin Diamond', 5000); +``` + +We can use `EXCEPT` to answer a question like **"Which coins do we own have never traded below $10?"**: + +```sql +SELECT crypto_name FROM holdings +EXCEPT +SELECT crypto_name FROM crypto_prices +WHERE price < 10; ``` Result: -``` text -┌─one─┬─two─┬─three─┐ -│ l │ p │ o │ -│ k │ t │ d │ -│ l │ p │ o │ -└─────┴─────┴───────┘ +```response +┌─crypto_name─┐ +│ Bitcoin │ +│ Bitcoin │ +└─────────────┘ ``` +This means of the four cryptocurrencies we own, only Bitcoin has never dropped below $10 (based on the limited data we have here in this example). + +## EXCEPT DISTINCT + +Notice in the previous query we had multiple Bitcoin holdings in the result. You can add `DISTINCT` to `EXCEPT` to eliminate duplicate rows from the result: + +```sql +SELECT crypto_name FROM holdings +EXCEPT DISTINCT +SELECT crypto_name FROM crypto_prices +WHERE price < 10; +``` + +Result: + +```response +┌─crypto_name─┐ +│ Bitcoin │ +└─────────────┘ +``` + + **See Also** - [UNION](union.md#union-clause) diff --git a/docs/en/sql-reference/statements/select/intersect.md b/docs/en/sql-reference/statements/select/intersect.md index f1eb4738543..ea7a39421a5 100644 --- a/docs/en/sql-reference/statements/select/intersect.md +++ b/docs/en/sql-reference/statements/select/intersect.md @@ -24,17 +24,17 @@ FROM table2 ``` The condition could be any expression based on your requirements. -**Examples** +## Examples -Query: +Here is a simple example that intersects the numbers 1 to 10 with the numbers 3 to 8: -``` sql +```sql SELECT number FROM numbers(1,10) INTERSECT SELECT number FROM numbers(3,6); ``` Result: -``` text +```response ┌─number─┐ │ 3 │ │ 4 │ @@ -45,29 +45,112 @@ Result: └────────┘ ``` -Query: +`INTERSECT` is useful if you have two tables that share a common column (or columns). You can intersect the results of two queries, as long as the results contain the same columns. 
For example, suppose we have a few million rows of historical cryptocurrency data that contains trade prices and volume: -``` sql -CREATE TABLE t1(one String, two String, three String) ENGINE=Memory(); -CREATE TABLE t2(four String, five String, six String) ENGINE=Memory(); +```sql +CREATE TABLE crypto_prices +( + trade_date Date, + crypto_name String, + volume Float32, + price Float32, + market_cap Float32, + change_1_day Float32 +) +ENGINE = MergeTree +PRIMARY KEY (crypto_name, trade_date); -INSERT INTO t1 VALUES ('q', 'm', 'b'), ('s', 'd', 'f'), ('l', 'p', 'o'), ('s', 'd', 'f'), ('s', 'd', 'f'), ('k', 't', 'd'), ('l', 'p', 'o'); -INSERT INTO t2 VALUES ('q', 'm', 'b'), ('b', 'd', 'k'), ('s', 'y', 't'), ('s', 'd', 'f'), ('m', 'f', 'o'), ('k', 'k', 'd'); +INSERT INTO crypto_prices + SELECT * + FROM s3( + 'https://learn-clickhouse.s3.us-east-2.amazonaws.com/crypto_prices.csv', + 'CSVWithNames' +); -SELECT * FROM t1 INTERSECT SELECT * FROM t2; +SELECT * FROM crypto_prices +WHERE crypto_name = 'Bitcoin' +ORDER BY trade_date DESC +LIMIT 10; +``` + +```response +┌─trade_date─┬─crypto_name─┬──────volume─┬────price─┬───market_cap─┬──change_1_day─┐ +│ 2020-11-02 │ Bitcoin │ 30771456000 │ 13550.49 │ 251119860000 │ -0.013585099 │ +│ 2020-11-01 │ Bitcoin │ 24453857000 │ 13737.11 │ 254569760000 │ -0.0031840964 │ +│ 2020-10-31 │ Bitcoin │ 30306464000 │ 13780.99 │ 255372070000 │ 0.017308505 │ +│ 2020-10-30 │ Bitcoin │ 30581486000 │ 13546.52 │ 251018150000 │ 0.008084608 │ +│ 2020-10-29 │ Bitcoin │ 56499500000 │ 13437.88 │ 248995320000 │ 0.012552661 │ +│ 2020-10-28 │ Bitcoin │ 35867320000 │ 13271.29 │ 245899820000 │ -0.02804481 │ +│ 2020-10-27 │ Bitcoin │ 33749879000 │ 13654.22 │ 252985950000 │ 0.04427984 │ +│ 2020-10-26 │ Bitcoin │ 29461459000 │ 13075.25 │ 242251000000 │ 0.0033826586 │ +│ 2020-10-25 │ Bitcoin │ 24406921000 │ 13031.17 │ 241425220000 │ -0.0058658565 │ +│ 2020-10-24 │ Bitcoin │ 24542319000 │ 13108.06 │ 242839880000 │ 0.013650347 │ +└────────────┴─────────────┴─────────────┴──────────┴──────────────┴───────────────┘ +``` + +Now suppose we have a table named `holdings` that contains a list of cryptocurrencies that we own, along with the number of coins: + +```sql +CREATE TABLE holdings +( + crypto_name String, + quantity UInt64 +) +ENGINE = MergeTree +PRIMARY KEY (crypto_name); + +INSERT INTO holdings VALUES + ('Bitcoin', 1000), + ('Bitcoin', 200), + ('Ethereum', 250), + ('Ethereum', 5000), + ('DOGEFI', 10); + ('Bitcoin Diamond', 5000); +``` + +We can use `INTERSECT` to answer questions like **"Which coins do we own have traded at a price greater than $100?"**: + +```sql +SELECT crypto_name FROM holdings +INTERSECT +SELECT crypto_name FROM crypto_prices +WHERE price > 100 ``` Result: -``` text -┌─one─┬─two─┬─three─┐ -│ q │ m │ b │ -│ s │ d │ f │ -│ s │ d │ f │ -│ s │ d │ f │ -└─────┴─────┴───────┘ +```response +┌─crypto_name─┐ +│ Bitcoin │ +│ Bitcoin │ +│ Ethereum │ +│ Ethereum │ +└─────────────┘ ``` +This means at some point in time, Bitcoin and Ethereum traded above $100, and DOGEFI and Bitcoin Diamond have never traded above $100 (at least using the data we have here in this example). + +## INTERSECT DISTINCT + +Notice in the previous query we had multiple Bitcoin and Ethereum holdings that traded above $100. It might be nice to remove duplicate rows (since they only repeat what we already know). 
You can add `DISTINCT` to `INTERSECT` to eliminate duplicate rows from the result: + +```sql +SELECT crypto_name FROM holdings +INTERSECT DISTINCT +SELECT crypto_name FROM crypto_prices +WHERE price > 100; +``` + +Result: + +```response +┌─crypto_name─┐ +│ Bitcoin │ +│ Ethereum │ +└─────────────┘ +``` + + **See Also** - [UNION](union.md#union-clause) diff --git a/docs/en/sql-reference/statements/select/order-by.md b/docs/en/sql-reference/statements/select/order-by.md index 41370a38b16..e231a1cc72c 100644 --- a/docs/en/sql-reference/statements/select/order-by.md +++ b/docs/en/sql-reference/statements/select/order-by.md @@ -543,3 +543,7 @@ Result: │ 7 │ original │ 7 │ └─────┴──────────┴───────┘ ``` + +## Related content + +- Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse) diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md index 16fcb7b0c07..18b019dd017 100644 --- a/docs/en/sql-reference/statements/show.md +++ b/docs/en/sql-reference/statements/show.md @@ -510,3 +510,15 @@ Result: **See Also** - [system.settings](../../operations/system-tables/settings.md) table + +## SHOW ENGINES + +``` sql +SHOW ENGINES [INTO OUTFILE filename] [FORMAT format] +``` + +Outputs the content of the [system.table_engines](../../operations/system-tables/table_engines.md) table, that contains description of table engines supported by server and their feature support information. + +**See Also** + +- [system.table_engines](../../operations/system-tables/table_engines.md) table \ No newline at end of file diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md index a82d1447453..300205a7ef4 100644 --- a/docs/en/sql-reference/statements/system.md +++ b/docs/en/sql-reference/statements/system.md @@ -72,7 +72,7 @@ For more convenient (automatic) cache management, see disable_internal_dns_cache ## DROP MARK CACHE -Resets the mark cache. Used in development of ClickHouse and performance tests. +Resets the mark cache. ## DROP REPLICA @@ -94,13 +94,18 @@ The fourth one is useful to remove metadata of dead replica when all other repli ## DROP UNCOMPRESSED CACHE -Reset the uncompressed data cache. Used in development of ClickHouse and performance tests. -For manage uncompressed data cache parameters use following server level settings [uncompressed_cache_size](../../operations/server-configuration-parameters/settings.md#server-settings-uncompressed_cache_size) and query/user/profile level settings [use_uncompressed_cache](../../operations/settings/settings.md#setting-use_uncompressed_cache) +Reset the uncompressed data cache. +The uncompressed data cache is enabled/disabled with the query/user/profile-level setting [use_uncompressed_cache](../../operations/settings/settings.md#setting-use_uncompressed_cache). +Its size can be configured using the server-level setting [uncompressed_cache_size](../../operations/server-configuration-parameters/settings.md#server-settings-uncompressed_cache_size). ## DROP COMPILED EXPRESSION CACHE -Reset the compiled expression cache. Used in development of ClickHouse and performance tests. -Compiled expression cache used when query/user/profile enable option [compile-expressions](../../operations/settings/settings.md#compile-expressions) +Reset the compiled expression cache. 
+The compiled expression cache is enabled/disabled with the query/user/profile-level setting [compile_expressions](../../operations/settings/settings.md#compile-expressions). + +## DROP QUERY CACHE + +Resets the [query cache](../../operations/query-cache.md). ## FLUSH LOGS @@ -357,3 +362,15 @@ Allows to drop filesystem cache. ```sql SYSTEM DROP FILESYSTEM CACHE ``` + +### SYNC FILE CACHE + +:::note +It's too heavy and has potential for misuse. +::: + +Will do sync syscall. + +```sql +SYSTEM SYNC FILE CACHE +``` diff --git a/docs/en/sql-reference/syntax.md b/docs/en/sql-reference/syntax.md index 045c9777ad7..47a3ef16ba2 100644 --- a/docs/en/sql-reference/syntax.md +++ b/docs/en/sql-reference/syntax.md @@ -14,7 +14,7 @@ The `INSERT` query uses both parsers: INSERT INTO t VALUES (1, 'Hello, world'), (2, 'abc'), (3, 'def') ``` -The `INSERT INTO t VALUES` fragment is parsed by the full parser, and the data `(1, 'Hello, world'), (2, 'abc'), (3, 'def')` is parsed by the fast stream parser. You can also turn on the full parser for the data by using the [input_format_values_interpret_expressions](../operations/settings/settings.md#settings-input_format_values_interpret_expressions) setting. When `input_format_values_interpret_expressions = 1`, ClickHouse first tries to parse values with the fast stream parser. If it fails, ClickHouse tries to use the full parser for the data, treating it like an SQL [expression](#syntax-expressions). +The `INSERT INTO t VALUES` fragment is parsed by the full parser, and the data `(1, 'Hello, world'), (2, 'abc'), (3, 'def')` is parsed by the fast stream parser. You can also turn on the full parser for the data by using the [input_format_values_interpret_expressions](../operations/settings/settings-formats.md#settings-input_format_values_interpret_expressions) setting. When `input_format_values_interpret_expressions = 1`, ClickHouse first tries to parse values with the fast stream parser. If it fails, ClickHouse tries to use the full parser for the data, treating it like an SQL [expression](#syntax-expressions). Data can have any format. When a query is received, the server calculates no more than [max_query_size](../operations/settings/settings.md#settings-max_query_size) bytes of the request in RAM (by default, 1 MB), and the rest is stream parsed. It allows for avoiding issues with large `INSERT` queries. diff --git a/docs/en/sql-reference/table-functions/generate.md b/docs/en/sql-reference/table-functions/generate.md index 380c8364090..b53ccdd42b5 100644 --- a/docs/en/sql-reference/table-functions/generate.md +++ b/docs/en/sql-reference/table-functions/generate.md @@ -8,7 +8,7 @@ sidebar_label: generateRandom Generates random data with given schema. Allows to populate test tables with data. -Supports all data types that can be stored in table except `LowCardinality` and `AggregateFunction`. +Not all types are supported. ``` sql generateRandom('name TypeName[, name TypeName]...', [, 'random_seed'[, 'max_string_length'[, 'max_array_length']]]) @@ -18,7 +18,7 @@ generateRandom('name TypeName[, name TypeName]...', [, 'random_seed'[, 'max_stri - `name` — Name of corresponding column. - `TypeName` — Type of corresponding column. -- `max_array_length` — Maximum array length for all generated arrays. Defaults to `10`. +- `max_array_length` — Maximum elements for all generated arrays or maps. Defaults to `10`. - `max_string_length` — Maximum string length for all generated strings. Defaults to `10`. - `random_seed` — Specify random seed manually to produce stable results. 
If NULL — seed is randomly generated. @@ -51,4 +51,7 @@ SELECT * FROM random; │ [] │ 68091.8197 │ ('2037-10-02 12:44:23.368','039ecab7-81c2-45ee-208c-844e5c6c5652') │ │ [8,-83,0,-22,65,9,-30,28,64] │ -186233.4909 │ ('2062-01-11 00:06:04.124','69563ea1-5ad1-f870-16d8-67061da0df25') │ └──────────────────────────────┴──────────────┴────────────────────────────────────────────────────────────────────┘ -``` \ No newline at end of file +``` + +## Related content +- Blog: [Generating random data in ClickHouse](https://clickhouse.com/blog/generating-random-test-distribution-data-for-clickhouse) diff --git a/docs/en/sql-reference/table-functions/mongodb.md b/docs/en/sql-reference/table-functions/mongodb.md new file mode 100644 index 00000000000..dd063ae1796 --- /dev/null +++ b/docs/en/sql-reference/table-functions/mongodb.md @@ -0,0 +1,74 @@ +--- +slug: /en/sql-reference/table-functions/mongodb +sidebar_position: 42 +sidebar_label: mongodb +--- + +# mongodb + +Allows `SELECT` queries to be performed on data that is stored on a remote MongoDB server. + +**Syntax** + +``` sql +mongodb(host:port, database, collection, user, password, structure [, options]) +``` + +**Arguments** + +- `host:port` — MongoDB server address. + +- `database` — Remote database name. + +- `collection` — Remote collection name. + +- `user` — MongoDB user. + +- `password` — User password. + +- `structure` - The schema for the ClickHouse table returned from this function. + +- `options` - MongoDB connection string options (optional parameter). + + +**Returned Value** + +A table object with the same columns as the original MongoDB table. + + +**Examples** + +Suppose we have a collection named `my_collection` defined in a MongoDB database named `test`, and we insert a couple of documents: + +```sql +db.createUser({user:"test_user",pwd:"password",roles:[{role:"readWrite",db:"test"}]}) + +db.createCollection("my_collection") + +db.my_collection.insertOne( + { log_type: "event", host: "120.5.33.9", command: "check-cpu-usage -w 75 -c 90" } +) + +db.my_collection.insertOne( + { log_type: "event", host: "120.5.33.4", command: "system-check"} +) +``` + +Let's query the collection using the `mongodb` table function: + +```sql +SELECT * FROM mongodb( + '127.0.0.1:27017', + 'test', + 'my_collection', + 'test_user', + 'password', + 'log_type String, host String, command String', + 'connectTimeoutMS=10000' +) +``` + +**See Also** + +- [The `MongoDB` table engine](../../engines/table-engines/integrations/mongodb.md) +- [Using MongoDB as a dictionary source](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources/#mongodb) diff --git a/docs/en/sql-reference/table-functions/postgresql.md b/docs/en/sql-reference/table-functions/postgresql.md index 3c1352cd56c..87fc6ecb234 100644 --- a/docs/en/sql-reference/table-functions/postgresql.md +++ b/docs/en/sql-reference/table-functions/postgresql.md @@ -131,3 +131,6 @@ CREATE TABLE pg_table_schema_with_dots (a UInt32) - [The PostgreSQL table engine](../../engines/table-engines/integrations/postgresql.md) - [Using PostgreSQL as a dictionary source](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-postgresql) + +## Related content +- Blog: [ClickHouse and PostgreSQL - a match made in data heaven - part 1](https://clickhouse.com/blog/migrating-data-between-clickhouse-postgres) diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index 
a545fb630c9..f8107e3310e 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -590,5 +590,6 @@ ORDER BY ## Related Content -- [Window and array functions for Git commit sequences](https://clickhouse.com/blog/clickhouse-window-array-functions-git-commits) -- [Getting Data Into ClickHouse - Part 3 - Using S3](https://clickhouse.com/blog/getting-data-into-clickhouse-part-3-s3) +- Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse) +- Blog: [Window and array functions for Git commit sequences](https://clickhouse.com/blog/clickhouse-window-array-functions-git-commits) +- Blog: [Getting Data Into ClickHouse - Part 3 - Using S3](https://clickhouse.com/blog/getting-data-into-clickhouse-part-3-s3) diff --git a/docs/ru/operations/named-collections.md b/docs/ru/operations/named-collections.md index 330c132f726..ba6b47116ad 100644 --- a/docs/ru/operations/named-collections.md +++ b/docs/ru/operations/named-collections.md @@ -27,7 +27,7 @@ $ cat /etc/clickhouse-server/config.d/named_collections.xml ## Именованные соединения для доступа к S3 -Описание параметров смотри [Табличная Функция S3](../sql-reference/table-functions/s3.md). +Описание параметров смотрите [Табличная Функция S3](../sql-reference/table-functions/s3.md). Пример конфигурации: ```xml @@ -75,7 +75,7 @@ SELECT * FROM s3_engine_table LIMIT 3; ## Пример использования именованных соединений с базой данных MySQL -Описание параметров смотри [mysql](../sql-reference/table-functions/mysql.md). +Описание параметров смотрите [mysql](../sql-reference/table-functions/mysql.md). Пример конфигурации: ```xml @@ -147,7 +147,7 @@ SELECT dictGet('dict', 'B', 2); ## Пример использования именованных соединений с базой данных PostgreSQL -Описание параметров смотри [postgresql](../sql-reference/table-functions/postgresql.md). +Описание параметров смотрите [postgresql](../sql-reference/table-functions/postgresql.md). Пример конфигурации: ```xml @@ -227,3 +227,58 @@ SELECT dictGet('dict', 'b', 2); │ two │ └─────────────────────────┘ ``` + +## Пример использования именованных соединений с удалённой базой данных Сlickhouse + +Описание параметров смотрите [remote](../sql-reference/table-functions/remote.md). 
+ +Пример конфигурации: +```xml + + + + remote_host + 9000 + system + foo + secret + + + +``` + +### Пример использования именованных соединений с табличной функцией remote/remoteSecure + +```sql +SELECT * FROM remote(remote1, table = one); +┌─dummy─┐ +│ 0 │ +└───────┘ + +SELECT * FROM remote(remote1, database = merge(system, '^one')); +┌─dummy─┐ +│ 0 │ +└───────┘ + +INSERT INTO FUNCTION remote(remote1, database = default, table = test) VALUES (1,'a'); + +SELECT * FROM remote(remote1, database = default, table = test); +┌─a─┬─b─┐ +│ 1 │ a │ +└───┴───┘ +``` + +### Пример использования именованных соединений с внешним словарем с источником удалённым сервером Clickhouse + +```sql +CREATE DICTIONARY dict(a Int64, b String) +PRIMARY KEY a +SOURCE(CLICKHOUSE(NAME remote1 TABLE test DB default)) +LIFETIME(MIN 1 MAX 2) +LAYOUT(HASHED()); + +SELECT dictGet('dict', 'b', 1); +┌─dictGet('dict', 'b', 1)─┐ +│ a │ +└─────────────────────────┘ +``` diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 58894611386..2bb0919bc8b 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -1997,6 +1997,21 @@ SELECT * FROM test_table Значение по умолчанию: 0. +## optimize_skip_merged_partitions {#optimize-skip-merged-partitions} + +Включает или отключает оптимизацию для запроса [OPTIMIZE TABLE ... FINAL](../../sql-reference/statements/optimize.md), когда есть только один парт с level > 0 и неистекший TTL. + +- `OPTIMIZE TABLE ... FINAL SETTINGS optimize_skip_merged_partitions=1` + +По умолчанию, `OPTIMIZE TABLE ... FINAL` перезапишет даже один парт. + +Возможные значения: + +- 1 - Включена +- 0 - Выключена + +Значение по умолчанию: 0. + ## optimize_functions_to_subcolumns {#optimize-functions-to-subcolumns} Включает или отключает оптимизацию путем преобразования некоторых функций к чтению подстолбцов, таким образом уменьшая объем данных для чтения. diff --git a/docs/ru/sql-reference/statements/explain.md b/docs/ru/sql-reference/statements/explain.md index 4d5fa70e098..0179c840df6 100644 --- a/docs/ru/sql-reference/statements/explain.md +++ b/docs/ru/sql-reference/statements/explain.md @@ -248,10 +248,8 @@ EXPLAIN json = 1, description = 0, header = 1 SELECT 1, 2 + dummy; - `Keys` — массив столбцов, используемых индексом. - `Condition` — строка с используемым условием. - `Description` — индекс (на данный момент используется только для индекса `Skip`). -- `Initial Parts` — количество кусков до применения индекса. -- `Selected Parts` — количество кусков после применения индекса. -- `Initial Granules` — количество гранул до применения индекса. -- `Selected Granulesis` — количество гранул после применения индекса. +- `Parts` — количество кусков до/после применения индекса. +- `Granules` — количество гранул до/после применения индекса. 
Пример: @@ -262,46 +260,36 @@ EXPLAIN json = 1, description = 0, header = 1 SELECT 1, 2 + dummy; "Type": "MinMax", "Keys": ["y"], "Condition": "(y in [1, +inf))", - "Initial Parts": 5, - "Selected Parts": 4, - "Initial Granules": 12, - "Selected Granules": 11 + "Parts": 5/4, + "Granules": 12/11 }, { "Type": "Partition", "Keys": ["y", "bitAnd(z, 3)"], "Condition": "and((bitAnd(z, 3) not in [1, 1]), and((y in [1, +inf)), (bitAnd(z, 3) not in [1, 1])))", - "Initial Parts": 4, - "Selected Parts": 3, - "Initial Granules": 11, - "Selected Granules": 10 + "Parts": 4/3, + "Granules": 11/10 }, { "Type": "PrimaryKey", "Keys": ["x", "y"], "Condition": "and((x in [11, +inf)), (y in [1, +inf)))", - "Initial Parts": 3, - "Selected Parts": 2, - "Initial Granules": 10, - "Selected Granules": 6 + "Parts": 3/2, + "Granules": 10/6 }, { "Type": "Skip", "Name": "t_minmax", "Description": "minmax GRANULARITY 2", - "Initial Parts": 2, - "Selected Parts": 1, - "Initial Granules": 6, - "Selected Granules": 2 + "Parts": 2/1, + "Granules": 6/2 }, { "Type": "Skip", "Name": "t_set", "Description": "set GRANULARITY 2", - "Initial Parts": 1, - "Selected Parts": 1, - "Initial Granules": 2, - "Selected Granules": 1 + "Parts": 1/1, + "Granules": 2/1 } ] ``` diff --git a/docs/ru/sql-reference/statements/optimize.md b/docs/ru/sql-reference/statements/optimize.md index 0ded60b8796..58762411506 100644 --- a/docs/ru/sql-reference/statements/optimize.md +++ b/docs/ru/sql-reference/statements/optimize.md @@ -24,7 +24,7 @@ OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION I - По умолчанию, если запросу `OPTIMIZE` не удалось выполнить слияние, то ClickHouse не оповещает клиента. Чтобы включить оповещения, используйте настройку [optimize_throw_if_noop](../../operations/settings/settings.md#setting-optimize_throw_if_noop). - Если указать `PARTITION`, то оптимизация выполняется только для указанной партиции. [Как задавать имя партиции в запросах](alter/index.md#alter-how-to-specify-part-expr). -- Если указать `FINAL`, то оптимизация выполняется даже в том случае, если все данные уже лежат в одном куске данных. Кроме того, слияние является принудительным, даже если выполняются параллельные слияния. +- Если указать `FINAL`, то оптимизация выполняется даже в том случае, если все данные уже лежат в одном куске данных. Можно контролировать с помощью настройки [optimize_skip_merged_partitions](../../operations/settings/settings.md#optimize-skip-merged-partitions). Кроме того, слияние является принудительным, даже если выполняются параллельные слияния. - Если указать `DEDUPLICATE`, то произойдет схлопывание полностью одинаковых строк (сравниваются значения во всех столбцах), имеет смысл только для движка MergeTree. Вы можете указать время ожидания (в секундах) выполнения запросов `OPTIMIZE` для неактивных реплик с помощью настройки [replication_wait_for_inactive_replica_timeout](../../operations/settings/settings.md#replication-wait-for-inactive-replica-timeout).
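For reference, a minimal SQL sketch of the setting documented in the settings.md and optimize.md changes above; the table name `merged_table` is hypothetical. With `optimize_skip_merged_partitions = 1`, `OPTIMIZE TABLE ... FINAL` leaves a partition alone when it already consists of a single part with level > 0 and an unexpired TTL, instead of rewriting it.

```sql
-- Hypothetical table name, shown only to illustrate the new setting.
-- Partitions already merged into a single level > 0 part with unexpired TTL
-- are skipped instead of being rewritten.
OPTIMIZE TABLE merged_table FINAL SETTINGS optimize_skip_merged_partitions = 1;
```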
@@ -196,4 +196,5 @@ SELECT * FROM example; ┌─primary_key─┬─secondary_key─┬─value─┬─partition_key─┐ │ 1 │ 1 │ 2 │ 3 │ └─────────────┴───────────────┴───────┴───────────────┘ -``` \ No newline at end of file +``` + diff --git a/docs/tools/.gitignore b/docs/tools/.gitignore index 443cee8638c..8d35cb3277f 100644 --- a/docs/tools/.gitignore +++ b/docs/tools/.gitignore @@ -1,3 +1,2 @@ -build __pycache__ *.pyc diff --git a/docs/tools/README.md b/docs/tools/README.md index 5809f43386a..c7147a0c850 100644 --- a/docs/tools/README.md +++ b/docs/tools/README.md @@ -1 +1 @@ -See https://github.com/ClickHouse/ClickHouse/blob/master/docs/tools/README.md +See https://github.com/ClickHouse/clickhouse-docs/blob/main/contrib-writing-guide.md diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp index 26099b352a3..dae3aea2d2e 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -277,7 +277,7 @@ private: } if (queries.empty()) - throw Exception("Empty list of queries.", ErrorCodes::EMPTY_DATA_PASSED); + throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "Empty list of queries."); } else { diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index af1a019e1f8..d741eb30d4a 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -127,6 +127,69 @@ void Client::showWarnings() } } +void Client::parseConnectionsCredentials() +{ + /// It is not possible to correctly handle multiple --host --port options. + if (hosts_and_ports.size() >= 2) + return; + + String host; + std::optional port; + if (hosts_and_ports.empty()) + { + host = config().getString("host", "localhost"); + if (config().has("port")) + port = config().getInt("port"); + } + else + { + host = hosts_and_ports.front().host; + port = hosts_and_ports.front().port; + } + + Strings keys; + config().keys("connections_credentials", keys); + for (const auto & connection : keys) + { + const String & prefix = "connections_credentials." + connection; + + const String & connection_name = config().getString(prefix + ".name", ""); + if (connection_name != host) + continue; + + String connection_hostname; + if (config().has(prefix + ".hostname")) + connection_hostname = config().getString(prefix + ".hostname"); + else + connection_hostname = connection_name; + + /// Set "host" unconditionally (since it is used as a "name"), while + /// other options only if they are not set yet (config.xml/cli + /// options). 
+ config().setString("host", connection_hostname); + if (!hosts_and_ports.empty()) + hosts_and_ports.front().host = connection_hostname; + + if (config().has(prefix + ".port") && !port.has_value()) + config().setInt("port", config().getInt(prefix + ".port")); + if (config().has(prefix + ".secure") && !config().has("secure")) + config().setBool("secure", config().getBool(prefix + ".secure")); + if (config().has(prefix + ".user") && !config().has("user")) + config().setString("user", config().getString(prefix + ".user")); + if (config().has(prefix + ".password") && !config().has("password")) + config().setString("password", config().getString(prefix + ".password")); + if (config().has(prefix + ".database") && !config().has("database")) + config().setString("database", config().getString(prefix + ".database")); + if (config().has(prefix + ".history_file") && !config().has("history_file")) + { + String history_file = config().getString(prefix + ".history_file"); + if (history_file.starts_with("~") && !home_path.empty()) + history_file = home_path + "/" + history_file.substr(1); + config().setString("history_file", history_file); + } + } +} + /// Make query to get all server warnings std::vector Client::loadWarningMessages() { @@ -216,6 +279,8 @@ void Client::initialize(Poco::Util::Application & self) if (env_password) config().setString("password", env_password); + parseConnectionsCredentials(); + // global_context->setApplicationType(Context::ApplicationType::CLIENT); global_context->setQueryParameters(query_parameters); @@ -719,7 +784,7 @@ bool Client::processWithFuzzing(const String & full_query) // uniformity. // Surprisingly, this is a client exception, because we get the // server exception w/o throwing (see onReceiveException()). - client_exception = std::make_unique(getCurrentExceptionMessage(print_stack_trace), getCurrentExceptionCode()); + client_exception = std::make_unique(getCurrentExceptionMessageAndPattern(print_stack_trace), getCurrentExceptionCode()); have_error = true; } @@ -854,7 +919,7 @@ bool Client::processWithFuzzing(const String & full_query) } catch (...) 
{ - client_exception = std::make_unique(getCurrentExceptionMessage(print_stack_trace), getCurrentExceptionCode()); + client_exception = std::make_unique(getCurrentExceptionMessageAndPattern(print_stack_trace), getCurrentExceptionCode()); have_error = true; } @@ -968,7 +1033,7 @@ void Client::processOptions(const OptionsDescription & options_description, if (external_tables.back().file == "-") ++number_of_external_tables_with_stdin_source; if (number_of_external_tables_with_stdin_source > 1) - throw Exception("Two or more external tables has stdin (-) set as --file field", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Two or more external tables has stdin (-) set as --file field"); } catch (const Exception & e) { @@ -1021,7 +1086,7 @@ void Client::processOptions(const OptionsDescription & options_description, } if (options.count("config-file") && options.count("config")) - throw Exception("Two or more configuration files referenced in arguments", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Two or more configuration files referenced in arguments"); if (options.count("config")) config().setString("config-file", options["config"].as()); @@ -1212,14 +1277,14 @@ void Client::readArguments( /// param_name value ++arg_num; if (arg_num >= argc) - throw Exception("Parameter requires value", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter requires value"); arg = argv[arg_num]; query_parameters.emplace(String(param_continuation), String(arg)); } else { if (equal_pos == 0) - throw Exception("Parameter name cannot be empty", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter name cannot be empty"); /// param_name=value query_parameters.emplace(param_continuation.substr(0, equal_pos), param_continuation.substr(equal_pos + 1)); @@ -1233,7 +1298,7 @@ void Client::readArguments( { ++arg_num; if (arg_num >= argc) - throw Exception("Host argument requires value", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Host argument requires value"); arg = argv[arg_num]; host_arg = "--host="; host_arg.append(arg); @@ -1265,7 +1330,7 @@ void Client::readArguments( port_arg.push_back('='); ++arg_num; if (arg_num >= argc) - throw Exception("Port argument requires value", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Port argument requires value"); arg = argv[arg_num]; port_arg.append(arg); } diff --git a/programs/client/Client.h b/programs/client/Client.h index 63f28ca96a2..9f593ea4165 100644 --- a/programs/client/Client.h +++ b/programs/client/Client.h @@ -47,6 +47,7 @@ protected: private: void printChangedSettings() const; void showWarnings(); + void parseConnectionsCredentials(); std::vector loadWarningMessages(); }; } diff --git a/programs/client/clickhouse-client.xml b/programs/client/clickhouse-client.xml index 2923de44045..dbfb267d778 100644 --- a/programs/client/clickhouse-client.xml +++ b/programs/client/clickhouse-client.xml @@ -57,4 +57,28 @@ The same can be done on user-level configuration, just create & adjust: ~/.clickhouse-client/config.xml --> + + + + + + + default + + 127.0.0.1 + 9000 + 1 + default + + + + + + + ]]> diff --git a/programs/compressor/Compressor.cpp b/programs/compressor/Compressor.cpp index fe8debcee27..b60138b5692 100644 --- a/programs/compressor/Compressor.cpp +++ b/programs/compressor/Compressor.cpp @@ -46,7 +46,7 @@ void checkAndWriteHeader(DB::ReadBuffer & in, DB::WriteBuffer & out) UInt32 size_compressed = 
unalignedLoad(&header[1]); if (size_compressed > DBMS_MAX_COMPRESSED_SIZE) - throw DB::Exception("Too large size_compressed. Most likely corrupted data.", DB::ErrorCodes::TOO_LARGE_SIZE_COMPRESSED); + throw DB::Exception(DB::ErrorCodes::TOO_LARGE_SIZE_COMPRESSED, "Too large size_compressed. Most likely corrupted data."); UInt32 size_decompressed = unalignedLoad(&header[5]); @@ -113,10 +113,10 @@ int mainEntryClickHouseCompressor(int argc, char ** argv) codecs = options["codec"].as>(); if ((use_lz4hc || use_zstd || use_deflate_qpl || use_none) && !codecs.empty()) - throw Exception("Wrong options, codec flags like --zstd and --codec options are mutually exclusive", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong options, codec flags like --zstd and --codec options are mutually exclusive"); if (!codecs.empty() && options.count("level")) - throw Exception("Wrong options, --level is not compatible with --codec list", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong options, --level is not compatible with --codec list"); std::string method_family = "LZ4"; diff --git a/programs/copier/ClusterCopier.cpp b/programs/copier/ClusterCopier.cpp index 7653b19f21c..256b40414c5 100644 --- a/programs/copier/ClusterCopier.cpp +++ b/programs/copier/ClusterCopier.cpp @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -77,7 +76,7 @@ decltype(auto) ClusterCopier::retry(T && func, UInt64 max_tries) std::exception_ptr exception; if (max_tries == 0) - throw Exception("Cannot perform zero retries", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot perform zero retries"); for (UInt64 try_number = 1; try_number <= max_tries; ++try_number) { @@ -123,7 +122,7 @@ void ClusterCopier::discoverShardPartitions(const ConnectionTimeouts & timeouts, } catch (Exception & e) { - throw Exception("Partition " + partition_text_quoted + " has incorrect format. " + e.displayText(), ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Partition {} has incorrect format. {}", partition_text_quoted, e.displayText()); } }; @@ -325,8 +324,8 @@ void ClusterCopier::process(const ConnectionTimeouts & timeouts) if (!table_is_done) { - throw Exception("Too many tries to process table " + task_table.table_id + ". Abort remaining execution", - ErrorCodes::UNFINISHED); + throw Exception(ErrorCodes::UNFINISHED, "Too many tries to process table {}. 
Abort remaining execution", + task_table.table_id); } } } @@ -666,7 +665,7 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t } if (inject_fault) - throw Exception("Copy fault injection is activated", ErrorCodes::UNFINISHED); + throw Exception(ErrorCodes::UNFINISHED, "Copy fault injection is activated"); } /// Create node to signal that we finished moving @@ -753,7 +752,7 @@ std::shared_ptr rewriteCreateQueryStorage(const ASTPtr & create_ auto res = std::make_shared(create); if (create.storage == nullptr || new_storage_ast == nullptr) - throw Exception("Storage is not specified", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Storage is not specified"); res->setDatabase(new_table.first); res->setTable(new_table.second); @@ -775,7 +774,7 @@ bool ClusterCopier::tryDropPartitionPiece( const CleanStateClock & clean_state_clock) { if (is_safe_mode) - throw Exception("DROP PARTITION is prohibited in safe mode", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "DROP PARTITION is prohibited in safe mode"); TaskTable & task_table = task_partition.task_shard.task_table; ShardPartitionPiece & partition_piece = task_partition.pieces[current_piece_number]; @@ -944,7 +943,7 @@ bool ClusterCopier::tryProcessTable(const ConnectionTimeouts & timeouts, TaskTab for (const String & partition_name : task_table.ordered_partition_names) { if (!task_table.cluster_partitions.contains(partition_name)) - throw Exception("There are no expected partition " + partition_name + ". It is a bug", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "There are no expected partition {}. It is a bug", partition_name); ClusterPartition & cluster_partition = task_table.cluster_partitions[partition_name]; @@ -1006,7 +1005,7 @@ bool ClusterCopier::tryProcessTable(const ConnectionTimeouts & timeouts, TaskTab /// Previously when we discovered that shard does not contain current partition, we skipped it. /// At this moment partition have to be present. if (it_shard_partition == shard->partition_tasks.end()) - throw Exception("There are no such partition in a shard. This is a bug.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "There are no such partition in a shard. This is a bug."); auto & partition = it_shard_partition->second; expected_shards.emplace_back(shard); @@ -1587,7 +1586,7 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl( auto cancel_check = [&] () { if (zookeeper->expired()) - throw Exception("ZooKeeper session is expired, cancel INSERT SELECT", ErrorCodes::UNFINISHED); + throw Exception(ErrorCodes::UNFINISHED, "ZooKeeper session is expired, cancel INSERT SELECT"); if (!future_is_dirty_checker.valid()) future_is_dirty_checker = zookeeper->asyncExists(piece_is_dirty_flag_path); @@ -1603,7 +1602,7 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl( LogicalClock dirt_discovery_epoch (status.stat.mzxid); if (dirt_discovery_epoch == clean_state_clock.discovery_zxid) return false; - throw Exception("Partition is dirty, cancel INSERT SELECT", ErrorCodes::UNFINISHED); + throw Exception(ErrorCodes::UNFINISHED, "Partition is dirty, cancel INSERT SELECT"); } } @@ -1646,7 +1645,7 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl( future_is_dirty_checker.get(); if (inject_fault) - throw Exception("Copy fault injection is activated", ErrorCodes::UNFINISHED); + throw Exception(ErrorCodes::UNFINISHED, "Copy fault injection is activated"); } catch (...) 
{ diff --git a/programs/copier/Internals.cpp b/programs/copier/Internals.cpp index 128194b401c..56c4dfa1dba 100644 --- a/programs/copier/Internals.cpp +++ b/programs/copier/Internals.cpp @@ -90,9 +90,7 @@ ASTPtr extractPartitionKey(const ASTPtr & storage_ast) if (!endsWith(engine.name, "MergeTree")) { - throw Exception( - "Unsupported engine was specified in " + storage_str + ", only *MergeTree engines are supported", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported engine was specified in {}, only *MergeTree engines are supported", storage_str); } if (isExtendedDefinitionStorage(storage_ast)) @@ -109,14 +107,13 @@ ASTPtr extractPartitionKey(const ASTPtr & storage_ast) size_t min_args = is_replicated ? 3 : 1; if (!engine.arguments) - throw Exception("Expected arguments in " + storage_str, ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected arguments in {}", storage_str); ASTPtr arguments_ast = engine.arguments->clone(); ASTs & arguments = arguments_ast->children; if (arguments.size() < min_args) - throw Exception("Expected at least " + toString(min_args) + " arguments in " + storage_str, - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected at least {} arguments in {}", min_args, storage_str); ASTPtr & month_arg = is_replicated ? arguments[2] : arguments[1]; return makeASTFunction("toYYYYMM", month_arg->clone()); @@ -132,14 +129,12 @@ ASTPtr extractPrimaryKey(const ASTPtr & storage_ast) if (!endsWith(engine.name, "MergeTree")) { - throw Exception("Unsupported engine was specified in " + storage_str + ", only *MergeTree engines are supported", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported engine was specified in {}, only *MergeTree engines are supported", storage_str); } if (!isExtendedDefinitionStorage(storage_ast)) { - throw Exception("Is not extended deginition storage " + storage_str + " Will be fixed later.", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Is not extended deginition storage {} Will be fixed later.", storage_str); } if (storage.primary_key) @@ -158,20 +153,18 @@ ASTPtr extractOrderBy(const ASTPtr & storage_ast) if (!endsWith(engine.name, "MergeTree")) { - throw Exception("Unsupported engine was specified in " + storage_str + ", only *MergeTree engines are supported", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported engine was specified in {}, only *MergeTree engines are supported", storage_str); } if (!isExtendedDefinitionStorage(storage_ast)) { - throw Exception("Is not extended deginition storage " + storage_str + " Will be fixed later.", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Is not extended deginition storage {} Will be fixed later.", storage_str); } if (storage.order_by) return storage.order_by->clone(); - throw Exception("ORDER BY cannot be empty", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "ORDER BY cannot be empty"); } /// Wraps only identifiers with backticks. 
@@ -191,7 +184,7 @@ std::string wrapIdentifiersWithBackticks(const ASTPtr & root) return boost::algorithm::join(function_arguments, ", "); } - throw Exception("Primary key could be represented only as columns or functions from columns.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Primary key could be represented only as columns or functions from columns."); } @@ -210,9 +203,9 @@ Names extractPrimaryKeyColumnNames(const ASTPtr & storage_ast) size_t sorting_key_size = sorting_key_expr_list->children.size(); if (primary_key_size > sorting_key_size) - throw Exception("Primary key must be a prefix of the sorting key, but its length: " - + toString(primary_key_size) + " is greater than the sorting key length: " + toString(sorting_key_size), - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Primary key must be a prefix of the sorting key, but its length: " + "{} is greater than the sorting key length: {}", + primary_key_size, sorting_key_size); Names primary_key_columns; NameSet primary_key_columns_set; @@ -228,12 +221,12 @@ Names extractPrimaryKeyColumnNames(const ASTPtr & storage_ast) { String pk_column = primary_key_expr_list->children[i]->getColumnName(); if (pk_column != sorting_key_column) - throw Exception("Primary key must be a prefix of the sorting key, but the column in the position " - + toString(i) + " is " + sorting_key_column +", not " + pk_column, - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Primary key must be a prefix of the sorting key, " + "but the column in the position {} is {}, not {}", i, sorting_key_column, pk_column); if (!primary_key_columns_set.emplace(pk_column).second) - throw Exception("Primary key contains duplicate columns", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Primary key contains duplicate columns"); primary_key_columns.push_back(wrapIdentifiersWithBackticks(primary_key_expr_list->children[i])); } @@ -250,9 +243,7 @@ bool isReplicatedTableEngine(const ASTPtr & storage_ast) if (!endsWith(engine.name, "MergeTree")) { String storage_str = queryToString(storage_ast); - throw Exception( - "Unsupported engine was specified in " + storage_str + ", only *MergeTree engines are supported", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported engine was specified in {}, only *MergeTree engines are supported", storage_str); } return startsWith(engine.name, "Replicated"); diff --git a/programs/copier/Internals.h b/programs/copier/Internals.h index 64ab0019d05..b3c9936cd33 100644 --- a/programs/copier/Internals.h +++ b/programs/copier/Internals.h @@ -119,7 +119,7 @@ struct TaskStateWithOwner rb >> state >> "\n" >> escape >> res.owner; if (state >= static_cast(TaskState::Unknown)) - throw Exception("Unknown state " + data, ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown state {}", data); res.state = static_cast(state); return res; diff --git a/programs/copier/TaskCluster.cpp b/programs/copier/TaskCluster.cpp index 957c7d2120d..053ef39aa81 100644 --- a/programs/copier/TaskCluster.cpp +++ b/programs/copier/TaskCluster.cpp @@ -19,7 +19,7 @@ void DB::TaskCluster::loadTasks(const Poco::Util::AbstractConfiguration & config clusters_prefix = prefix + "remote_servers"; if (!config.has(clusters_prefix)) - throw Exception("You should specify list of clusters in " + clusters_prefix, ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "You should specify list of clusters in 
{}", clusters_prefix); Poco::Util::AbstractConfiguration::Keys tables_keys; config.keys(prefix + "tables", tables_keys); diff --git a/programs/copier/TaskTable.cpp b/programs/copier/TaskTable.cpp index 65eaf8b7108..451a33a1c02 100644 --- a/programs/copier/TaskTable.cpp +++ b/programs/copier/TaskTable.cpp @@ -102,7 +102,7 @@ TaskTable::TaskTable(TaskCluster & parent, const Poco::Util::AbstractConfigurati for (const String &key : keys) { if (!startsWith(key, "partition")) - throw Exception("Unknown key " + key + " in " + enabled_partitions_prefix, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG, "Unknown key {} in {}", key, enabled_partitions_prefix); enabled_partitions.emplace_back(config.getString(enabled_partitions_prefix + "." + key)); } @@ -213,8 +213,7 @@ ClusterPartition & TaskTable::getClusterPartition(const String & partition_name) { auto it = cluster_partitions.find(partition_name); if (it == cluster_partitions.end()) - throw Exception("There are no cluster partition " + partition_name + " in " + table_id, - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "There are no cluster partition {} in {}", partition_name, table_id); return it->second; } diff --git a/programs/disks/CommandCopy.cpp b/programs/disks/CommandCopy.cpp index 1e4a3ba6908..1cfce7fc022 100644 --- a/programs/disks/CommandCopy.cpp +++ b/programs/disks/CommandCopy.cpp @@ -44,7 +44,7 @@ public: if (command_arguments.size() != 2) { printHelpMessage(); - throw DB::Exception("Bad Arguments", DB::ErrorCodes::BAD_ARGUMENTS); + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); } String disk_name_from = config.getString("diskFrom", config.getString("disk", "default")); diff --git a/programs/disks/CommandLink.cpp b/programs/disks/CommandLink.cpp index 766d03a0b6b..0e94eb87c04 100644 --- a/programs/disks/CommandLink.cpp +++ b/programs/disks/CommandLink.cpp @@ -33,7 +33,7 @@ public: if (command_arguments.size() != 2) { printHelpMessage(); - throw DB::Exception("Bad Arguments", DB::ErrorCodes::BAD_ARGUMENTS); + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); } String disk_name = config.getString("disk", "default"); diff --git a/programs/disks/CommandList.cpp b/programs/disks/CommandList.cpp index a1d41316b9d..470784bff00 100644 --- a/programs/disks/CommandList.cpp +++ b/programs/disks/CommandList.cpp @@ -40,7 +40,7 @@ public: if (command_arguments.size() != 1) { printHelpMessage(); - throw DB::Exception("Bad Arguments", DB::ErrorCodes::BAD_ARGUMENTS); + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); } String disk_name = config.getString("disk", "default"); diff --git a/programs/disks/CommandListDisks.cpp b/programs/disks/CommandListDisks.cpp index a6b38f60a67..7b2fcd16107 100644 --- a/programs/disks/CommandListDisks.cpp +++ b/programs/disks/CommandListDisks.cpp @@ -32,7 +32,7 @@ public: if (!command_arguments.empty()) { printHelpMessage(); - throw DB::Exception("Bad Arguments", DB::ErrorCodes::BAD_ARGUMENTS); + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); } for (const auto & [disk_name, _] : global_context->getDisksMap()) diff --git a/programs/disks/CommandMkDir.cpp b/programs/disks/CommandMkDir.cpp index b4b08391663..c938cc52132 100644 --- a/programs/disks/CommandMkDir.cpp +++ b/programs/disks/CommandMkDir.cpp @@ -41,7 +41,7 @@ public: if (command_arguments.size() != 1) { printHelpMessage(); - throw DB::Exception("Bad Arguments", DB::ErrorCodes::BAD_ARGUMENTS); + throw 
DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); } String disk_name = config.getString("disk", "default"); diff --git a/programs/disks/CommandMove.cpp b/programs/disks/CommandMove.cpp index 085a0e2d5eb..3c564f3bcd3 100644 --- a/programs/disks/CommandMove.cpp +++ b/programs/disks/CommandMove.cpp @@ -32,7 +32,7 @@ public: if (command_arguments.size() != 2) { printHelpMessage(); - throw DB::Exception("Bad Arguments", DB::ErrorCodes::BAD_ARGUMENTS); + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); } String disk_name = config.getString("disk", "default"); diff --git a/programs/disks/CommandRead.cpp b/programs/disks/CommandRead.cpp index 2a04dd7a902..2dd5c191d10 100644 --- a/programs/disks/CommandRead.cpp +++ b/programs/disks/CommandRead.cpp @@ -43,7 +43,7 @@ public: if (command_arguments.size() != 1) { printHelpMessage(); - throw DB::Exception("Bad Arguments", DB::ErrorCodes::BAD_ARGUMENTS); + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); } String disk_name = config.getString("disk", "default"); diff --git a/programs/disks/CommandRemove.cpp b/programs/disks/CommandRemove.cpp index c742cdec042..ff8d4a1c6bb 100644 --- a/programs/disks/CommandRemove.cpp +++ b/programs/disks/CommandRemove.cpp @@ -32,7 +32,7 @@ public: if (command_arguments.size() != 1) { printHelpMessage(); - throw DB::Exception("Bad Arguments", DB::ErrorCodes::BAD_ARGUMENTS); + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); } String disk_name = config.getString("disk", "default"); diff --git a/programs/disks/CommandWrite.cpp b/programs/disks/CommandWrite.cpp index 152cb33c458..b055c6f9343 100644 --- a/programs/disks/CommandWrite.cpp +++ b/programs/disks/CommandWrite.cpp @@ -44,7 +44,7 @@ public: if (command_arguments.size() != 1) { printHelpMessage(); - throw DB::Exception("Bad Arguments", DB::ErrorCodes::BAD_ARGUMENTS); + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); } String disk_name = config.getString("disk", "default"); diff --git a/programs/disks/DisksApp.cpp b/programs/disks/DisksApp.cpp index 0e0e34f7d10..b81cd52f8c8 100644 --- a/programs/disks/DisksApp.cpp +++ b/programs/disks/DisksApp.cpp @@ -119,7 +119,7 @@ void DisksApp::init(std::vector & common_arguments) { std::cerr << "Unknown command name: " << command_name << "\n"; printHelpMessage(options_description); - throw DB::Exception("Bad Arguments", DB::ErrorCodes::BAD_ARGUMENTS); + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); } processOptions(); diff --git a/programs/format/Format.cpp b/programs/format/Format.cpp index 5eed58b15d0..8bab24b5e37 100644 --- a/programs/format/Format.cpp +++ b/programs/format/Format.cpp @@ -165,9 +165,8 @@ int mainEntryClickHouseFormat(int argc, char ** argv) /// should throw exception early and make exception message more readable. 
if (const auto * insert_query = res->as(); insert_query && insert_query->data) { - throw Exception( - "Can't format ASTInsertQuery with data, since data will be lost", - DB::ErrorCodes::INVALID_FORMAT_INSERT_QUERY_WITH_DATA); + throw Exception(DB::ErrorCodes::INVALID_FORMAT_INSERT_QUERY_WITH_DATA, + "Can't format ASTInsertQuery with data, since data will be lost"); } if (!quiet) { diff --git a/programs/git-import/git-import.cpp b/programs/git-import/git-import.cpp index 9e464164da6..82eade8d27b 100644 --- a/programs/git-import/git-import.cpp +++ b/programs/git-import/git-import.cpp @@ -1160,7 +1160,7 @@ void processLog(const Options & options) /// Will run multiple processes in parallel size_t num_threads = options.threads; if (num_threads == 0) - throw Exception("num-threads cannot be zero", ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "num-threads cannot be zero"); std::vector> show_commands(num_threads); for (size_t i = 0; i < num_commits && i < num_threads; ++i) diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 25452b808e2..3a0d3d3a6ca 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -196,7 +196,7 @@ void Keeper::createServer(const std::string & listen_host, const char * port_nam } else { - throw Exception{message, ErrorCodes::NETWORK_ERROR}; + throw Exception::createDeprecated(message, ErrorCodes::NETWORK_ERROR); } } } @@ -375,7 +375,7 @@ try if (effective_user_id == 0) { message += " Run under 'sudo -u " + data_owner + "'."; - throw Exception(message, ErrorCodes::MISMATCHING_USERS_FOR_PROCESS_AND_DATA); + throw Exception::createDeprecated(message, ErrorCodes::MISMATCHING_USERS_FOR_PROCESS_AND_DATA); } else { @@ -484,8 +484,7 @@ try config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC), true), server_pool, socket)); #else UNUSED(port); - throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", - ErrorCodes::SUPPORT_IS_DISABLED}; + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); #endif }); diff --git a/programs/library-bridge/CatBoostLibraryHandler.cpp b/programs/library-bridge/CatBoostLibraryHandler.cpp index 4fe539a53b2..46cebf1186a 100644 --- a/programs/library-bridge/CatBoostLibraryHandler.cpp +++ b/programs/library-bridge/CatBoostLibraryHandler.cpp @@ -169,7 +169,7 @@ std::vector> placeStringColumns(const ColumnRawPtrs & columns, si else if (const auto * column_fixed_string = typeid_cast(column)) data.push_back(placeFixedStringColumn(*column_fixed_string, buffer + i, size)); else - throw Exception("Cannot place string column.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot place string column."); } return data; @@ -243,7 +243,6 @@ ColumnFloat64::MutablePtr CatBoostLibraryHandler::evalImpl( const ColumnRawPtrs & columns, bool cat_features_are_strings) const { - std::string error_msg = "Error occurred while applying CatBoost model: "; size_t column_size = columns.front()->size(); auto result = ColumnFloat64::create(column_size * tree_count); @@ -265,7 +264,8 @@ ColumnFloat64::MutablePtr CatBoostLibraryHandler::evalImpl( result_buf, column_size * tree_count)) { - throw Exception(error_msg + api.GetErrorString(), ErrorCodes::CANNOT_APPLY_CATBOOST_MODEL); + throw Exception(ErrorCodes::CANNOT_APPLY_CATBOOST_MODEL, + "Error occurred while applying CatBoost model: {}", 
api.GetErrorString()); } return result; } @@ -288,7 +288,8 @@ ColumnFloat64::MutablePtr CatBoostLibraryHandler::evalImpl( cat_features_buf, cat_features_count, result_buf, column_size * tree_count)) { - throw Exception(error_msg + api.GetErrorString(), ErrorCodes::CANNOT_APPLY_CATBOOST_MODEL); + throw Exception(ErrorCodes::CANNOT_APPLY_CATBOOST_MODEL, + "Error occurred while applying CatBoost model: {}", api.GetErrorString()); } } else @@ -304,7 +305,8 @@ ColumnFloat64::MutablePtr CatBoostLibraryHandler::evalImpl( cat_features_buf, cat_features_count, result_buf, column_size * tree_count)) { - throw Exception(error_msg + api.GetErrorString(), ErrorCodes::CANNOT_APPLY_CATBOOST_MODEL); + throw Exception(ErrorCodes::CANNOT_APPLY_CATBOOST_MODEL, + "Error occurred while applying CatBoost model: {}", api.GetErrorString()); } } diff --git a/programs/library-bridge/ExternalDictionaryLibraryHandler.cpp b/programs/library-bridge/ExternalDictionaryLibraryHandler.cpp index c60d4a4e5cc..2cc2df03a1d 100644 --- a/programs/library-bridge/ExternalDictionaryLibraryHandler.cpp +++ b/programs/library-bridge/ExternalDictionaryLibraryHandler.cpp @@ -32,7 +32,7 @@ ExternalDictionaryLibraryHandler::ExternalDictionaryLibraryHandler( if (lib_new) lib_data = lib_new(&settings_holder->strings, ExternalDictionaryLibraryAPI::log); else - throw Exception("Method extDict_libNew failed", ErrorCodes::EXTERNAL_LIBRARY_ERROR); + throw Exception(ErrorCodes::EXTERNAL_LIBRARY_ERROR, "Method extDict_libNew failed"); } @@ -173,22 +173,21 @@ Block ExternalDictionaryLibraryHandler::loadKeys(const Columns & key_columns) Block ExternalDictionaryLibraryHandler::dataToBlock(ExternalDictionaryLibraryAPI::RawClickHouseLibraryTable data) { if (!data) - throw Exception("LibraryDictionarySource: No data returned", ErrorCodes::EXTERNAL_LIBRARY_ERROR); + throw Exception(ErrorCodes::EXTERNAL_LIBRARY_ERROR, "LibraryDictionarySource: No data returned"); const auto * columns_received = static_cast(data); if (columns_received->error_code) - throw Exception( - "LibraryDictionarySource: Returned error: " + std::to_string(columns_received->error_code) + " " + (columns_received->error_string ? columns_received->error_string : ""), - ErrorCodes::EXTERNAL_LIBRARY_ERROR); + throw Exception(ErrorCodes::EXTERNAL_LIBRARY_ERROR, "LibraryDictionarySource: Returned error: {} {}", + std::to_string(columns_received->error_code), (columns_received->error_string ? 
columns_received->error_string : "")); MutableColumns columns = sample_block.cloneEmptyColumns(); for (size_t col_n = 0; col_n < columns_received->size; ++col_n) { if (columns.size() != columns_received->data[col_n].size) - throw Exception( - "LibraryDictionarySource: Returned unexpected number of columns: " + std::to_string(columns_received->data[col_n].size) + ", must be " + std::to_string(columns.size()), - ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); + throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "LibraryDictionarySource: " + "Returned unexpected number of columns: {}, must be {}", + columns_received->data[col_n].size, columns.size()); for (size_t row_n = 0; row_n < columns_received->data[col_n].size; ++row_n) { diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index b36d3a85bd3..2f0f98ae857 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -359,7 +359,7 @@ void LocalServer::setupUsers() if (users_config) global_context->setUsersConfig(users_config); else - throw Exception("Can't load config for users", ErrorCodes::CANNOT_LOAD_CONFIG); + throw Exception(ErrorCodes::CANNOT_LOAD_CONFIG, "Can't load config for users"); } void LocalServer::connect() @@ -489,7 +489,7 @@ void LocalServer::processConfig() if (is_interactive && !delayed_interactive) { if (config().has("query") && config().has("queries-file")) - throw Exception("Specify either `query` or `queries-file` option", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Specify either `query` or `queries-file` option"); if (config().has("multiquery")) is_multiquery = true; diff --git a/programs/obfuscator/Obfuscator.cpp b/programs/obfuscator/Obfuscator.cpp index b6952ad6cb0..274ad29a174 100644 --- a/programs/obfuscator/Obfuscator.cpp +++ b/programs/obfuscator/Obfuscator.cpp @@ -880,7 +880,7 @@ public: } if (!it) - throw Exception("Logical error in markov model", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error in markov model"); size_t offset_from_begin_of_string = pos - data; size_t determinator_sliding_window_size = params.determinator_sliding_window_size; @@ -1139,7 +1139,7 @@ public: if (const auto * type = typeid_cast(&data_type)) return std::make_unique(get(*type->getNestedType(), seed, markov_model_params)); - throw Exception("Unsupported data type", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported data type"); } }; @@ -1384,7 +1384,7 @@ try UInt8 version = 0; readBinary(version, model_in); if (version != 0) - throw Exception("Unknown version of the model file", ErrorCodes::UNKNOWN_FORMAT_VERSION); + throw Exception(ErrorCodes::UNKNOWN_FORMAT_VERSION, "Unknown version of the model file"); readBinary(source_rows, model_in); @@ -1392,14 +1392,14 @@ try size_t header_size = 0; readBinary(header_size, model_in); if (header_size != data_types.size()) - throw Exception("The saved model was created for different number of columns", ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS); + throw Exception(ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS, "The saved model was created for different number of columns"); for (size_t i = 0; i < header_size; ++i) { String type; readBinary(type, model_in); if (type != data_types[i]) - throw Exception("The saved model was created for different types of columns", ErrorCodes::TYPE_MISMATCH); + throw Exception(ErrorCodes::TYPE_MISMATCH, "The saved model was created for different types of columns"); } obfuscator.deserialize(model_in); diff --git 
a/programs/odbc-bridge/ColumnInfoHandler.cpp b/programs/odbc-bridge/ColumnInfoHandler.cpp index bf11947d436..6e93246e59a 100644 --- a/programs/odbc-bridge/ColumnInfoHandler.cpp +++ b/programs/odbc-bridge/ColumnInfoHandler.cpp @@ -181,7 +181,7 @@ void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServ } if (columns.empty()) - throw Exception("Columns definition was not returned", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Columns definition was not returned"); WriteBufferFromHTTPServerResponse out( response, diff --git a/programs/odbc-bridge/IdentifierQuoteHandler.cpp b/programs/odbc-bridge/IdentifierQuoteHandler.cpp index 8157e3b6159..f622995bf15 100644 --- a/programs/odbc-bridge/IdentifierQuoteHandler.cpp +++ b/programs/odbc-bridge/IdentifierQuoteHandler.cpp @@ -61,13 +61,18 @@ void IdentifierQuoteHandler::handleRequest(HTTPServerRequest & request, HTTPServ return; } + bool use_connection_pooling = params.getParsed("use_connection_pooling", true); + try { std::string connection_string = params.get("connection_string"); - auto connection = ODBCPooledConnectionFactory::instance().get( - validateODBCConnectionString(connection_string), - getContext()->getSettingsRef().odbc_bridge_connection_pool_size); + nanodbc::ConnectionHolderPtr connection; + if (use_connection_pooling) + connection = ODBCPooledConnectionFactory::instance().get( + validateODBCConnectionString(connection_string), getContext()->getSettingsRef().odbc_bridge_connection_pool_size); + else + connection = std::make_shared(validateODBCConnectionString(connection_string)); auto identifier = getIdentifierQuote(std::move(connection)); diff --git a/programs/odbc-bridge/MainHandler.cpp b/programs/odbc-bridge/MainHandler.cpp index 0875cc2e9d9..9130b3e0f47 100644 --- a/programs/odbc-bridge/MainHandler.cpp +++ b/programs/odbc-bridge/MainHandler.cpp @@ -102,7 +102,9 @@ void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse std::string format = params.get("format", "RowBinary"); std::string connection_string = params.get("connection_string"); + bool use_connection_pooling = params.getParsed("use_connection_pooling", true); LOG_TRACE(log, "Connection string: '{}'", connection_string); + LOG_TRACE(log, "Use pooling: {}", use_connection_pooling); UInt64 max_block_size = DEFAULT_BLOCK_SIZE; if (params.has("max_block_size")) @@ -134,7 +136,7 @@ void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse try { nanodbc::ConnectionHolderPtr connection_handler; - if (getContext()->getSettingsRef().odbc_bridge_use_connection_pooling) + if (use_connection_pooling) connection_handler = ODBCPooledConnectionFactory::instance().get( validateODBCConnectionString(connection_string), getContext()->getSettingsRef().odbc_bridge_connection_pool_size); else diff --git a/programs/odbc-bridge/ODBCBlockInputStream.cpp b/programs/odbc-bridge/ODBCBlockInputStream.cpp index 5bbc39dc559..3aa3d9a652b 100644 --- a/programs/odbc-bridge/ODBCBlockInputStream.cpp +++ b/programs/odbc-bridge/ODBCBlockInputStream.cpp @@ -163,7 +163,7 @@ void ODBCSource::insertValue( break; } default: - throw Exception("Unsupported value type", ErrorCodes::UNKNOWN_TYPE); + throw Exception(ErrorCodes::UNKNOWN_TYPE, "Unsupported value type"); } } diff --git a/programs/odbc-bridge/ODBCPooledConnectionFactory.h b/programs/odbc-bridge/ODBCPooledConnectionFactory.h index f6185bffd1d..e425dea47f7 100644 --- a/programs/odbc-bridge/ODBCPooledConnectionFactory.h +++ 
b/programs/odbc-bridge/ODBCPooledConnectionFactory.h @@ -151,7 +151,7 @@ public: auto connection_available = pool->tryBorrowObject(connection, []() { return nullptr; }, ODBC_POOL_WAIT_TIMEOUT); if (!connection_available) - throw Exception("Unable to fetch connection within the timeout", ErrorCodes::NO_FREE_CONNECTION); + throw Exception(ErrorCodes::NO_FREE_CONNECTION, "Unable to fetch connection within the timeout"); try { diff --git a/programs/odbc-bridge/SchemaAllowedHandler.cpp b/programs/odbc-bridge/SchemaAllowedHandler.cpp index 4d20c8bc3b7..020359f51fd 100644 --- a/programs/odbc-bridge/SchemaAllowedHandler.cpp +++ b/programs/odbc-bridge/SchemaAllowedHandler.cpp @@ -70,13 +70,19 @@ void SchemaAllowedHandler::handleRequest(HTTPServerRequest & request, HTTPServer return; } + bool use_connection_pooling = params.getParsed("use_connection_pooling", true); + try { std::string connection_string = params.get("connection_string"); - auto connection = ODBCPooledConnectionFactory::instance().get( - validateODBCConnectionString(connection_string), - getContext()->getSettingsRef().odbc_bridge_connection_pool_size); + nanodbc::ConnectionHolderPtr connection; + + if (use_connection_pooling) + connection = ODBCPooledConnectionFactory::instance().get( + validateODBCConnectionString(connection_string), getContext()->getSettingsRef().odbc_bridge_connection_pool_size); + else + connection = std::make_shared(validateODBCConnectionString(connection_string)); bool result = isSchemaAllowed(std::move(connection)); diff --git a/programs/odbc-bridge/getIdentifierQuote.cpp b/programs/odbc-bridge/getIdentifierQuote.cpp index 09cdd31bb2e..793e398363c 100644 --- a/programs/odbc-bridge/getIdentifierQuote.cpp +++ b/programs/odbc-bridge/getIdentifierQuote.cpp @@ -44,7 +44,8 @@ IdentifierQuotingStyle getQuotingStyle(nanodbc::ConnectionHolderPtr connection) else if (identifier_quote[0] == '"') return IdentifierQuotingStyle::DoubleQuotes; else - throw Exception("Can not map quote identifier '" + identifier_quote + "' to IdentifierQuotingStyle value", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Can not map quote identifier '{}' to IdentifierQuotingStyle value", identifier_quote); } } diff --git a/programs/odbc-bridge/validateODBCConnectionString.cpp b/programs/odbc-bridge/validateODBCConnectionString.cpp index a87a8f58ede..6c6e11162b4 100644 --- a/programs/odbc-bridge/validateODBCConnectionString.cpp +++ b/programs/odbc-bridge/validateODBCConnectionString.cpp @@ -38,10 +38,10 @@ std::string validateODBCConnectionString(const std::string & connection_string) static constexpr size_t MAX_CONNECTION_STRING_SIZE = 1000; if (connection_string.empty()) - throw Exception("ODBC connection string cannot be empty", ErrorCodes::BAD_ODBC_CONNECTION_STRING); + throw Exception(ErrorCodes::BAD_ODBC_CONNECTION_STRING, "ODBC connection string cannot be empty"); if (connection_string.size() >= MAX_CONNECTION_STRING_SIZE) - throw Exception("ODBC connection string is too long", ErrorCodes::BAD_ODBC_CONNECTION_STRING); + throw Exception(ErrorCodes::BAD_ODBC_CONNECTION_STRING, "ODBC connection string is too long"); const char * pos = connection_string.data(); const char * end = pos + connection_string.size(); @@ -51,7 +51,7 @@ std::string validateODBCConnectionString(const std::string & connection_string) while (pos < end && isWhitespaceASCII(*pos)) { if (*pos != ' ') - throw Exception("ODBC connection string parameter contains unusual whitespace character", 
ErrorCodes::BAD_ODBC_CONNECTION_STRING); + throw Exception(ErrorCodes::BAD_ODBC_CONNECTION_STRING, "ODBC connection string parameter contains unusual whitespace character"); ++pos; } }; @@ -63,7 +63,8 @@ std::string validateODBCConnectionString(const std::string & connection_string) if (pos < end && isValidIdentifierBegin(*pos)) ++pos; else - throw Exception("ODBC connection string parameter name doesn't begin with valid identifier character", ErrorCodes::BAD_ODBC_CONNECTION_STRING); + throw Exception(ErrorCodes::BAD_ODBC_CONNECTION_STRING, + "ODBC connection string parameter name doesn't begin with valid identifier character"); /// Additionally allow dash and dot symbols in names. /// Strictly speaking, the name with that characters should be escaped. @@ -83,7 +84,8 @@ std::string validateODBCConnectionString(const std::string & connection_string) { signed char c = *pos; if (c < 32 || strchr("[]{}(),;?*=!@'\"", c) != nullptr) - throw Exception("ODBC connection string parameter value is unescaped and contains illegal character", ErrorCodes::BAD_ODBC_CONNECTION_STRING); + throw Exception(ErrorCodes::BAD_ODBC_CONNECTION_STRING, + "ODBC connection string parameter value is unescaped and contains illegal character"); ++pos; } @@ -97,7 +99,7 @@ std::string validateODBCConnectionString(const std::string & connection_string) if (pos < end && *pos == '{') ++pos; else - throw Exception("ODBC connection string parameter value doesn't begin with opening curly brace", ErrorCodes::BAD_ODBC_CONNECTION_STRING); + throw Exception(ErrorCodes::BAD_ODBC_CONNECTION_STRING, "ODBC connection string parameter value doesn't begin with opening curly brace"); while (pos < end) { @@ -109,13 +111,13 @@ std::string validateODBCConnectionString(const std::string & connection_string) } if (*pos == 0) - throw Exception("ODBC connection string parameter value contains ASCII NUL character", ErrorCodes::BAD_ODBC_CONNECTION_STRING); + throw Exception(ErrorCodes::BAD_ODBC_CONNECTION_STRING, "ODBC connection string parameter value contains ASCII NUL character"); res += *pos; ++pos; } - throw Exception("ODBC connection string parameter is escaped but there is no closing curly brace", ErrorCodes::BAD_ODBC_CONNECTION_STRING); + throw Exception(ErrorCodes::BAD_ODBC_CONNECTION_STRING, "ODBC connection string parameter is escaped but there is no closing curly brace"); }; auto read_value = [&] @@ -139,25 +141,25 @@ std::string validateODBCConnectionString(const std::string & connection_string) Poco::toUpperInPlace(name); if (name == "FILEDSN" || name == "SAVEFILE" || name == "DRIVER") - throw Exception("ODBC connection string has forbidden parameter", ErrorCodes::BAD_ODBC_CONNECTION_STRING); + throw Exception(ErrorCodes::BAD_ODBC_CONNECTION_STRING, "ODBC connection string has forbidden parameter"); if (pos >= end) - throw Exception("ODBC connection string parameter doesn't have value", ErrorCodes::BAD_ODBC_CONNECTION_STRING); + throw Exception(ErrorCodes::BAD_ODBC_CONNECTION_STRING, "ODBC connection string parameter doesn't have value"); if (*pos == '=') ++pos; else - throw Exception("ODBC connection string parameter doesn't have value", ErrorCodes::BAD_ODBC_CONNECTION_STRING); + throw Exception(ErrorCodes::BAD_ODBC_CONNECTION_STRING, "ODBC connection string parameter doesn't have value"); skip_whitespaces(); std::string value = read_value(); skip_whitespaces(); if (name.size() > MAX_ELEMENT_SIZE || value.size() > MAX_ELEMENT_SIZE) - throw Exception("ODBC connection string has too long keyword or value", 
ErrorCodes::BAD_ODBC_CONNECTION_STRING); + throw Exception(ErrorCodes::BAD_ODBC_CONNECTION_STRING, "ODBC connection string has too long keyword or value"); if (!parameters.emplace(name, value).second) - throw Exception("Duplicate parameter found in ODBC connection string", ErrorCodes::BAD_ODBC_CONNECTION_STRING); + throw Exception(ErrorCodes::BAD_ODBC_CONNECTION_STRING, "Duplicate parameter found in ODBC connection string"); if (pos >= end) break; @@ -165,7 +167,7 @@ std::string validateODBCConnectionString(const std::string & connection_string) if (*pos == ';') ++pos; else - throw Exception("Unexpected character found after parameter value in ODBC connection string", ErrorCodes::BAD_ODBC_CONNECTION_STRING); + throw Exception(ErrorCodes::BAD_ODBC_CONNECTION_STRING, "Unexpected character found after parameter value in ODBC connection string"); } /// Reconstruct the connection string. @@ -173,12 +175,12 @@ std::string validateODBCConnectionString(const std::string & connection_string) auto it = parameters.find("DSN"); if (parameters.end() == it) - throw Exception("DSN parameter is mandatory for ODBC connection string", ErrorCodes::BAD_ODBC_CONNECTION_STRING); + throw Exception(ErrorCodes::BAD_ODBC_CONNECTION_STRING, "DSN parameter is mandatory for ODBC connection string"); std::string dsn = it->second; if (dsn.empty()) - throw Exception("DSN parameter cannot be empty in ODBC connection string", ErrorCodes::BAD_ODBC_CONNECTION_STRING); + throw Exception(ErrorCodes::BAD_ODBC_CONNECTION_STRING, "DSN parameter cannot be empty in ODBC connection string"); parameters.erase(it); @@ -241,7 +243,7 @@ std::string validateODBCConnectionString(const std::string & connection_string) write_element(elem.first, elem.second); if (reconstructed_connection_string.size() >= MAX_CONNECTION_STRING_SIZE) - throw Exception("ODBC connection string is too long", ErrorCodes::BAD_ODBC_CONNECTION_STRING); + throw Exception(ErrorCodes::BAD_ODBC_CONNECTION_STRING, "ODBC connection string is too long"); return reconstructed_connection_string; } diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 419b80ccff2..7b1ab1b8180 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -257,7 +257,7 @@ static std::string getCanonicalPath(std::string && path) { Poco::trimInPlace(path); if (path.empty()) - throw Exception("path configuration parameter is empty", ErrorCodes::INVALID_CONFIG_PARAMETER); + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "path configuration parameter is empty"); if (path.back() != '/') path += '/'; return std::move(path); @@ -416,7 +416,7 @@ void Server::createServer( } else { - throw Exception{message, ErrorCodes::NETWORK_ERROR}; + throw Exception::createDeprecated(message, ErrorCodes::NETWORK_ERROR); } } } @@ -946,7 +946,7 @@ try if (effective_user_id == 0) { message += " Run under 'sudo -u " + data_owner + "'."; - throw Exception(message, ErrorCodes::MISMATCHING_USERS_FOR_PROCESS_AND_DATA); + throw Exception::createDeprecated(message, ErrorCodes::MISMATCHING_USERS_FOR_PROCESS_AND_DATA); } else { @@ -1116,7 +1116,7 @@ try #endif if (config().has("interserver_http_port") && config().has("interserver_https_port")) - throw Exception("Both http and https interserver ports are specified", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG); + throw Exception(ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG, "Both http and https interserver ports are specified"); static const auto interserver_tags = { @@ -1141,7 +1141,7 @@ try int port = parse(port_str); if (port < 0 || port > 
0xFFFF) - throw Exception("Out of range '" + String(port_tag) + "': " + toString(port), ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Out of range '{}': {}", String(port_tag), port); global_context->setInterserverIOAddress(this_host, port); global_context->setInterserverScheme(scheme); @@ -1419,8 +1419,7 @@ try global_context->getSettingsRef().send_timeout.totalSeconds(), true), server_pool, socket)); #else UNUSED(port); - throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", - ErrorCodes::SUPPORT_IS_DISABLED}; + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); #endif }); } @@ -1465,7 +1464,7 @@ try size_t max_cache_size = static_cast(memory_amount * cache_size_to_ram_max_ratio); /// Size of cache for uncompressed blocks. Zero means disabled. - String uncompressed_cache_policy = config().getString("uncompressed_cache_policy", ""); + String uncompressed_cache_policy = config().getString("uncompressed_cache_policy", "SLRU"); LOG_INFO(log, "Uncompressed cache policy name {}", uncompressed_cache_policy); size_t uncompressed_cache_size = config().getUInt64("uncompressed_cache_size", 0); if (uncompressed_cache_size > max_cache_size) @@ -1491,7 +1490,7 @@ try /// Size of cache for marks (index of MergeTree family of tables). size_t mark_cache_size = config().getUInt64("mark_cache_size", 5368709120); - String mark_cache_policy = config().getString("mark_cache_policy", ""); + String mark_cache_policy = config().getString("mark_cache_policy", "SLRU"); if (!mark_cache_size) LOG_ERROR(log, "Too low mark cache size will lead to severe performance degradation."); if (mark_cache_size > max_cache_size) @@ -1517,6 +1516,15 @@ try if (mmap_cache_size) global_context->setMMappedFileCache(mmap_cache_size); + /// A cache for query results. 
+ size_t query_cache_size = config().getUInt64("query_cache.size", 1_GiB); + if (query_cache_size) + global_context->setQueryCache( + query_cache_size, + config().getUInt64("query_cache.max_entries", 1024), + config().getUInt64("query_cache.max_entry_size", 1_MiB), + config().getUInt64("query_cache.max_entry_records", 30'000'000)); + #if USE_EMBEDDED_COMPILER /// 128 MB constexpr size_t compiled_expression_cache_size_default = 1024 * 1024 * 128; @@ -1740,14 +1748,15 @@ try std::lock_guard lock(servers_lock); createServers(config(), listen_hosts, interserver_listen_hosts, listen_try, server_pool, async_metrics, servers); if (servers.empty()) - throw Exception( - "No servers started (add valid listen_host and 'tcp_port' or 'http_port' to configuration file.)", - ErrorCodes::NO_ELEMENTS_IN_CONFIG); + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, + "No servers started (add valid listen_host and 'tcp_port' or 'http_port' " + "to configuration file.)"); } if (servers.empty()) - throw Exception("No servers started (add valid listen_host and 'tcp_port' or 'http_port' to configuration file.)", - ErrorCodes::NO_ELEMENTS_IN_CONFIG); + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, + "No servers started (add valid listen_host and 'tcp_port' or 'http_port' " + "to configuration file.)"); #if USE_SSL CertificateReloader::instance().tryLoad(config()); @@ -1807,7 +1816,7 @@ try String ddl_zookeeper_path = config().getString("distributed_ddl.path", "/clickhouse/task_queue/ddl/"); int pool_size = config().getInt("distributed_ddl.pool_size", 1); if (pool_size < 1) - throw Exception("distributed_ddl.pool_size should be greater then 0", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "distributed_ddl.pool_size should be greater then 0"); global_context->setDDLWorker(std::make_unique(pool_size, ddl_zookeeper_path, global_context, &config(), "distributed_ddl", "DDLWorker", &CurrentMetrics::MaxDDLEntryID, &CurrentMetrics::MaxPushedDDLEntryID)); @@ -1936,8 +1945,7 @@ std::unique_ptr Server::buildProtocolStackFromConfig( #if USE_SSL return TCPServerConnectionFactory::Ptr(new TLSHandlerFactory(*this, conf_name)); #else - throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", - ErrorCodes::SUPPORT_IS_DISABLED}; + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); #endif if (type == "proxy1") @@ -2104,8 +2112,7 @@ void Server::createServers( httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params)); #else UNUSED(port); - throw Exception{"HTTPS protocol is disabled because Poco library was built without NetSSL support.", - ErrorCodes::SUPPORT_IS_DISABLED}; + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "HTTPS protocol is disabled because Poco library was built without NetSSL support."); #endif }); @@ -2167,8 +2174,7 @@ void Server::createServers( new Poco::Net::TCPServerParams)); #else UNUSED(port); - throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", - ErrorCodes::SUPPORT_IS_DISABLED}; + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); #endif }); @@ -2273,8 +2279,7 @@ void Server::createServers( http_params)); #else UNUSED(port); - throw Exception{"SSL support for TCP protocol is 
disabled because Poco library was built without NetSSL support.", - ErrorCodes::SUPPORT_IS_DISABLED}; + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); #endif }); } diff --git a/programs/server/config.xml b/programs/server/config.xml index 9c7dc191ba3..bd46263f851 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -334,6 +334,19 @@ 10000 + + + @@ -1453,6 +1466,14 @@ --> + + + + + + + + always return 1 + if (start_pos == nullptr) + { + for (auto & r : res) + r = 1; + return; + } + + ColumnString::Offset prev_offset = 0; + size_t rows = haystack_offsets.size(); + + if (const ColumnConst * start_pos_const = typeid_cast(&*start_pos)) + { + /// Needle is empty and start_pos is constant + UInt64 start = std::max(start_pos_const->getUInt(0), static_cast(1)); + for (size_t i = 0; i < rows; ++i) + { + size_t haystack_size = Impl::countChars( + reinterpret_cast(pos), reinterpret_cast(pos + haystack_offsets[i] - prev_offset - 1)); + res[i] = (start <= haystack_size + 1) ? start : 0; + + pos = begin + haystack_offsets[i]; + prev_offset = haystack_offsets[i]; + } + return; + } + else + { + /// Needle is empty and start_pos is not constant + for (size_t i = 0; i < rows; ++i) + { + size_t haystack_size = Impl::countChars( + reinterpret_cast(pos), reinterpret_cast(pos + haystack_offsets[i] - prev_offset - 1)); + UInt64 start = start_pos->getUInt(i); + start = std::max(static_cast(1), start); + res[i] = (start <= haystack_size + 1) ? start : 0; + + pos = begin + haystack_offsets[i]; + prev_offset = haystack_offsets[i]; + } + return; + } + } + /// Current index in the array of strings. size_t i = 0; @@ -249,7 +300,7 @@ struct PositionImpl { auto start = std::max(start_pos, UInt64(1)); - if (needle.size() == 0) + if (needle.empty()) { size_t haystack_size = Impl::countChars(data.data(), data.data() + data.size()); res = start <= haystack_size + 1 ? start : 0; @@ -269,8 +320,12 @@ struct PositionImpl std::string data, std::string needle, const ColumnPtr & start_pos, - PaddedPODArray & res) + PaddedPODArray & res, + [[maybe_unused]] ColumnUInt8 * res_null) { + /// `res_null` serves as an output parameter for implementing an XYZOrNull variant. + assert(!res_null); + Impl::toLowerIfNeed(data); Impl::toLowerIfNeed(needle); @@ -303,8 +358,12 @@ struct PositionImpl const ColumnString::Chars & needle_data, const ColumnString::Offsets & needle_offsets, const ColumnPtr & start_pos, - PaddedPODArray & res) + PaddedPODArray & res, + [[maybe_unused]] ColumnUInt8 * res_null) { + /// `res_null` serves as an output parameter for implementing an XYZOrNull variant. + assert(!res_null); + ColumnString::Offset prev_haystack_offset = 0; ColumnString::Offset prev_needle_offset = 0; @@ -363,10 +422,13 @@ struct PositionImpl const ColumnString::Chars & needle_data, const ColumnString::Offsets & needle_offsets, const ColumnPtr & start_pos, - PaddedPODArray & res) + PaddedPODArray & res, + [[maybe_unused]] ColumnUInt8 * res_null) { - /// NOTE You could use haystack indexing. But this is a rare case. + /// `res_null` serves as an output parameter for implementing an XYZOrNull variant. + assert(!res_null); + /// NOTE You could use haystack indexing. But this is a rare case. 
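The new empty-needle branches above give `position(haystack, '', start_pos)` a defined result even when a start position is supplied: the start is clamped to at least 1 and returned as long as it does not point beyond one-past-the-end of the haystack, otherwise the result is 0. A scalar restatement of that rule follows (the function name is hypothetical; the real code works over whole columns and uses `Impl::countChars` for the UTF-8 and case-insensitive variants):

```cpp
// Scalar restatement of the empty-needle rule implemented column-wise above.
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <string>

static std::uint64_t positionWithEmptyNeedle(const std::string & haystack, std::uint64_t start_pos)
{
    const std::uint64_t start = std::max<std::uint64_t>(start_pos, 1);   // start_pos below 1 counts as 1
    const std::uint64_t haystack_size = haystack.size();                 // countChars() in the UTF-8 variants
    return start <= haystack_size + 1 ? start : 0;                       // one past the end still matches
}

int main()
{
    assert(positionWithEmptyNeedle("hello", 0) == 1);
    assert(positionWithEmptyNeedle("hello", 3) == 3);
    assert(positionWithEmptyNeedle("hello", 6) == 6);   // just past the last character
    assert(positionWithEmptyNeedle("hello", 7) == 0);   // beyond that there is nothing to match
    return 0;
}
```

The constant-`start_pos` and per-row-`start_pos` branches above differ only in where that start value comes from; the needle-per-row overloads of the same implementation continue below.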
ColumnString::Offset prev_needle_offset = 0; size_t size = needle_offsets.size(); diff --git a/src/Functions/Regexps.h b/src/Functions/Regexps.h index 0380e8d1750..123640e4db3 100644 --- a/src/Functions/Regexps.h +++ b/src/Functions/Regexps.h @@ -240,7 +240,7 @@ inline Regexps constructRegexps(const std::vector & str_patterns, [[mayb CompilerErrorPtr error(compile_error); if (error->expression < 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, String(error->message)); + throw Exception::createRuntime(ErrorCodes::LOGICAL_ERROR, String(error->message)); else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Pattern '{}' failed with error '{}'", str_patterns[error->expression], String(error->message)); } diff --git a/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h b/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h index 88aa2e72db9..5862265ce7d 100644 --- a/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h +++ b/src/Functions/URL/FirstSignificantSubdomainCustomImpl.h @@ -76,9 +76,8 @@ public: return col_res; } else - throw Exception( - "Illegal column " + arguments[0].column->getName() + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", + arguments[0].column->getName(), getName()); } static void vector(FirstSignificantSubdomainCustomLookup & tld_lookup, diff --git a/src/Functions/URL/FunctionsURL.h b/src/Functions/URL/FunctionsURL.h index a0f106742fb..362042e31e1 100644 --- a/src/Functions/URL/FunctionsURL.h +++ b/src/Functions/URL/FunctionsURL.h @@ -101,7 +101,7 @@ struct ExtractSubstringImpl static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) { - throw Exception("Column of type FixedString is not supported by URL functions", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by URL functions"); } }; @@ -156,7 +156,7 @@ struct CutSubstringImpl static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) { - throw Exception("Column of type FixedString is not supported by URL functions", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by URL functions"); } }; diff --git a/src/Functions/URL/URLHierarchy.cpp b/src/Functions/URL/URLHierarchy.cpp index 80c775ea32d..d3a45efb498 100644 --- a/src/Functions/URL/URLHierarchy.cpp +++ b/src/Functions/URL/URLHierarchy.cpp @@ -26,8 +26,8 @@ public: static void checkArguments(const DataTypes & arguments) { if (!isString(arguments[0])) - throw Exception("Illegal type " + arguments[0]->getName() + " of first argument of function " + getName() + ". Must be String.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}. " + "Must be String.", arguments[0]->getName(), getName()); } void init(const ColumnsWithTypeAndName & /*arguments*/) {} diff --git a/src/Functions/URL/URLPathHierarchy.cpp b/src/Functions/URL/URLPathHierarchy.cpp index b8795e91244..3775748f6ed 100644 --- a/src/Functions/URL/URLPathHierarchy.cpp +++ b/src/Functions/URL/URLPathHierarchy.cpp @@ -25,8 +25,8 @@ public: static void checkArguments(const DataTypes & arguments) { if (!isString(arguments[0])) - throw Exception("Illegal type " + arguments[0]->getName() + " of first argument of function " + getName() + ". 
Must be String.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}. " + "Must be String.", arguments[0]->getName(), getName()); } void init(const ColumnsWithTypeAndName & /*arguments*/) {} diff --git a/src/Functions/URL/decodeURLComponent.cpp b/src/Functions/URL/decodeURLComponent.cpp index eb44ca005a8..9e516e73e3c 100644 --- a/src/Functions/URL/decodeURLComponent.cpp +++ b/src/Functions/URL/decodeURLComponent.cpp @@ -158,7 +158,7 @@ struct CodeURLComponentImpl [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) { - throw Exception("Column of type FixedString is not supported by URL functions", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by URL functions"); } }; diff --git a/src/Functions/URL/extractURLParameterNames.cpp b/src/Functions/URL/extractURLParameterNames.cpp index aaedc6eaac1..4ca2d79d22d 100644 --- a/src/Functions/URL/extractURLParameterNames.cpp +++ b/src/Functions/URL/extractURLParameterNames.cpp @@ -25,8 +25,8 @@ public: static void checkArguments(const DataTypes & arguments) { if (!isString(arguments[0])) - throw Exception("Illegal type " + arguments[0]->getName() + " of first argument of function " + getName() + ". Must be String.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}. " + "Must be String.", arguments[0]->getName(), getName()); } /// Returns the position of the argument that is the column of rows diff --git a/src/Functions/URL/extractURLParameters.cpp b/src/Functions/URL/extractURLParameters.cpp index f83b11c6cc7..a44157e1b35 100644 --- a/src/Functions/URL/extractURLParameters.cpp +++ b/src/Functions/URL/extractURLParameters.cpp @@ -25,8 +25,8 @@ public: static void checkArguments(const DataTypes & arguments) { if (!isString(arguments[0])) - throw Exception("Illegal type " + arguments[0]->getName() + " of first argument of function " + getName() + ". Must be String.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}. " + "Must be String.", arguments[0]->getName(), getName()); } void init(const ColumnsWithTypeAndName & /*arguments*/) {} diff --git a/src/Functions/URL/port.cpp b/src/Functions/URL/port.cpp index 52fa4077c18..65cebae15f1 100644 --- a/src/Functions/URL/port.cpp +++ b/src/Functions/URL/port.cpp @@ -30,17 +30,17 @@ struct FunctionPortImpl : public IFunction DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { if (arguments.size() != 1 && arguments.size() != 2) - throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " - + std::to_string(arguments.size()) + ", should be 1 or 2", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 1 or 2", + getName(), arguments.size()); if (!WhichDataType(arguments[0].type).isString()) - throw Exception("Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName() + ". Must be String.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}. 
" + "Must be String.", arguments[0].type->getName(), getName()); if (arguments.size() == 2 && !WhichDataType(arguments[1].type).isUInt16()) - throw Exception("Illegal type " + arguments[1].type->getName() + " of second argument of function " + getName() + ". Must be UInt16.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of second argument of function {}. " + "Must be UInt16.", arguments[1].type->getName(), getName()); return std::make_shared(); } @@ -53,7 +53,7 @@ struct FunctionPortImpl : public IFunction { const auto * port_column = checkAndGetColumn(arguments[1].column.get()); if (!port_column) - throw Exception("Second argument for function " + getName() + " must be constant UInt16", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument for function {} must be constant UInt16", getName()); default_port = port_column->getValue(); } @@ -68,9 +68,8 @@ struct FunctionPortImpl : public IFunction return col_res; } else - throw Exception( - "Illegal column " + arguments[0].column->getName() + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", + arguments[0].column->getName(), getName()); } private: diff --git a/src/Functions/UTCTimestamp.cpp b/src/Functions/UTCTimestamp.cpp index be137449fd5..258a5fa5d94 100644 --- a/src/Functions/UTCTimestamp.cpp +++ b/src/Functions/UTCTimestamp.cpp @@ -85,7 +85,7 @@ public: { if (!arguments.empty()) { - throw Exception("Arguments size of function " + getName() + " should be 0", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Arguments size of function {} should be 0", getName()); } return std::make_shared(); @@ -95,7 +95,7 @@ public: { if (!arguments.empty()) { - throw Exception("Arguments size of function " + getName() + " should be 0", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Arguments size of function {} should be 0", getName()); } return std::make_unique(time(nullptr), DataTypes(), std::make_shared("UTC")); diff --git a/src/Functions/UniqTheta/FunctionsUniqTheta.h b/src/Functions/UniqTheta/FunctionsUniqTheta.h index 331ca92e3f8..2d616841c7f 100644 --- a/src/Functions/UniqTheta/FunctionsUniqTheta.h +++ b/src/Functions/UniqTheta/FunctionsUniqTheta.h @@ -76,33 +76,32 @@ namespace DB { const auto * sketch_type0 = typeid_cast(arguments[0].get()); if (!(sketch_type0 && sketch_type0->getFunctionName() == "uniqTheta")) - throw Exception( - "First argument for function " + getName() + " must be a uniqTheta but it has type " + arguments[0]->getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First argument for function {} must be a uniqTheta but it has type {}", + getName(), arguments[0]->getName()); const auto * sketch_type1 = typeid_cast(arguments[1].get()); if (!(sketch_type1 && sketch_type1->getFunctionName() == "uniqTheta")) - throw Exception( - "Second argument for function " + getName() + " must be a uniqTheta but it has type " + arguments[1]->getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Second argument for function {} must be a uniqTheta but it has type {}", + getName(), arguments[1]->getName()); const DataTypes & arg_data_types0 = 
sketch_type0->getArgumentsDataTypes(); const DataTypes & arg_data_types1 = sketch_type1->getArgumentsDataTypes(); if (arg_data_types0.size() != arg_data_types1.size()) - throw Exception( - "The nested type in uniqThetas must be the same length, but one is " + std::to_string(arg_data_types0.size()) - + ", and the other is " + std::to_string(arg_data_types1.size()), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "The nested type in uniqThetas must be the same length, " + "but one is {}, and the other is {}", arg_data_types0.size(), arg_data_types1.size()); size_t types_size = arg_data_types0.size(); for (size_t i = 0; i < types_size; ++i) { if (!arg_data_types0[i]->equals(*arg_data_types1[i])) - throw Exception( - "The " + std::to_string(i) + "th nested type in uniqThetas must be the same, but one is " + arg_data_types0[i]->getName() - + ", and the other is " + arg_data_types1[i]->getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "The {}th nested type in uniqThetas must be the same, " + "but one is {}, and the other is {}", + i, arg_data_types0[i]->getName(), arg_data_types1[i]->getName()); } diff --git a/src/Functions/UserDefined/UserDefinedSQLObjectsLoaderFromDisk.cpp b/src/Functions/UserDefined/UserDefinedSQLObjectsLoaderFromDisk.cpp index 93466be54fb..d67c48f166d 100644 --- a/src/Functions/UserDefined/UserDefinedSQLObjectsLoaderFromDisk.cpp +++ b/src/Functions/UserDefined/UserDefinedSQLObjectsLoaderFromDisk.cpp @@ -172,7 +172,8 @@ void UserDefinedSQLObjectsLoaderFromDisk::createDirectory() std::error_code create_dir_error_code; fs::create_directories(dir_path, create_dir_error_code); if (!fs::exists(dir_path) || !fs::is_directory(dir_path) || create_dir_error_code) - throw Exception("Couldn't create directory " + dir_path + " reason: '" + create_dir_error_code.message() + "'", ErrorCodes::DIRECTORY_DOESNT_EXIST); + throw Exception(ErrorCodes::DIRECTORY_DOESNT_EXIST, "Couldn't create directory {} reason: '{}'", + dir_path, create_dir_error_code.message()); } diff --git a/src/Functions/addressToLine.h b/src/Functions/addressToLine.h index 4fd55017144..1410e55d9a9 100644 --- a/src/Functions/addressToLine.h +++ b/src/Functions/addressToLine.h @@ -44,16 +44,14 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { if (arguments.size() != 1) - throw Exception( - "Function " + getName() + " needs exactly one argument; passed " + toString(arguments.size()) + ".", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} needs exactly one argument; passed {}.", + getName(), arguments.size()); const auto & type = arguments[0].type; if (!WhichDataType(type.get()).isUInt64()) - throw Exception( - "The only argument for function " + getName() + " must be UInt64. Found " + type->getName() + " instead.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The only argument for function {} must be UInt64. 
" + "Found {} instead.", getName(), type->getName()); return getDataType(); } @@ -66,8 +64,8 @@ public: const ColumnUInt64 * column_concrete = checkAndGetColumn(column.get()); if (!column_concrete) - throw Exception( - "Illegal column " + column->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", + column->getName(), getName()); const typename ColumnVector::Container & data = column_concrete->getData(); return getResultColumn(data, input_rows_count); diff --git a/src/Functions/addressToSymbol.cpp b/src/Functions/addressToSymbol.cpp index dd9efd6cc44..95d57f6d296 100644 --- a/src/Functions/addressToSymbol.cpp +++ b/src/Functions/addressToSymbol.cpp @@ -49,14 +49,14 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { if (arguments.size() != 1) - throw Exception("Function " + getName() + " needs exactly one argument; passed " - + toString(arguments.size()) + ".", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} needs exactly one argument; passed {}.", + getName(), arguments.size()); const auto & type = arguments[0].type; if (!WhichDataType(type.get()).isUInt64()) - throw Exception("The only argument for function " + getName() + " must be UInt64. Found " - + type->getName() + " instead.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The only argument for function {} must be UInt64. " + "Found {} instead.", getName(), type->getName()); return std::make_shared(); } @@ -75,7 +75,7 @@ public: const ColumnUInt64 * column_concrete = checkAndGetColumn(column.get()); if (!column_concrete) - throw Exception("Illegal column " + column->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", column->getName(), getName()); const typename ColumnVector::Container & data = column_concrete->getData(); auto result_column = ColumnString::create(); diff --git a/src/Functions/appendTrailingCharIfAbsent.cpp b/src/Functions/appendTrailingCharIfAbsent.cpp index 9fe141458fb..62c0bbd4598 100644 --- a/src/Functions/appendTrailingCharIfAbsent.cpp +++ b/src/Functions/appendTrailingCharIfAbsent.cpp @@ -46,10 +46,10 @@ private: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (!isString(arguments[0])) - throw Exception{"Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[0]->getName(), getName()); if (!isString(arguments[1])) - throw Exception{"Illegal type " + arguments[1]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[1]->getName(), getName()); return std::make_shared(); } @@ -63,12 +63,12 @@ private: const auto & column_char = arguments[1].column; if (!checkColumnConst(column_char.get())) - throw Exception{"Second argument of function " + getName() + " must be a constant string", ErrorCodes::ILLEGAL_COLUMN}; + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument of function {} must be a constant string", getName()); String 
trailing_char_str = assert_cast(*column_char).getValue(); if (trailing_char_str.size() != 1) - throw Exception{"Second argument of function " + getName() + " must be a one-character string", ErrorCodes::BAD_ARGUMENTS}; + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument of function {} must be a one-character string", getName()); if (const auto * col = checkAndGetColumn(column.get())) { @@ -108,8 +108,8 @@ private: return col_res; } else - throw Exception{"Illegal column " + arguments[0].column->getName() + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN}; + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", + arguments[0].column->getName(), getName()); } }; diff --git a/src/Functions/array/FunctionArrayMapped.h b/src/Functions/array/FunctionArrayMapped.h index 1484cc91535..89599edd9d1 100644 --- a/src/Functions/array/FunctionArrayMapped.h +++ b/src/Functions/array/FunctionArrayMapped.h @@ -162,14 +162,14 @@ public: const auto * data_type = checkAndGetDataType(arguments[0].type.get()); if (!data_type) - throw Exception("The only argument for function " + getName() + " must be array. Found " - + arguments[0].type->getName() + " instead", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The only argument for function {} must be array. " + "Found {} instead", getName(), arguments[0].type->getName()); DataTypePtr nested_type = data_type->getNestedType(); if (Impl::needBoolean() && !isUInt8(nested_type)) - throw Exception("The only argument for function " + getName() + " must be array of UInt8. Found " - + arguments[0].type->getName() + " instead", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The only argument for function {} must be array of UInt8. " + "Found {} instead", getName(), arguments[0].type->getName()); if constexpr (is_argument_type_array) return Impl::getReturnType(nested_type, nested_type); @@ -179,8 +179,7 @@ public: else { if (arguments.size() > 2 && Impl::needOneArray()) - throw Exception("Function " + getName() + " needs one argument with data", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} needs one argument with data", getName()); const auto * data_type_function = checkAndGetDataType(arguments[0].type.get()); @@ -200,8 +199,9 @@ public: /// - lambda may return Nothing or Nullable(Nothing) because of default implementation of functions /// for these types. In this case we will just create UInt8 const column full of 0. 
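The comment above explains why `Nothing` and `Nullable(Nothing)` must be tolerated alongside `UInt8`/`Nullable(UInt8)` as the lambda's return type when the concrete array function needs a boolean predicate (`Impl::needBoolean()`). A toy model of that acceptance test — the `TypeId` enum, `DataType` struct, and helper names here are invented for illustration and are not ClickHouse's `IDataType` machinery:

```cpp
// Invented miniature type model: strip one level of Nullable, then accept only UInt8 or Nothing.
#include <cassert>

enum class TypeId { UInt8, String, Nothing };

struct DataType
{
    TypeId id;
    bool nullable = false;
};

static DataType removeNullable(DataType type) { type.nullable = false; return type; }
static bool isUInt8(const DataType & type) { return type.id == TypeId::UInt8; }
static bool isNothing(const DataType & type) { return type.id == TypeId::Nothing; }

/// Mirrors the condition guarded below: anything that is neither UInt8 nor Nothing
/// (after removing Nullable) is rejected with ILLEGAL_TYPE_OF_ARGUMENT.
static bool lambdaReturnTypeIsAcceptable(const DataType & return_type)
{
    const DataType stripped = removeNullable(return_type);
    return isUInt8(stripped) || isNothing(stripped);
}

int main()
{
    assert(lambdaReturnTypeIsAcceptable({TypeId::UInt8, false}));
    assert(lambdaReturnTypeIsAcceptable({TypeId::UInt8, true}));     // Nullable(UInt8)
    assert(lambdaReturnTypeIsAcceptable({TypeId::Nothing, true}));   // Nullable(Nothing)
    assert(!lambdaReturnTypeIsAcceptable({TypeId::String, false}));
    return 0;
}
```

The actual check, now throwing through the format-string overload, follows.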
if (Impl::needBoolean() && !isUInt8(removeNullable(return_type)) && !isNothing(removeNullable(return_type))) - throw Exception("Expression for function " + getName() + " must return UInt8 or Nullable(UInt8), found " - + return_type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Expression for function {} must return UInt8 or Nullable(UInt8), found {}", + getName(), return_type->getName()); static_assert(is_argument_type_map || is_argument_type_array, "unsupported type"); @@ -259,14 +259,12 @@ public: const auto & column_with_type_and_name = arguments[0]; if (!column_with_type_and_name.column) - throw Exception("First argument for function " + getName() + " must be a function.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be a function.", getName()); const auto * column_function = typeid_cast(column_with_type_and_name.column.get()); if (!column_function) - throw Exception("First argument for function " + getName() + " must be a function.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be a function.", getName()); ColumnPtr offsets_column; diff --git a/src/Functions/array/arrayAUC.cpp b/src/Functions/array/arrayAUC.cpp index 78c6fae59ca..2890ae55886 100644 --- a/src/Functions/array/arrayAUC.cpp +++ b/src/Functions/array/arrayAUC.cpp @@ -86,7 +86,7 @@ public: static DataTypePtr getReturnType(const DataTypePtr & /* score_type */, const DataTypePtr & label_type) { if (!(isNumber(label_type) || isEnum(label_type))) - throw Exception(std::string(NameArrayAUC::name) + " label must have numeric type.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "{} label must have numeric type.", std::string(NameArrayAUC::name)); return std::make_shared>(); } diff --git a/src/Functions/array/arrayAggregation.cpp b/src/Functions/array/arrayAggregation.cpp index c8eae78dfaa..75ea3a81a7d 100644 --- a/src/Functions/array/arrayAggregation.cpp +++ b/src/Functions/array/arrayAggregation.cpp @@ -132,9 +132,8 @@ struct ArrayAggregateImpl if (!callOnIndexAndDataType(expression_return->getTypeId(), call)) { - throw Exception( - "array aggregation function cannot be performed on type " + expression_return->getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "array aggregation function cannot be performed on type {}", + expression_return->getName()); } return result; @@ -368,7 +367,7 @@ struct ArrayAggregateImpl executeType(mapped, offsets, res)) return res; else - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unexpected column for arraySum: {}" + mapped->getName()); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unexpected column for arraySum: {}", mapped->getName()); } }; diff --git a/src/Functions/array/arrayAll.cpp b/src/Functions/array/arrayAll.cpp index 960f1f59fc0..1d02342b704 100644 --- a/src/Functions/array/arrayAll.cpp +++ b/src/Functions/array/arrayAll.cpp @@ -37,7 +37,7 @@ struct ArrayAllImpl const auto * column_filter_const = checkAndGetColumnConst(&*mapped); if (!column_filter_const) - throw Exception("Unexpected type of filter column", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unexpected type of filter column"); if (column_filter_const->getValue()) return DataTypeUInt8().createColumnConst(array.size(), 1u); diff --git 
a/src/Functions/array/arrayConcat.cpp b/src/Functions/array/arrayConcat.cpp index 1878cc27323..c49565d7b23 100644 --- a/src/Functions/array/arrayConcat.cpp +++ b/src/Functions/array/arrayConcat.cpp @@ -37,14 +37,15 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (arguments.empty()) - throw Exception{"Function " + getName() + " requires at least one argument.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least one argument.", getName()); for (auto i : collections::range(0, arguments.size())) { const auto * array_type = typeid_cast(arguments[i].get()); if (!array_type) - throw Exception("Argument " + std::to_string(i) + " for function " + getName() + " must be an array but it has type " - + arguments[i]->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Argument {} for function {} must be an array but it has type {}.", + i, getName(), arguments[i]->getName()); } return getLeastSupertype(arguments); @@ -86,7 +87,7 @@ public: if (const auto * argument_column_array = typeid_cast(argument_column.get())) sources.emplace_back(GatherUtils::createArraySource(*argument_column_array, is_const, rows)); else - throw Exception{"Arguments for function " + getName() + " must be arrays.", ErrorCodes::LOGICAL_ERROR}; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Arguments for function {} must be arrays.", getName()); } auto sink = GatherUtils::concat(sources); diff --git a/src/Functions/array/arrayCount.cpp b/src/Functions/array/arrayCount.cpp index f7ded051e5e..9aeefbab4fe 100644 --- a/src/Functions/array/arrayCount.cpp +++ b/src/Functions/array/arrayCount.cpp @@ -38,7 +38,7 @@ struct ArrayCountImpl const auto * column_filter_const = checkAndGetColumnConst(&*mapped); if (!column_filter_const) - throw Exception("Unexpected type of filter column", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unexpected type of filter column"); if (column_filter_const->getValue()) { diff --git a/src/Functions/array/arrayCumSum.cpp b/src/Functions/array/arrayCumSum.cpp index 3bb733482b6..489014ebe55 100644 --- a/src/Functions/array/arrayCumSum.cpp +++ b/src/Functions/array/arrayCumSum.cpp @@ -45,7 +45,7 @@ struct ArrayCumSumImpl return std::make_shared(nested); } - throw Exception("arrayCumSum cannot add values of type " + expression_return->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "arrayCumSum cannot add values of type {}", expression_return->getName()); } @@ -154,7 +154,7 @@ struct ArrayCumSumImpl executeType(mapped, array, res)) return res; else - throw Exception("Unexpected column for arrayCumSum: " + mapped->getName(), ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unexpected column for arrayCumSum: {}", mapped->getName()); } }; diff --git a/src/Functions/array/arrayCumSumNonNegative.cpp b/src/Functions/array/arrayCumSumNonNegative.cpp index 1e27b773141..c0062fd8230 100644 --- a/src/Functions/array/arrayCumSumNonNegative.cpp +++ b/src/Functions/array/arrayCumSumNonNegative.cpp @@ -46,7 +46,8 @@ struct ArrayCumSumNonNegativeImpl return std::make_shared(nested); } - throw Exception("arrayCumSumNonNegativeImpl cannot add values of type " + expression_return->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "arrayCumSumNonNegativeImpl cannot add 
values of type {}", expression_return->getName()); } @@ -118,7 +119,7 @@ struct ArrayCumSumNonNegativeImpl executeType(mapped, array, res)) return res; else - throw Exception("Unexpected column for arrayCumSumNonNegativeImpl: " + mapped->getName(), ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unexpected column for arrayCumSumNonNegativeImpl: {}", mapped->getName()); } }; diff --git a/src/Functions/array/arrayDifference.cpp b/src/Functions/array/arrayDifference.cpp index 118fe87270b..ab7b94f98f1 100644 --- a/src/Functions/array/arrayDifference.cpp +++ b/src/Functions/array/arrayDifference.cpp @@ -47,7 +47,7 @@ struct ArrayDifferenceImpl if (which.isDecimal()) return std::make_shared(expression_return); - throw Exception("arrayDifference cannot process values of type " + expression_return->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "arrayDifference cannot process values of type {}", expression_return->getName()); } @@ -148,7 +148,7 @@ struct ArrayDifferenceImpl executeType(mapped, array, res)) return res; else - throw Exception("Unexpected column for arrayDifference: " + mapped->getName(), ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unexpected column for arrayDifference: {}", mapped->getName()); } }; diff --git a/src/Functions/array/arrayDistinct.cpp b/src/Functions/array/arrayDistinct.cpp index 46b0efd2634..527624794ea 100644 --- a/src/Functions/array/arrayDistinct.cpp +++ b/src/Functions/array/arrayDistinct.cpp @@ -48,9 +48,8 @@ public: { const DataTypeArray * array_type = checkAndGetDataType(arguments[0].get()); if (!array_type) - throw Exception("Argument for function " + getName() + " must be array but it " - " has type " + arguments[0]->getName() + ".", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument for function {} must be array but it has type {}.", + getName(), arguments[0]->getName()); auto nested_type = removeNullable(array_type->getNestedType()); diff --git a/src/Functions/array/arrayElement.cpp b/src/Functions/array/arrayElement.cpp index 3e43d8d34ee..299f25b8292 100644 --- a/src/Functions/array/arrayElement.cpp +++ b/src/Functions/array/arrayElement.cpp @@ -496,7 +496,7 @@ ColumnPtr FunctionArrayElement::executeNumberConst( col_nested->getData(), col_array->getOffsets(), -(static_cast(index.safeGet()) + 1), col_res->getData(), builder); } else - throw Exception("Illegal type of array index", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal type of array index"); return col_res; } @@ -558,7 +558,7 @@ FunctionArrayElement::executeStringConst(const ColumnsWithTypeAndName & argument col_res->getOffsets(), builder); else - throw Exception("Illegal type of array index", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal type of array index"); return col_res; } @@ -610,7 +610,7 @@ ColumnPtr FunctionArrayElement::executeGenericConst( ArrayElementGenericImpl::vectorConst( col_nested, col_array->getOffsets(), -(static_cast(index.get() + 1)), *col_res, builder); else - throw Exception("Illegal type of array index", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Illegal type of array index"); return col_res; } @@ -703,8 +703,8 @@ ColumnPtr FunctionArrayElement::executeArgument( || (res = executeConst(arguments, result_type, index_data, builder, input_rows_count)) || (res = executeString(arguments, index_data, builder)) 
|| (res = executeGeneric(arguments, index_data, builder)))) - throw Exception("Illegal column " + arguments[0].column->getName() - + " of first argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + arguments[0].column->getName(), getName()); return res; } @@ -1099,8 +1099,8 @@ ColumnPtr FunctionArrayElement::executeImpl(const ColumnsWithTypeAndName & argum if (col_const_array) is_array_of_nullable = isColumnNullable(col_const_array->getData()); else - throw Exception("Illegal column " + arguments[0].column->getName() - + " of first argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + arguments[0].column->getName(), getName()); } if (!is_array_of_nullable) @@ -1179,8 +1179,7 @@ ColumnPtr FunctionArrayElement::perform(const ColumnsWithTypeAndName & arguments || (res = executeArgument(arguments, result_type, builder, input_rows_count)) || (res = executeArgument(arguments, result_type, builder, input_rows_count)) || (res = executeArgument(arguments, result_type, builder, input_rows_count)))) - throw Exception("Second argument for function " + getName() + " must have UInt or Int type.", - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument for function {} must have UInt or Int type.", getName()); } else { @@ -1190,7 +1189,7 @@ ColumnPtr FunctionArrayElement::perform(const ColumnsWithTypeAndName & arguments builder.initSink(input_rows_count); if (index == 0u) - throw Exception("Array indices are 1-based", ErrorCodes::ZERO_ARRAY_OR_TUPLE_INDEX); + throw Exception(ErrorCodes::ZERO_ARRAY_OR_TUPLE_INDEX, "Array indices are 1-based"); if (!((res = executeNumberConst(arguments, index, builder)) || (res = executeNumberConst(arguments, index, builder)) @@ -1204,9 +1203,8 @@ ColumnPtr FunctionArrayElement::perform(const ColumnsWithTypeAndName & arguments || (res = executeNumberConst(arguments, index, builder)) || (res = executeStringConst (arguments, index, builder)) || (res = executeGenericConst (arguments, index, builder)))) - throw Exception("Illegal column " + arguments[0].column->getName() - + " of first argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + arguments[0].column->getName(), getName()); } return res; diff --git a/src/Functions/array/arrayEnumerate.cpp b/src/Functions/array/arrayEnumerate.cpp index 666e01899bd..175989912dd 100644 --- a/src/Functions/array/arrayEnumerate.cpp +++ b/src/Functions/array/arrayEnumerate.cpp @@ -40,8 +40,9 @@ public: { const DataTypeArray * array_type = checkAndGetDataType(arguments[0].get()); if (!array_type) - throw Exception("First argument for function " + getName() + " must be an array but it has type " - + arguments[0]->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First argument for function {} must be an array but it has type {}.", + getName(), arguments[0]->getName()); return std::make_shared(std::make_shared()); } @@ -68,9 +69,8 @@ public: } else { - throw Exception("Illegal column " + arguments[0].column->getName() - + " of first argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + 
arguments[0].column->getName(), getName()); } } }; diff --git a/src/Functions/array/arrayEnumerateExtended.h b/src/Functions/array/arrayEnumerateExtended.h index 6dfa8d348db..c3d69bb6972 100644 --- a/src/Functions/array/arrayEnumerateExtended.h +++ b/src/Functions/array/arrayEnumerateExtended.h @@ -42,16 +42,17 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (arguments.empty()) - throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " - + toString(arguments.size()) + ", should be at least 1.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be at least 1.", + getName(), arguments.size()); for (size_t i = 0; i < arguments.size(); ++i) { const DataTypeArray * array_type = checkAndGetDataType(arguments[i].get()); if (!array_type) - throw Exception("All arguments for function " + getName() + " must be arrays but argument " + - toString(i + 1) + " has type " + arguments[i]->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "All arguments for function {} must be arrays but argument {} has type {}.", + getName(), i + 1, arguments[i]->getName()); } return std::make_shared(std::make_shared()); @@ -139,9 +140,8 @@ ColumnPtr FunctionArrayEnumerateExtended::executeImpl(const ColumnsWith const ColumnConst * const_array = checkAndGetColumnConst( arguments[i].column.get()); if (!const_array) - throw Exception("Illegal column " + arguments[i].column->getName() - + " of " + toString(i + 1) + "-th argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of {}-th argument of function {}", + arguments[i].column->getName(), i + 1, getName()); array_holders.emplace_back(const_array->convertToFullColumn()); array = checkAndGetColumn(array_holders.back().get()); } @@ -153,8 +153,8 @@ ColumnPtr FunctionArrayEnumerateExtended::executeImpl(const ColumnsWith offsets_column = array->getOffsetsPtr(); } else if (offsets_i != *offsets) - throw Exception("Lengths of all arrays passed to " + getName() + " must be equal.", - ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH); + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Lengths of all arrays passed to {} must be equal.", + getName()); const auto * array_data = &array->getData(); data_columns[i] = array_data; diff --git a/src/Functions/array/arrayEnumerateRanked.cpp b/src/Functions/array/arrayEnumerateRanked.cpp index d19781f97c3..dd597d607dc 100644 --- a/src/Functions/array/arrayEnumerateRanked.cpp +++ b/src/Functions/array/arrayEnumerateRanked.cpp @@ -49,9 +49,10 @@ ArraysDepths getArraysDepths(const ColumnsWithTypeAndName & arguments) { UInt64 value = assert_cast(*depth_column).getValue(); if (!value) - throw Exception("Incorrect arguments for function arrayEnumerateUniqRanked or arrayEnumerateDenseRanked: depth (" - + std::to_string(value) + ") cannot be less or equal 0.", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Incorrect arguments for function arrayEnumerateUniqRanked " + "or arrayEnumerateDenseRanked: depth ({}) cannot be less or equal 0.", + std::to_string(value)); if (i == 0) { @@ -60,14 +61,15 @@ ArraysDepths getArraysDepths(const ColumnsWithTypeAndName & arguments) else { if (depths.size() >= array_num) - throw Exception("Incorrect arguments for function 
arrayEnumerateUniqRanked or arrayEnumerateDenseRanked: depth (" - + std::to_string(value) + ") for missing array.", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Incorrect arguments for function arrayEnumerateUniqRanked " + "or arrayEnumerateDenseRanked: depth ({}) for missing array.", + std::to_string(value)); if (value > prev_array_depth) - throw Exception( - "Arguments for function arrayEnumerateUniqRanked/arrayEnumerateDenseRanked incorrect: depth=" - + std::to_string(value) + " for array with depth=" + std::to_string(prev_array_depth) + ".", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Arguments for function arrayEnumerateUniqRanked/arrayEnumerateDenseRanked incorrect: depth={}" + " for array with depth={}.", + std::to_string(value), std::to_string(prev_array_depth)); depths.emplace_back(value); } @@ -79,17 +81,19 @@ ArraysDepths getArraysDepths(const ColumnsWithTypeAndName & arguments) depths.emplace_back(prev_array_depth); if (depths.empty()) - throw Exception("Incorrect arguments for function arrayEnumerateUniqRanked or arrayEnumerateDenseRanked: at least one array should be passed.", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Incorrect arguments for function arrayEnumerateUniqRanked or arrayEnumerateDenseRanked: " + "at least one array should be passed."); DepthType max_array_depth = 0; for (auto depth : depths) max_array_depth = std::max(depth, max_array_depth); if (clear_depth > max_array_depth) - throw Exception("Incorrect arguments for function arrayEnumerateUniqRanked or arrayEnumerateDenseRanked: clear_depth (" - + std::to_string(clear_depth) + ") can't be larger than max_array_depth (" + std::to_string(max_array_depth) + ").", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Incorrect arguments for function arrayEnumerateUniqRanked or arrayEnumerateDenseRanked: " + "clear_depth ({}) can't be larger than max_array_depth ({}).", + std::to_string(clear_depth), std::to_string(max_array_depth)); return {clear_depth, depths, max_array_depth}; } diff --git a/src/Functions/array/arrayEnumerateRanked.h b/src/Functions/array/arrayEnumerateRanked.h index ac3936af0fd..73feb3e46ea 100644 --- a/src/Functions/array/arrayEnumerateRanked.h +++ b/src/Functions/array/arrayEnumerateRanked.h @@ -101,10 +101,9 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { if (arguments.empty()) - throw Exception( - "Number of arguments for function " + getName() + " doesn't match: passed " + std::to_string(arguments.size()) - + ", should be at least 1.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be at least 1.", + getName(), arguments.size()); const ArraysDepths arrays_depths = getArraysDepths(arguments); @@ -195,9 +194,8 @@ ColumnPtr FunctionArrayEnumerateRankedExtended::executeImpl( { if (*offsets_by_depth[0] != array->getOffsets()) { - throw Exception( - "Lengths and effective depths of all arrays passed to " + getName() + " must be equal.", - ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH); + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, + "Lengths and effective depths of all arrays passed to {} must be equal.", getName()); } } @@ -219,20 +217,17 @@ ColumnPtr FunctionArrayEnumerateRankedExtended::executeImpl( { if (*offsets_by_depth[col_depth] != array->getOffsets()) { - throw 
Exception( - "Lengths and effective depths of all arrays passed to " + getName() + " must be equal.", - ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH); + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, + "Lengths and effective depths of all arrays passed to {} must be equal.", getName()); } } } if (col_depth < arrays_depths.depths[array_num]) { - throw Exception( - getName() + ": Passed array number " + std::to_string(array_num) + " depth (" - + std::to_string(arrays_depths.depths[array_num]) + ") is more than the actual array depth (" - + std::to_string(col_depth) + ").", - ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH); + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, + "{}: Passed array number {} depth ({}) is more than the actual array depth ({}).", + getName(), array_num, std::to_string(arrays_depths.depths[array_num]), col_depth); } auto * array_data = &array->getData(); @@ -241,7 +236,7 @@ ColumnPtr FunctionArrayEnumerateRankedExtended::executeImpl( } if (offsets_by_depth.empty()) - throw Exception("No arrays passed to function " + getName(), ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "No arrays passed to function {}", getName()); auto res_nested = ColumnUInt32::create(); diff --git a/src/Functions/array/arrayExists.cpp b/src/Functions/array/arrayExists.cpp index c1731b05229..d80556b1475 100644 --- a/src/Functions/array/arrayExists.cpp +++ b/src/Functions/array/arrayExists.cpp @@ -38,7 +38,7 @@ struct ArrayExistsImpl const auto * column_filter_const = checkAndGetColumnConst(&*mapped); if (!column_filter_const) - throw Exception("Unexpected type of filter column", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unexpected type of filter column"); if (column_filter_const->getValue()) { diff --git a/src/Functions/array/arrayFill.cpp b/src/Functions/array/arrayFill.cpp index bd424c3f474..b395584b8e2 100644 --- a/src/Functions/array/arrayFill.cpp +++ b/src/Functions/array/arrayFill.cpp @@ -92,7 +92,7 @@ struct ArrayFillImpl const auto * column_fill_const = checkAndGetColumnConst(&*mapped); if (!column_fill_const) - throw Exception("Unexpected type of cut column", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unexpected type of cut column"); if (column_fill_const->getValue()) return ColumnArray::create( diff --git a/src/Functions/array/arrayFilter.cpp b/src/Functions/array/arrayFilter.cpp index b66c8570f11..528fb7bc5c7 100644 --- a/src/Functions/array/arrayFilter.cpp +++ b/src/Functions/array/arrayFilter.cpp @@ -38,7 +38,7 @@ struct ArrayFilterImpl const auto * column_filter_const = checkAndGetColumnConst(&*mapped); if (!column_filter_const) - throw Exception("Unexpected type of filter column", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unexpected type of filter column"); if (column_filter_const->getValue()) return array.clone(); diff --git a/src/Functions/array/arrayFirstLast.cpp b/src/Functions/array/arrayFirstLast.cpp index fa72ecba161..6a270081169 100644 --- a/src/Functions/array/arrayFirstLast.cpp +++ b/src/Functions/array/arrayFirstLast.cpp @@ -62,7 +62,7 @@ struct ArrayFirstLastImpl const auto * column_filter_const = checkAndGetColumnConst(&*mapped); if (!column_filter_const) - throw Exception("Unexpected type of filter column", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unexpected type of filter column"); if (column_filter_const->getValue()) { diff --git 
a/src/Functions/array/arrayFirstLastIndex.cpp b/src/Functions/array/arrayFirstLastIndex.cpp index effcb04ab48..f985075b1d8 100644 --- a/src/Functions/array/arrayFirstLastIndex.cpp +++ b/src/Functions/array/arrayFirstLastIndex.cpp @@ -42,7 +42,7 @@ struct ArrayFirstLastIndexImpl const auto * column_filter_const = checkAndGetColumnConst(&*mapped); if (!column_filter_const) - throw Exception("Unexpected type of filter column", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unexpected type of filter column"); if (column_filter_const->getValue()) { diff --git a/src/Functions/array/arrayFlatten.cpp b/src/Functions/array/arrayFlatten.cpp index b1f9399776e..d4eb8eebeee 100644 --- a/src/Functions/array/arrayFlatten.cpp +++ b/src/Functions/array/arrayFlatten.cpp @@ -29,9 +29,8 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (!isArray(arguments[0])) - throw Exception("Illegal type " + arguments[0]->getName() + - " of argument of function " + getName() + - ", expected Array", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}, expected Array", + arguments[0]->getName(), getName()); DataTypePtr nested_type = arguments[0]; while (isArray(nested_type)) @@ -83,8 +82,8 @@ result: Row 1: [1, 2, 3], Row2: [4] const ColumnArray * src_col = checkAndGetColumn(arguments[0].column.get()); if (!src_col) - throw Exception("Illegal column " + arguments[0].column->getName() + " in argument of function 'arrayFlatten'", - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} in argument of function 'arrayFlatten'", + arguments[0].column->getName()); const IColumn::Offsets & src_offsets = src_col->getOffsets(); diff --git a/src/Functions/array/arrayIndex.h b/src/Functions/array/arrayIndex.h index 7f0946e3e50..296cc588aa2 100644 --- a/src/Functions/array/arrayIndex.h +++ b/src/Functions/array/arrayIndex.h @@ -583,9 +583,7 @@ private: if (auto res = executeLowCardinality(arguments)) return res; - throw Exception( - "Illegal internal type of first argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal internal type of first argument of function {}", getName()); } } @@ -594,9 +592,7 @@ private: || (res = executeConst(arguments, result_type)) || (res = executeString(arguments)) || (res = executeGeneric(arguments)))) - throw Exception( - "Illegal internal type of first argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal internal type of first argument of function {}", getName()); return res; } @@ -928,9 +924,7 @@ private: null_map_data, null_map_item); else - throw Exception( - "Logical error: ColumnConst contains not String nor FixedString column", - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Logical error: ColumnConst contains not String nor FixedString column"); } else if (const auto *const item_arg_vector = checkAndGetColumn(&data.right)) { diff --git a/src/Functions/array/arrayIntersect.cpp b/src/Functions/array/arrayIntersect.cpp index 4ed946d8bb3..1a718595a3b 100644 --- a/src/Functions/array/arrayIntersect.cpp +++ b/src/Functions/array/arrayIntersect.cpp @@ -133,14 +133,15 @@ DataTypePtr FunctionArrayIntersect::getReturnTypeImpl(const DataTypes & argument bool has_nothing = false; if (arguments.empty()) - throw Exception{"Function " + getName() + " requires 
at least one argument.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least one argument.", getName()); for (auto i : collections::range(0, arguments.size())) { const auto * array_type = typeid_cast(arguments[i].get()); if (!array_type) - throw Exception("Argument " + std::to_string(i) + " for function " + getName() + " must be an array but it has type " - + arguments[i]->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Argument {} for function {} must be an array but it has type {}.", + i, getName(), arguments[i]->getName()); const auto & nested_type = array_type->getNestedType(); @@ -178,8 +179,8 @@ ColumnPtr FunctionArrayIntersect::castRemoveNullable(const ColumnPtr & column, c { const auto * array_type = checkAndGetDataType(data_type.get()); if (!array_type) - throw Exception{"Cannot cast array column to column with type " - + data_type->getName() + " in function " + getName(), ErrorCodes::LOGICAL_ERROR}; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot cast array column to column with type {} in function {}", + data_type->getName(), getName()); auto casted_column = castRemoveNullable(column_array->getDataPtr(), array_type->getNestedType()); return ColumnArray::create(casted_column, column_array->getOffsetsPtr()); @@ -189,8 +190,8 @@ ColumnPtr FunctionArrayIntersect::castRemoveNullable(const ColumnPtr & column, c const auto * tuple_type = checkAndGetDataType(data_type.get()); if (!tuple_type) - throw Exception{"Cannot cast tuple column to type " - + data_type->getName() + " in function " + getName(), ErrorCodes::LOGICAL_ERROR}; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot cast tuple column to type {} in function {}", + data_type->getName(), getName()); auto columns_number = column_tuple->tupleSize(); Columns columns(columns_number); @@ -352,7 +353,7 @@ FunctionArrayIntersect::UnpackedArrays FunctionArrayIntersect::prepareArrays( } } else - throw Exception{"Arguments for function " + getName() + " must be arrays.", ErrorCodes::LOGICAL_ERROR}; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Arguments for function {} must be arrays.", getName()); } if (all_const) @@ -370,7 +371,7 @@ FunctionArrayIntersect::UnpackedArrays FunctionArrayIntersect::prepareArrays( if (arrays.base_rows == 0 && rows > 0) arrays.base_rows = rows; else if (arrays.base_rows != rows) - throw Exception("Non-const array columns in function " + getName() + "should have same rows", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Non-const array columns in function {}should have same rows", getName()); } } @@ -382,7 +383,7 @@ ColumnPtr FunctionArrayIntersect::executeImpl(const ColumnsWithTypeAndName & arg const auto * return_type_array = checkAndGetDataType(result_type.get()); if (!return_type_array) - throw Exception{"Return type for function " + getName() + " must be array.", ErrorCodes::LOGICAL_ERROR}; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Return type for function {} must be array.", getName()); const auto & nested_return_type = return_type_array->getNestedType(); @@ -483,7 +484,7 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable columns.push_back(checkAndGetColumn(arg.nested_column)); if (!columns.back()) - throw Exception("Unexpected array type for function arrayIntersect", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected array type for function 
arrayIntersect"); if (!arg.null_map) all_nullable = false; diff --git a/src/Functions/array/arrayJoin.cpp b/src/Functions/array/arrayJoin.cpp index 41f19fae6bf..cc854dc6807 100644 --- a/src/Functions/array/arrayJoin.cpp +++ b/src/Functions/array/arrayJoin.cpp @@ -55,14 +55,14 @@ public: { const auto & arr = getArrayJoinDataType(arguments[0]); if (!arr) - throw Exception("Argument for function " + getName() + " must be Array or Map", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument for function {} must be Array or Map", getName()); return arr->getNestedType(); } ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t /*input_rows_count*/) const override { - throw Exception("Function " + getName() + " must not be executed directly.", ErrorCodes::FUNCTION_IS_SPECIAL); + throw Exception(ErrorCodes::FUNCTION_IS_SPECIAL, "Function {} must not be executed directly.", getName()); } /// Because of function cannot be executed directly. diff --git a/src/Functions/array/arrayPop.h b/src/Functions/array/arrayPop.h index 1679f14bb43..bda6ac65923 100644 --- a/src/Functions/array/arrayPop.h +++ b/src/Functions/array/arrayPop.h @@ -34,8 +34,9 @@ public: const auto * array_type = typeid_cast(arguments[0].get()); if (!array_type) - throw Exception("First argument for function " + getName() + " must be an array but it has type " - + arguments[0]->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First argument for function {} must be an array but it has type {}.", + getName(), arguments[0]->getName()); return arguments[0]; } @@ -56,7 +57,7 @@ public: if (const auto * argument_column_array = typeid_cast(array_column.get())) source = GatherUtils::createArraySource(*argument_column_array, false, size); else - throw Exception{"First arguments for function " + getName() + " must be array.", ErrorCodes::LOGICAL_ERROR}; + throw Exception(ErrorCodes::LOGICAL_ERROR, "First arguments for function {} must be array.", getName()); ColumnArray::MutablePtr sink; diff --git a/src/Functions/array/arrayPush.h b/src/Functions/array/arrayPush.h index 18815b7cabf..0ecc4c25eb1 100644 --- a/src/Functions/array/arrayPush.h +++ b/src/Functions/array/arrayPush.h @@ -38,8 +38,9 @@ public: const auto * array_type = typeid_cast(arguments[0].get()); if (!array_type) - throw Exception("First argument for function " + getName() + " must be an array but it has type " - + arguments[0]->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First argument for function {} must be an array but it has type {}.", + getName(), arguments[0]->getName()); auto nested_type = array_type->getNestedType(); @@ -80,7 +81,7 @@ public: if (const auto * argument_column_array = typeid_cast(array_column.get())) array_source = GatherUtils::createArraySource(*argument_column_array, is_const, size); else - throw Exception{"First arguments for function " + getName() + " must be array.", ErrorCodes::LOGICAL_ERROR}; + throw Exception(ErrorCodes::LOGICAL_ERROR, "First arguments for function {} must be array.", getName()); bool is_appended_const = false; diff --git a/src/Functions/array/arrayReduce.cpp b/src/Functions/array/arrayReduce.cpp index e7ed8577049..d4896595941 100644 --- a/src/Functions/array/arrayReduce.cpp +++ b/src/Functions/array/arrayReduce.cpp @@ -67,22 +67,23 @@ DataTypePtr FunctionArrayReduce::getReturnTypeImpl(const ColumnsWithTypeAndName /// 
(possibly with parameters in parentheses, for example: "quantile(0.99)"). if (arguments.size() < 2) - throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " - + toString(arguments.size()) + ", should be at least 2.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be at least 2.", + getName(), arguments.size()); const ColumnConst * aggregate_function_name_column = checkAndGetColumnConst(arguments[0].column.get()); if (!aggregate_function_name_column) - throw Exception("First argument for function " + getName() + " must be constant string: name of aggregate function.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be constant string: " + "name of aggregate function.", getName()); DataTypes argument_types(arguments.size() - 1); for (size_t i = 1, size = arguments.size(); i < size; ++i) { const DataTypeArray * arg = checkAndGetDataType(arguments[i].type.get()); if (!arg) - throw Exception("Argument " + toString(i) + " for function " + getName() + " must be an array but it has type " - + arguments[i].type->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Argument {} for function {} must be an array but it has type {}.", + i, getName(), arguments[i].type->getName()); argument_types[i - 1] = arg->getNestedType(); } @@ -92,8 +93,7 @@ DataTypePtr FunctionArrayReduce::getReturnTypeImpl(const ColumnsWithTypeAndName String aggregate_function_name_with_params = aggregate_function_name_column->getValue(); if (aggregate_function_name_with_params.empty()) - throw Exception("First argument for function " + getName() + " (name of aggregate function) cannot be empty.", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "First argument for function {} (name of aggregate function) cannot be empty.", getName()); String aggregate_function_name; Array params_row; @@ -139,13 +139,13 @@ ColumnPtr FunctionArrayReduce::executeImpl(const ColumnsWithTypeAndName & argume offsets_i = &materialized_arr.getOffsets(); } else - throw Exception("Illegal column " + col->getName() + " as argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} as argument of function {}", col->getName(), getName()); if (i == 0) offsets = offsets_i; else if (*offsets_i != *offsets) - throw Exception("Lengths of all arrays passed to " + getName() + " must be equal.", - ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH); + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Lengths of all arrays passed to {} must be equal.", + getName()); } const IColumn ** aggregate_arguments = aggregate_arguments_vec.data(); diff --git a/src/Functions/array/arrayReduceInRanges.cpp b/src/Functions/array/arrayReduceInRanges.cpp index 2cceea4ddba..07391c963a6 100644 --- a/src/Functions/array/arrayReduceInRanges.cpp +++ b/src/Functions/array/arrayReduceInRanges.cpp @@ -70,37 +70,39 @@ DataTypePtr FunctionArrayReduceInRanges::getReturnTypeImpl(const ColumnsWithType /// (possibly with parameters in parentheses, for example: "quantile(0.99)"). 
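    /// The remaining arguments are validated below: the second argument must be an array of
    /// (index, length) tuples describing the ranges to aggregate over, and every further
    /// argument must be an array whose nested type is fed to the aggregate function.
    /// For illustration only (hypothetical call, 1-based indices):
    ///     arrayReduceInRanges('sum', [(1, 3), (2, 2)], [1, 2, 3, 4]) -> [6, 5]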
if (arguments.size() < 3) - throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " - + toString(arguments.size()) + ", should be at least 3.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be at least 3.", + getName(), arguments.size()); const ColumnConst * aggregate_function_name_column = checkAndGetColumnConst(arguments[0].column.get()); if (!aggregate_function_name_column) - throw Exception("First argument for function " + getName() + " must be constant string: name of aggregate function.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be constant string: " + "name of aggregate function.", getName()); const DataTypeArray * ranges_type_array = checkAndGetDataType(arguments[1].type.get()); if (!ranges_type_array) - throw Exception("Second argument for function " + getName() + " must be an array of ranges.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument for function {} must be an array of ranges.", + getName()); const DataTypeTuple * ranges_type_tuple = checkAndGetDataType(ranges_type_array->getNestedType().get()); if (!ranges_type_tuple || ranges_type_tuple->getElements().size() != 2) - throw Exception("Each array element in the second argument for function " + getName() + " must be a tuple (index, length).", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Each array element in the second argument for function {} must be a tuple (index, length).", + getName()); if (!isNativeInteger(ranges_type_tuple->getElements()[0])) - throw Exception("First tuple member in the second argument for function " + getName() + " must be ints or uints.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First tuple member in the second argument for function {} must be ints or uints.", getName()); if (!WhichDataType(ranges_type_tuple->getElements()[1]).isNativeUInt()) - throw Exception("Second tuple member in the second argument for function " + getName() + " must be uints.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Second tuple member in the second argument for function {} must be uints.", getName()); DataTypes argument_types(arguments.size() - 2); for (size_t i = 2, size = arguments.size(); i < size; ++i) { const DataTypeArray * arg = checkAndGetDataType(arguments[i].type.get()); if (!arg) - throw Exception("Argument " + toString(i) + " for function " + getName() + " must be an array but it has type " - + arguments[i].type->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Argument {} for function {} must be an array but it has type {}.", + i, getName(), arguments[i].type->getName()); argument_types[i - 2] = arg->getNestedType(); } @@ -110,8 +112,7 @@ DataTypePtr FunctionArrayReduceInRanges::getReturnTypeImpl(const ColumnsWithType String aggregate_function_name_with_params = aggregate_function_name_column->getValue(); if (aggregate_function_name_with_params.empty()) - throw Exception("First argument for function " + getName() + " (name of aggregate function) cannot be empty.", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "First 
argument for function {} (name of aggregate function) cannot be empty.", getName()); String aggregate_function_name; Array params_row; @@ -153,7 +154,7 @@ ColumnPtr FunctionArrayReduceInRanges::executeImpl( ranges_offsets = &materialized_arr.getOffsets(); } else - throw Exception("Illegal column " + ranges_col_array->getName() + " as argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} as argument of function {}", ranges_col_array->getName(), getName()); const IColumn & indices_col = static_cast(ranges_col_tuple)->getColumn(0); const IColumn & lengths_col = static_cast(ranges_col_tuple)->getColumn(1); @@ -184,13 +185,13 @@ ColumnPtr FunctionArrayReduceInRanges::executeImpl( offsets_i = &materialized_arr.getOffsets(); } else - throw Exception("Illegal column " + col->getName() + " as argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} as argument of function {}", col->getName(), getName()); if (i == 0) offsets = offsets_i; else if (*offsets_i != *offsets) - throw Exception("Lengths of all arrays passed to " + getName() + " must be equal.", - ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH); + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Lengths of all arrays passed to {} must be equal.", + getName()); } const IColumn ** aggregate_arguments = aggregate_arguments_vec.data(); diff --git a/src/Functions/array/arrayResize.cpp b/src/Functions/array/arrayResize.cpp index 9aa06918659..8f4ea69fc5d 100644 --- a/src/Functions/array/arrayResize.cpp +++ b/src/Functions/array/arrayResize.cpp @@ -39,25 +39,26 @@ public: const size_t number_of_arguments = arguments.size(); if (number_of_arguments < 2 || number_of_arguments > 3) - throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " - + toString(number_of_arguments) + ", should be 2 or 3", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 2 or 3", + getName(), number_of_arguments); if (arguments[0]->onlyNull()) return arguments[0]; const auto * array_type = typeid_cast(arguments[0].get()); if (!array_type) - throw Exception("First argument for function " + getName() + " must be an array but it has type " - + arguments[0]->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First argument for function {} must be an array but it has type {}.", + getName(), arguments[0]->getName()); if (WhichDataType(array_type->getNestedType()).isNothing()) - throw Exception("Function " + getName() + " cannot resize " + array_type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function {} cannot resize {}", getName(), array_type->getName()); if (!isInteger(removeNullable(arguments[1])) && !arguments[1]->onlyNull()) - throw Exception( - "Argument " + toString(1) + " for function " + getName() + " must be integer but it has type " - + arguments[1]->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Argument {} for function {} must be integer but it has type {}.", + toString(1), getName(), arguments[1]->getName()); if (number_of_arguments == 2) return arguments[0]; @@ -105,7 +106,7 @@ public: if (const auto * argument_column_array = 
typeid_cast(array_column.get())) array_source = GatherUtils::createArraySource(*argument_column_array, is_const, size); else - throw Exception{"First arguments for function " + getName() + " must be array.", ErrorCodes::LOGICAL_ERROR}; + throw Exception(ErrorCodes::LOGICAL_ERROR, "First arguments for function {} must be array.", getName()); bool is_appended_const = false; diff --git a/src/Functions/array/arrayReverse.cpp b/src/Functions/array/arrayReverse.cpp index 88703cbb032..912adbadc7c 100644 --- a/src/Functions/array/arrayReverse.cpp +++ b/src/Functions/array/arrayReverse.cpp @@ -37,8 +37,7 @@ public: { const DataTypeArray * array_type = checkAndGetDataType(arguments[0].get()); if (!array_type) - throw Exception("Argument for function " + getName() + " must be array.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument for function {} must be array.", getName()); return arguments[0]; } @@ -59,8 +58,8 @@ ColumnPtr FunctionArrayReverse::executeImpl(const ColumnsWithTypeAndName & argum { const ColumnArray * array = checkAndGetColumn(arguments[0].column.get()); if (!array) - throw Exception("Illegal column " + arguments[0].column->getName() + " of first argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + arguments[0].column->getName(), getName()); auto res_ptr = array->cloneEmpty(); ColumnArray & res = assert_cast(*res_ptr); @@ -94,9 +93,8 @@ ColumnPtr FunctionArrayReverse::executeImpl(const ColumnsWithTypeAndName & argum if (src_nullable_col) if (!executeNumber(src_nullable_col->getNullMapColumn(), offsets, res_nullable_col->getNullMapColumn())) - throw Exception("Illegal column " + src_nullable_col->getNullMapColumn().getName() - + " of null map of the first argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of null map of the first argument of function {}", + src_nullable_col->getNullMapColumn().getName(), getName()); return res_ptr; } diff --git a/src/Functions/array/arrayScalarProduct.h b/src/Functions/array/arrayScalarProduct.h index 4e3eab2faf8..94ce1bc533c 100644 --- a/src/Functions/array/arrayScalarProduct.h +++ b/src/Functions/array/arrayScalarProduct.h @@ -65,7 +65,7 @@ private: return nullptr; if (!col_array1->hasEqualOffsets(*col_array2)) - throw Exception("Array arguments for function " + getName() + " must have equal sizes", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Array arguments for function {} must have equal sizes", getName()); const ColumnVector * col_nested1 = checkAndGetColumn>(col_array1->getData()); const ColumnVector * col_nested2 = checkAndGetColumn>(col_array2->getData()); @@ -117,12 +117,12 @@ public: { const DataTypeArray * array_type = checkAndGetDataType(arguments[i].get()); if (!array_type) - throw Exception("All arguments for function " + getName() + " must be an array.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "All arguments for function {} must be an array.", getName()); const auto & nested_type = array_type->getNestedType(); if (!isNativeNumber(nested_type) && !isEnum(nested_type)) - throw Exception( - getName() + " cannot process values of type " + nested_type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "{} cannot process values of type {}", + getName(), 
nested_type->getName()); nested_types[i] = nested_type; } @@ -143,11 +143,8 @@ public: || (res = executeNumber(arguments)) || (res = executeNumber(arguments)) || (res = executeNumber(arguments)))) - throw Exception - { - "Illegal column " + arguments[0].column->getName() + " of first argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN - }; + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {} of first argument of function {}", arguments[0].column->getName(), getName()); return res; } diff --git a/src/Functions/array/arrayShuffle.cpp b/src/Functions/array/arrayShuffle.cpp new file mode 100644 index 00000000000..9cf3ac8f3fe --- /dev/null +++ b/src/Functions/array/arrayShuffle.cpp @@ -0,0 +1,227 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +/** Shuffle array elements + * arrayShuffle(arr) + * arrayShuffle(arr, seed) + */ +struct FunctionArrayShuffleTraits +{ + static constexpr auto name = "arrayShuffle"; + static constexpr auto has_limit = false; // Permute the whole array + static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {1}; } + static constexpr auto max_num_params = 2; // array[, seed] + static constexpr auto seed_param_idx = 1; // --------^^^^ +}; + +/** Partial shuffle array elements + * arrayPartialShuffle(arr) + * arrayPartialShuffle(arr, limit) + * arrayPartialShuffle(arr, limit, seed) + */ +struct FunctionArrayPartialShuffleTraits +{ + static constexpr auto name = "arrayPartialShuffle"; + static constexpr auto has_limit = true; + static ColumnNumbers getArgumentsThatAreAlwaysConstant() { return {1, 2}; } + static constexpr auto max_num_params = 3; // array[, limit[, seed]] + static constexpr auto seed_param_idx = 2; // ----------------^^^^ +}; + +template +class FunctionArrayShuffleImpl : public IFunction +{ +public: + static constexpr auto name = Traits::name; + + String getName() const override { return name; } + bool isVariadic() const override { return true; } + size_t getNumberOfArguments() const override { return 0; } + ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return Traits::getArgumentsThatAreAlwaysConstant(); } + bool useDefaultImplementationForConstants() const override { return true; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + static FunctionPtr create(ContextPtr) { return std::make_shared>(); } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (arguments.size() > Traits::max_num_params || arguments.empty()) + { + throw Exception( + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Function '{}' needs from 1 to {} arguments, passed {}.", + getName(), + Traits::max_num_params, + arguments.size()); + } + + const DataTypeArray * array_type = checkAndGetDataType(arguments[0].get()); + if (!array_type) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument of function '{}' must be array", getName()); + + auto check_is_integral = [&](auto param_idx) + { + WhichDataType which(arguments[param_idx]); + if (!which.isUInt() && !which.isInt()) + throw Exception( + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type {} of arguments of function {} (must be UInt or Int)", + 
arguments[param_idx]->getName(), + getName()); + }; + + for (size_t idx = 1; idx < arguments.size(); ++idx) + check_is_integral(idx); + + return arguments[0]; + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t) const override; + +private: + static ColumnPtr executeGeneric(const ColumnArray & array, pcg64_fast & rng, size_t limit); +}; + +template +ColumnPtr FunctionArrayShuffleImpl::executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const +{ + const ColumnArray * array = checkAndGetColumn(arguments[0].column.get()); + if (!array) + throw Exception( + ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", arguments[0].column->getName(), getName()); + + const auto seed = [&]() -> uint64_t + { + // If present, seed comes as the last argument + if (arguments.size() != Traits::max_num_params) + return randomSeed(); + const auto * val = arguments[Traits::seed_param_idx].column.get(); + return val->getUInt(0); + }(); + pcg64_fast rng(seed); + + size_t limit = [&]() -> size_t + { + if constexpr (Traits::has_limit) + { + if (arguments.size() > 1) + { + const auto * val = arguments[1].column.get(); + return val->getUInt(0); + } + } + return 0; + }(); + + return executeGeneric(*array, rng, limit); +} + +template +ColumnPtr FunctionArrayShuffleImpl::executeGeneric(const ColumnArray & array, pcg64_fast & rng, size_t limit [[maybe_unused]]) +{ + const ColumnArray::Offsets & offsets = array.getOffsets(); + + size_t size = offsets.size(); + size_t nested_size = array.getData().size(); + IColumn::Permutation permutation(nested_size); + std::iota(std::begin(permutation), std::end(permutation), 0); + + ColumnArray::Offset current_offset = 0; + for (size_t i = 0; i < size; ++i) + { + auto next_offset = offsets[i]; + if constexpr (Traits::has_limit) + { + if (limit) + { + const auto effective_limit = std::min(limit, next_offset - current_offset); + partial_shuffle(&permutation[current_offset], &permutation[next_offset], effective_limit, rng); + } + else + shuffle(&permutation[current_offset], &permutation[next_offset], rng); + } + else + shuffle(&permutation[current_offset], &permutation[next_offset], rng); + current_offset = next_offset; + } + return ColumnArray::create(array.getData().permute(permutation, 0), array.getOffsetsPtr()); +} + +REGISTER_FUNCTION(ArrayShuffle) +{ + factory.registerFunction>( + { + R"( +Returns an array of the same size as the original array containing the elements in shuffled order. +Elements are being reordered in such a way that each possible permutation of those elements has equal probability of appearance. + +Note: this function will not materialize constants: +[example:materialize] + +If no seed is provided a random one will be used: +[example:random_seed] + +It is possible to override the seed to produce stable results: +[example:explicit_seed] +)", + Documentation::Examples{ + {"random_seed", "SELECT arrayShuffle([1, 2, 3, 4])"}, + {"explicit_seed", "SELECT arrayShuffle([1, 2, 3, 4], 41)"}, + {"materialize", "SELECT arrayShuffle(materialize([1, 2, 3]), 42), arrayShuffle([1, 2, 3], 42) FROM numbers(10)"}}, + Documentation::Categories{"Array"}}, + FunctionFactory::CaseInsensitive); + factory.registerFunction>( + { + R"( +Returns an array of the same size as the original array where elements in range [1..limit] are a random +subset of the original array. Remaining (limit..n] shall contain the elements not in [1..limit] range in undefined order. 
+Value of limit shall be in range [1..n]. Values outside of that range are equivalent to performing full arrayShuffle: +[example:no_limit1] +[example:no_limit2] + +Note: this function will not materialize constants: +[example:materialize] + +If no seed is provided a random one will be used: +[example:random_seed] + +It is possible to override the seed to produce stable results: +[example:explicit_seed] +)", + Documentation::Examples{ + {"no_limit1", "SELECT arrayPartialShuffle([1, 2, 3, 4], 0)"}, + {"no_limit2", "SELECT arrayPartialShuffle([1, 2, 3, 4])"}, + {"random_seed", "SELECT arrayPartialShuffle([1, 2, 3, 4], 2)"}, + {"explicit_seed", "SELECT arrayPartialShuffle([1, 2, 3, 4], 2, 41)"}, + {"materialize", + "SELECT arrayPartialShuffle(materialize([1, 2, 3, 4]), 2, 42), arrayPartialShuffle([1, 2, 3], 2, 42) FROM numbers(10)"}}, + Documentation::Categories{"Array"}}, + FunctionFactory::CaseInsensitive); +} + +} diff --git a/src/Functions/array/arraySlice.cpp b/src/Functions/array/arraySlice.cpp index 9d7efd8fccf..561986ba23a 100644 --- a/src/Functions/array/arraySlice.cpp +++ b/src/Functions/array/arraySlice.cpp @@ -48,24 +48,25 @@ public: const size_t number_of_arguments = arguments.size(); if (number_of_arguments < 2 || number_of_arguments > 3) - throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " - + toString(number_of_arguments) + ", should be 2 or 3", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 2 or 3", + getName(), number_of_arguments); if (arguments[0]->onlyNull()) return arguments[0]; const auto * array_type = typeid_cast(arguments[0].get()); if (!array_type) - throw Exception("First argument for function " + getName() + " must be an array but it has type " - + arguments[0]->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First argument for function {} must be an array but it has type {}.", + getName(), arguments[0]->getName()); for (size_t i = 1; i < number_of_arguments; ++i) { if (!isInteger(removeNullable(arguments[i])) && !arguments[i]->onlyNull()) - throw Exception( - "Argument " + toString(i) + " for function " + getName() + " must be integer but it has type " - + arguments[i]->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Argument {} for function {} must be integer but it has type {}.", + i, getName(), arguments[i]->getName()); } return arguments[0]; @@ -94,7 +95,7 @@ public: if (const auto * argument_column_array = typeid_cast(array_column.get())) source = GatherUtils::createArraySource(*argument_column_array, is_const, size); else - throw Exception{"First arguments for function " + getName() + " must be array.", ErrorCodes::LOGICAL_ERROR}; + throw Exception(ErrorCodes::LOGICAL_ERROR, "First arguments for function {} must be array.", getName()); ColumnArray::MutablePtr sink; diff --git a/src/Functions/array/arraySplit.cpp b/src/Functions/array/arraySplit.cpp index caa6438adb1..dd1cfc64c1b 100644 --- a/src/Functions/array/arraySplit.cpp +++ b/src/Functions/array/arraySplit.cpp @@ -69,7 +69,7 @@ struct ArraySplitImpl const auto * column_cut_const = checkAndGetColumnConst(&*mapped); if (!column_cut_const) - throw Exception("Unexpected type of cut column", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unexpected type of cut 
column"); if (column_cut_const->getValue()) { diff --git a/src/Functions/array/arrayUniq.cpp b/src/Functions/array/arrayUniq.cpp index a43c21508d9..1d1cf4e6392 100644 --- a/src/Functions/array/arrayUniq.cpp +++ b/src/Functions/array/arrayUniq.cpp @@ -44,16 +44,17 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (arguments.empty()) - throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " - + toString(arguments.size()) + ", should be at least 1.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be at least 1.", + getName(), arguments.size()); for (size_t i = 0; i < arguments.size(); ++i) { const DataTypeArray * array_type = checkAndGetDataType(arguments[i].get()); if (!array_type) - throw Exception("All arguments for function " + getName() + " must be arrays but argument " + - toString(i + 1) + " has type " + arguments[i]->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "All arguments for function {} must be arrays but argument {} has type {}.", + getName(), i + 1, arguments[i]->getName()); } return std::make_shared(); @@ -140,9 +141,8 @@ ColumnPtr FunctionArrayUniq::executeImpl(const ColumnsWithTypeAndName & argument const ColumnConst * const_array = checkAndGetColumnConst( arguments[i].column.get()); if (!const_array) - throw Exception("Illegal column " + arguments[i].column->getName() - + " of " + toString(i + 1) + "-th argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of {}-th argument of function {}", + arguments[i].column->getName(), i + 1, getName()); array_holders.emplace_back(const_array->convertToFullColumn()); array = checkAndGetColumn(array_holders.back().get()); } @@ -151,8 +151,8 @@ ColumnPtr FunctionArrayUniq::executeImpl(const ColumnsWithTypeAndName & argument if (i == 0) offsets = &offsets_i; else if (offsets_i != *offsets) - throw Exception("Lengths of all arrays passed to " + getName() + " must be equal.", - ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH); + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Lengths of all arrays passed to {} must be equal.", + getName()); const auto * array_data = &array->getData(); data_columns[i] = array_data; diff --git a/src/Functions/array/arrayWithConstant.cpp b/src/Functions/array/arrayWithConstant.cpp index 2feac209b22..48262870553 100644 --- a/src/Functions/array/arrayWithConstant.cpp +++ b/src/Functions/array/arrayWithConstant.cpp @@ -40,9 +40,8 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (!isNativeNumber(arguments[0])) - throw Exception("Illegal type " + arguments[0]->getName() + - " of argument of function " + getName() + - ", expected Integer", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}, expected Integer", + arguments[0]->getName(), getName()); return std::make_shared(arguments[1]); } @@ -64,12 +63,12 @@ public: auto array_size = col_num->getInt(i); if (unlikely(array_size < 0)) - throw Exception("Array size cannot be negative: while executing function " + getName(), ErrorCodes::TOO_LARGE_ARRAY_SIZE); + throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Array size cannot be negative: while executing function {}", getName()); 
offset += array_size; if (unlikely(offset > max_arrays_size_in_columns)) - throw Exception("Too large array size while executing function " + getName(), ErrorCodes::TOO_LARGE_ARRAY_SIZE); + throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size while executing function {}", getName()); offsets.push_back(offset); } diff --git a/src/Functions/array/arrayZip.cpp b/src/Functions/array/arrayZip.cpp index a56a8843e9b..3a50491fd4b 100644 --- a/src/Functions/array/arrayZip.cpp +++ b/src/Functions/array/arrayZip.cpp @@ -39,8 +39,8 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { if (arguments.empty()) - throw Exception("Function " + getName() + " needs at least one argument; passed " + toString(arguments.size()) + "." - , ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Function {} needs at least one argument; passed {}." , getName(), arguments.size()); DataTypes arguments_types; for (size_t index = 0; index < arguments.size(); ++index) @@ -48,8 +48,8 @@ public: const DataTypeArray * array_type = checkAndGetDataType(arguments[index].type.get()); if (!array_type) - throw Exception("Argument " + toString(index + 1) + " of function " + getName() - + " must be array. Found " + arguments[0].type->getName() + " instead.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument {} of function {} must be array. Found {} instead.", + toString(index + 1), getName(), arguments[0].type->getName()); arguments_types.emplace_back(array_type->getNestedType()); } @@ -72,8 +72,8 @@ public: const ColumnArray * column_array = checkAndGetColumn(holder.get()); if (!column_array) - throw Exception("Argument " + toString(i + 1) + " of function " + getName() + " must be array." - " Found column " + holder->getName() + " instead.", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Argument {} of function {} must be array. 
Found column {} instead.", + i + 1, getName(), holder->getName()); if (i == 0) { @@ -81,8 +81,9 @@ public: } else if (!column_array->hasEqualOffsets(static_cast(*first_array_column))) { - throw Exception("The argument 1 and argument " + toString(i + 1) + " of function " + getName() + " have different array sizes", - ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH); + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, + "The argument 1 and argument {} of function {} have different array sizes", + i + 1, getName()); } tuple_columns[i] = column_array->getDataPtr(); diff --git a/src/Functions/array/emptyArrayToSingle.cpp b/src/Functions/array/emptyArrayToSingle.cpp index 3fc8f6caa54..86d4c32265a 100644 --- a/src/Functions/array/emptyArrayToSingle.cpp +++ b/src/Functions/array/emptyArrayToSingle.cpp @@ -40,8 +40,7 @@ public: { const DataTypeArray * array_type = checkAndGetDataType(arguments[0].get()); if (!array_type) - throw Exception("Argument for function " + getName() + " must be array.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument for function {} must be array.", getName()); return arguments[0]; } @@ -146,7 +145,7 @@ namespace auto * concrete_res_data = typeid_cast(&res_data_col); if (!concrete_res_data) - throw Exception{"Internal error", ErrorCodes::LOGICAL_ERROR}; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Internal error"); ColumnFixedString::Chars & res_data = concrete_res_data->getChars(); size_t size = src_offsets.size(); @@ -212,14 +211,14 @@ namespace auto * concrete_res_string_offsets = typeid_cast(&res_data_col); if (!concrete_res_string_offsets) - throw Exception{"Internal error", ErrorCodes::LOGICAL_ERROR}; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Internal error"); ColumnString::Offsets & res_string_offsets = concrete_res_string_offsets->getOffsets(); const ColumnString::Chars & src_data_vec = src_data_concrete->getChars(); auto * concrete_res_data = typeid_cast(&res_data_col); if (!concrete_res_data) - throw Exception{"Internal error", ErrorCodes::LOGICAL_ERROR}; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Internal error"); ColumnString::Chars & res_data = concrete_res_data->getChars(); size_t size = src_array_offsets.size(); @@ -375,8 +374,8 @@ ColumnPtr FunctionEmptyArrayToSingle::executeImpl(const ColumnsWithTypeAndName & const ColumnArray * array = checkAndGetColumn(arguments[0].column.get()); if (!array) - throw Exception("Illegal column " + arguments[0].column->getName() + " of first argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + arguments[0].column->getName(), getName()); MutableColumnPtr res_ptr = array->cloneEmpty(); ColumnArray & res = assert_cast(*res_ptr); diff --git a/src/Functions/array/hasAllAny.h b/src/Functions/array/hasAllAny.h index 3ba8bb6156f..245984beff5 100644 --- a/src/Functions/array/hasAllAny.h +++ b/src/Functions/array/hasAllAny.h @@ -46,8 +46,9 @@ public: { const auto * array_type = typeid_cast(arguments[i].get()); if (!array_type) - throw Exception("Argument " + std::to_string(i) + " for function " + getName() + " must be an array but it has type " - + arguments[i]->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Argument {} for function {} must be an array but it has type {}.", + i, getName(), arguments[i]->getName()); } return std::make_shared(); @@ -78,7 +79,7 @@ public: if (const auto * 
argument_column_array = typeid_cast(argument_column.get())) sources.emplace_back(GatherUtils::createArraySource(*argument_column_array, is_const, input_rows_count)); else - throw Exception{"Arguments for function " + getName() + " must be arrays.", ErrorCodes::LOGICAL_ERROR}; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Arguments for function {} must be arrays.", getName()); } auto result_column = ColumnUInt8::create(input_rows_count); diff --git a/src/Functions/array/length.cpp b/src/Functions/array/length.cpp index 7a64c24fd6b..65555a501e8 100644 --- a/src/Functions/array/length.cpp +++ b/src/Functions/array/length.cpp @@ -41,7 +41,17 @@ struct LengthImpl [[noreturn]] static void uuid(const ColumnUUID::Container &, size_t &, PaddedPODArray &) { - throw Exception("Cannot apply function length to UUID argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function length to UUID argument"); + } + + [[noreturn]] static void ipv6(const ColumnIPv6::Container &, size_t &, PaddedPODArray &) + { + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function length to IPv6 argument"); + } + + [[noreturn]] static void ipv4(const ColumnIPv4::Container &, size_t &, PaddedPODArray &) + { + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function length to IPv4 argument"); } }; diff --git a/src/Functions/array/mapOp.cpp b/src/Functions/array/mapOp.cpp index dccb8001c1c..2e13d35488f 100644 --- a/src/Functions/array/mapOp.cpp +++ b/src/Functions/array/mapOp.cpp @@ -62,22 +62,20 @@ private: DataTypePtr & key_type, DataTypePtr & promoted_val_type, const DataTypePtr & check_key_type, DataTypePtr & check_val_type) const { if (!(check_key_type->equals(*key_type))) - throw Exception( - "Expected same " + key_type->getName() + " type for all keys in " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Expected same {} type for all keys in {}", + key_type->getName(), getName()); WhichDataType which_val(promoted_val_type); WhichDataType which_ch_val(check_val_type); if (which_ch_val.isFloat() != which_val.isFloat()) - throw Exception( - "All value types in " + getName() + " should be either or float or integer", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "All value types in {} should be either or float or integer", + getName()); if (!(check_val_type->equals(*promoted_val_type))) { - throw Exception( - "All value types in " + getName() + " should be promotable to " + promoted_val_type->getName() + ", got " - + check_val_type->getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "All value types in {} should be promotable to {}, got {}", + getName(), promoted_val_type->getName(), check_val_type->getName()); } } @@ -92,24 +90,23 @@ private: const DataTypeTuple * tup = checkAndGetDataType(arg.get()); if (!tup) - throw Exception(getName() + " accepts at least two map tuples", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "{} accepts at least two map tuples", getName()); auto elems = tup->getElements(); if (elems.size() != 2) - throw Exception( - "Each tuple in " + getName() + " arguments should consist of two arrays", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Each tuple in {} arguments should consist of two arrays", + getName()); k = 
checkAndGetDataType(elems[0].get()); v = checkAndGetDataType(elems[1].get()); if (!k || !v) - throw Exception( - "Each tuple in " + getName() + " arguments should consist of two arrays", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Each tuple in {} arguments should consist of two arrays", + getName()); auto result_type = v->getNestedType(); if (!result_type->canBePromoted()) - throw Exception( - "Values to be summed are expected to be Numeric, Float or Decimal.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Values to be summed are expected to be Numeric, Float or Decimal."); auto promoted_val_type = result_type->promoteNumericType(); if (!key_type) @@ -134,13 +131,12 @@ private: { const auto * map = checkAndGetDataType(arg.get()); if (!map) - throw Exception(getName() + " accepts at least two maps", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "{} accepts at least two maps", getName()); const auto & v = map->getValueType(); if (!v->canBePromoted()) - throw Exception( - "Values to be summed are expected to be Numeric, Float or Decimal.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Values to be summed are expected to be Numeric, Float or Decimal."); auto promoted_val_type = v->promoteNumericType(); if (!key_type) @@ -159,14 +155,14 @@ private: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (arguments.size() < 2) - throw Exception(getName() + " accepts at least two maps or map tuples", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "{} accepts at least two maps or map tuples", getName()); if (arguments[0]->getTypeId() == TypeIndex::Tuple) return getReturnTypeForTuples(arguments); else if (arguments[0]->getTypeId() == TypeIndex::Map) return getReturnTypeForMaps(arguments); else - throw Exception(getName() + " only accepts maps", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "{} only accepts maps", getName()); } template @@ -217,8 +213,7 @@ private: len = arg.key_offsets[i] - offset; if (arg.val_offsets[i] != arg.key_offsets[i]) - throw Exception( - "Key and value array should have same amount of elements", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Key and value array should have same amount of elements"); } Field temp_val; @@ -300,9 +295,8 @@ private: case TypeIndex::Float64: return execute2(row_count, args, res_type); default: - throw Exception( - "Illegal column type " + res_value_type->getName() + " for values in arguments of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal column type {} for values in arguments of function {}", + res_value_type->getName(), getName()); } } @@ -386,9 +380,8 @@ private: } } else - throw Exception( - "Illegal column type " + arguments[0].type->getName() + " in arguments of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal column type {} in arguments of function {}", + arguments[0].type->getName(), getName()); } // we can check const columns before any processing @@ -397,8 +390,7 @@ private: if (arg.is_const) { if (arg.val_offsets[0] != arg.key_offsets[0]) - throw Exception( - 
"Key and value array should have same amount of elements", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Key and value array should have same amount of elements"); } } @@ -439,9 +431,8 @@ private: case TypeIndex::String: return execute1(row_count, res_type, res_value_type, args); default: - throw Exception( - "Illegal column type " + key_type->getName() + " for keys in arguments of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal column type {} for keys in arguments of function {}", + key_type->getName(), getName()); } } }; diff --git a/src/Functions/array/mapPopulateSeries.cpp b/src/Functions/array/mapPopulateSeries.cpp index 8814ce26189..45732d8957c 100644 --- a/src/Functions/array/mapPopulateSeries.cpp +++ b/src/Functions/array/mapPopulateSeries.cpp @@ -80,7 +80,7 @@ private: if (!(max_key_data_type.isInt() || max_key_data_type.isUInt())) throw Exception( ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Function {} max key should be of signed or unsigned integer type. Actual type {}.", + "Function {} max key should be of signed or unsigned integer type. Actual type {}, max type {}.", getName(), key_type->getName(), max_key_type->getName()); diff --git a/src/Functions/array/range.cpp b/src/Functions/array/range.cpp index 6bf65662d8c..dc09facb81b 100644 --- a/src/Functions/array/range.cpp +++ b/src/Functions/array/range.cpp @@ -58,8 +58,8 @@ private: for (const auto & arg : arguments) { if (!isInteger(arg)) - throw Exception{"Illegal type " + arg->getName() + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", + arg->getName(), getName()); } DataTypePtr common_type = getLeastSupertype(arguments); @@ -76,21 +76,23 @@ private: [this] (const size_t lhs, const T rhs) { if (rhs < 0) - throw Exception{"A call to function " + getName() + " overflows, only support positive values when only end is provided", - ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} overflows, only support positive values when only end is provided", getName()); const auto sum = lhs + rhs; if (sum < lhs) - throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing", - ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} overflows, investigate the values " + "of arguments you are passing", getName()); return sum; }); if (total_values > max_elements) - throw Exception{"A call to function " + getName() + " would produce " + std::to_string(total_values) + - " array elements, which is greater than the allowed maximum of " + std::to_string(max_elements), - ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} would produce {} array elements, which " + "is greater than the allowed maximum of {}", + getName(), std::to_string(total_values), std::to_string(max_elements)); auto data_col = ColumnVector::create(total_values); auto offsets_col = ColumnArray::ColumnOffsets::create(in->size()); @@ -131,8 +133,7 @@ private: for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) { if (step == 0) - throw Exception{"A call to function " + getName() + " overflows, the 3rd argument step can't be zero", - 
ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "A call to function {} overflows, the 3rd argument step can't be zero", getName()); if (start < end_data[row_idx] && step > 0) row_length[row_idx] = (static_cast<__int128_t>(end_data[row_idx]) - static_cast<__int128_t>(start) - 1) / static_cast<__int128_t>(step) + 1; @@ -144,14 +145,16 @@ private: pre_values += row_length[row_idx]; if (pre_values < total_values) - throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing", - ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} overflows, investigate the values " + "of arguments you are passing", getName()); total_values = pre_values; if (total_values > max_elements) - throw Exception{"A call to function " + getName() + " would produce " + std::to_string(total_values) + - " array elements, which is greater than the allowed maximum of " + std::to_string(max_elements), - ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} would produce {} array elements, which " + "is greater than the allowed maximum of {}", + getName(), std::to_string(total_values), std::to_string(max_elements)); } auto data_col = ColumnVector::create(total_values); @@ -193,8 +196,7 @@ private: for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) { if (step == 0) - throw Exception{"A call to function " + getName() + " overflows, the 3rd argument step can't be zero", - ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "A call to function {} overflows, the 3rd argument step can't be zero", getName()); if (start_data[row_idx] < end_data[row_idx] && step > 0) row_length[row_idx] = (static_cast<__int128_t>(end_data[row_idx]) - static_cast<__int128_t>(start_data[row_idx]) - 1) / static_cast<__int128_t>(step) + 1; @@ -206,14 +208,16 @@ private: pre_values += row_length[row_idx]; if (pre_values < total_values) - throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing", - ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} overflows, investigate the values " + "of arguments you are passing", getName()); total_values = pre_values; if (total_values > max_elements) - throw Exception{"A call to function " + getName() + " would produce " + std::to_string(total_values) + - " array elements, which is greater than the allowed maximum of " + std::to_string(max_elements), - ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} would produce {} array elements, which " + "is greater than the allowed maximum of {}", + getName(), std::to_string(total_values), std::to_string(max_elements)); } auto data_col = ColumnVector::create(total_values); @@ -255,8 +259,7 @@ private: for (size_t row_idx = 0; row_idx < input_rows_count; ++row_idx) { if (step_data[row_idx] == 0) - throw Exception{"A call to function " + getName() + " overflows, the 3rd argument step can't be zero", - ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "A call to function {} overflows, the 3rd argument step can't be zero", getName()); if (start < end_data[row_idx] && step_data[row_idx] > 0) row_length[row_idx] = (static_cast<__int128_t>(end_data[row_idx]) - static_cast<__int128_t>(start) - 1) / 
static_cast<__int128_t>(step_data[row_idx]) + 1; @@ -268,14 +271,16 @@ private: pre_values += row_length[row_idx]; if (pre_values < total_values) - throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing", - ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} overflows, investigate the values " + "of arguments you are passing", getName()); total_values = pre_values; if (total_values > max_elements) - throw Exception{"A call to function " + getName() + " would produce " + std::to_string(total_values) + - " array elements, which is greater than the allowed maximum of " + std::to_string(max_elements), - ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} would produce {} array elements, which " + "is greater than the allowed maximum of {}", + getName(), std::to_string(total_values), std::to_string(max_elements)); } auto data_col = ColumnVector::create(total_values); @@ -332,14 +337,16 @@ private: pre_values += row_length[row_idx]; if (pre_values < total_values) - throw Exception{"A call to function " + getName() + " overflows, investigate the values of arguments you are passing", - ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} overflows, investigate the values " + "of arguments you are passing", getName()); total_values = pre_values; if (total_values > max_elements) - throw Exception{"A call to function " + getName() + " would produce " + std::to_string(total_values) + - " array elements, which is greater than the allowed maximum of " + std::to_string(max_elements), - ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "A call to function {} would produce {} array elements, which " + "is greater than the allowed maximum of {}", + getName(), std::to_string(total_values), std::to_string(max_elements)); } auto data_col = ColumnVector::create(total_values); @@ -369,9 +376,9 @@ private: if (!which.isNativeUInt() && !which.isNativeInt()) { - throw Exception{"Illegal columns of arguments of function " + getName() - + ", the function only implemented for unsigned/signed integers up to 64 bit", - ErrorCodes::ILLEGAL_COLUMN}; + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Illegal columns of arguments of function {}, the function only implemented " + "for unsigned/signed integers up to 64 bit", getName()); } ColumnPtr res; @@ -382,7 +389,7 @@ private: || (res = executeInternal(col)) || (res = executeInternal(col)) || (res = executeInternal(col)) || (res = executeInternal(col)) || (res = executeInternal(col)))) { - throw Exception{"Illegal column " + col->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN}; + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", col->getName(), getName()); } return res; } @@ -508,7 +515,7 @@ private: if (!res) { - throw Exception{"Illegal columns " + column_ptrs[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN}; + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal columns {} of argument of function {}", column_ptrs[0]->getName(), getName()); } return res; diff --git a/src/Functions/ascii.cpp b/src/Functions/ascii.cpp index cb59be55cc1..5632f0d15df 100644 --- a/src/Functions/ascii.cpp +++ b/src/Functions/ascii.cpp @@ -60,6 +60,16 @@ struct AsciiImpl throw 
Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function {} to UUID argument", AsciiName::name); } + [[noreturn]] static void ipv6(const ColumnIPv6::Container & /*offsets*/, size_t /*n*/, PaddedPODArray & /*res*/) + { + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function {} to IPv6 argument", AsciiName::name); + } + + [[noreturn]] static void ipv4(const ColumnIPv4::Container & /*offsets*/, size_t /*n*/, PaddedPODArray & /*res*/) + { + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function {} to IPv4 argument", AsciiName::name); + } + private: static Int32 doAscii(const ColumnString::Chars & buf, size_t offset, size_t size) { diff --git a/src/Functions/bar.cpp b/src/Functions/bar.cpp index e1f65a61175..cfe9306fabd 100644 --- a/src/Functions/bar.cpp +++ b/src/Functions/bar.cpp @@ -53,15 +53,13 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (arguments.size() != 3 && arguments.size() != 4) - throw Exception("Function " + getName() - + " requires from 3 or 4 parameters: value, min_value, max_value, [max_width_of_bar = 80]. Passed " - + toString(arguments.size()) - + ".", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Function {} requires from 3 or 4 parameters: value, min_value, max_value, [max_width_of_bar = 80]. " + "Passed {}.", getName(), arguments.size()); if (!isNumber(arguments[0]) || !isNumber(arguments[1]) || !isNumber(arguments[2]) || (arguments.size() == 4 && !isNumber(arguments[3]))) - throw Exception("All arguments for function " + getName() + " must be numeric.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "All arguments for function {} must be numeric.", getName()); return std::make_shared(); } @@ -85,13 +83,13 @@ public: } if (isNaN(max_width)) - throw Exception("Argument 'max_width' must not be NaN", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument 'max_width' must not be NaN"); if (max_width < 1) - throw Exception("Argument 'max_width' must be >= 1", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Argument 'max_width' must be >= 1"); if (max_width > 1000) - throw Exception("Argument 'max_width' must be <= 1000", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Argument 'max_width' must be <= 1000"); const auto & src = *arguments[0].column; @@ -114,7 +112,7 @@ public: max_width); if (!isFinite(width)) - throw Exception("Value of width must not be NaN and Inf", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Value of width must not be NaN and Inf"); size_t next_size = current_offset + UnicodeBar::getWidthInBytes(width) + 1; dst_chars.resize(next_size); diff --git a/src/Functions/bitAnd.cpp b/src/Functions/bitAnd.cpp index 2e3b79c6710..28f61ec66e1 100644 --- a/src/Functions/bitAnd.cpp +++ b/src/Functions/bitAnd.cpp @@ -31,7 +31,7 @@ struct BitAndImpl static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) { if (!left->getType()->isIntegerTy()) - throw Exception("BitAndImpl expected an integral type", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "BitAndImpl expected an integral type"); return b.CreateAnd(left, right); } #endif diff --git a/src/Functions/bitBoolMaskAnd.cpp b/src/Functions/bitBoolMaskAnd.cpp index 
2a0735e5ac8..11c0c1d1b7d 100644 --- a/src/Functions/bitBoolMaskAnd.cpp +++ b/src/Functions/bitBoolMaskAnd.cpp @@ -30,7 +30,7 @@ struct BitBoolMaskAndImpl // Should be a logical error, but this function is callable from SQL. // Need to investigate this. if constexpr (!std::is_same_v || !std::is_same_v) - throw DB::Exception("It's a bug! Only UInt8 type is supported by __bitBoolMaskAnd.", ErrorCodes::BAD_ARGUMENTS); + throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "It's a bug! Only UInt8 type is supported by __bitBoolMaskAnd."); auto left_bits = littleBits(left); auto right_bits = littleBits(right); diff --git a/src/Functions/bitBoolMaskOr.cpp b/src/Functions/bitBoolMaskOr.cpp index e0acde17a62..7940bf3e2ca 100644 --- a/src/Functions/bitBoolMaskOr.cpp +++ b/src/Functions/bitBoolMaskOr.cpp @@ -30,7 +30,7 @@ struct BitBoolMaskOrImpl if constexpr (!std::is_same_v || !std::is_same_v) // Should be a logical error, but this function is callable from SQL. // Need to investigate this. - throw DB::Exception("It's a bug! Only UInt8 type is supported by __bitBoolMaskOr.", ErrorCodes::BAD_ARGUMENTS); + throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "It's a bug! Only UInt8 type is supported by __bitBoolMaskOr."); auto left_bits = littleBits(left); auto right_bits = littleBits(right); diff --git a/src/Functions/bitNot.cpp b/src/Functions/bitNot.cpp index b13becedc31..f8bfad64494 100644 --- a/src/Functions/bitNot.cpp +++ b/src/Functions/bitNot.cpp @@ -31,7 +31,7 @@ struct BitNotImpl static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * arg, bool) { if (!arg->getType()->isIntegerTy()) - throw Exception("BitNotImpl expected an integral type", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "BitNotImpl expected an integral type"); return b.CreateNot(arg); } #endif diff --git a/src/Functions/bitOr.cpp b/src/Functions/bitOr.cpp index 40d5f41884e..acdad33f38c 100644 --- a/src/Functions/bitOr.cpp +++ b/src/Functions/bitOr.cpp @@ -30,7 +30,7 @@ struct BitOrImpl static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) { if (!left->getType()->isIntegerTy()) - throw Exception("BitOrImpl expected an integral type", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "BitOrImpl expected an integral type"); return b.CreateOr(left, right); } #endif diff --git a/src/Functions/bitRotateLeft.cpp b/src/Functions/bitRotateLeft.cpp index 8b99d45d9f0..c72466b8d49 100644 --- a/src/Functions/bitRotateLeft.cpp +++ b/src/Functions/bitRotateLeft.cpp @@ -23,7 +23,7 @@ struct BitRotateLeftImpl static inline NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) { if constexpr (is_big_int_v || is_big_int_v) - throw Exception("Bit rotate is not implemented for big integers", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Bit rotate is not implemented for big integers"); else return (static_cast(a) << static_cast(b)) | (static_cast(a) >> ((sizeof(Result) * 8) - static_cast(b))); @@ -35,7 +35,7 @@ struct BitRotateLeftImpl static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) { if (!left->getType()->isIntegerTy()) - throw Exception("BitRotateLeftImpl expected an integral type", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "BitRotateLeftImpl expected an integral type"); auto * size = llvm::ConstantInt::get(left->getType(), left->getType()->getPrimitiveSizeInBits()); /// XXX how is this supposed 
to behave in signed mode? return b.CreateOr(b.CreateShl(left, right), b.CreateLShr(left, b.CreateSub(size, right))); diff --git a/src/Functions/bitRotateRight.cpp b/src/Functions/bitRotateRight.cpp index 0d84fbd9f64..045758f9a31 100644 --- a/src/Functions/bitRotateRight.cpp +++ b/src/Functions/bitRotateRight.cpp @@ -23,7 +23,7 @@ struct BitRotateRightImpl static inline NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) { if constexpr (is_big_int_v || is_big_int_v) - throw Exception("Bit rotate is not implemented for big integers", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Bit rotate is not implemented for big integers"); else return (static_cast(a) >> static_cast(b)) | (static_cast(a) << ((sizeof(Result) * 8) - static_cast(b))); @@ -35,7 +35,7 @@ struct BitRotateRightImpl static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) { if (!left->getType()->isIntegerTy()) - throw Exception("BitRotateRightImpl expected an integral type", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "BitRotateRightImpl expected an integral type"); auto * size = llvm::ConstantInt::get(left->getType(), left->getType()->getPrimitiveSizeInBits()); return b.CreateOr(b.CreateLShr(left, right), b.CreateShl(left, b.CreateSub(size, right))); } diff --git a/src/Functions/bitShiftLeft.cpp b/src/Functions/bitShiftLeft.cpp index b53c2b05da0..7b3748edb5c 100644 --- a/src/Functions/bitShiftLeft.cpp +++ b/src/Functions/bitShiftLeft.cpp @@ -23,7 +23,7 @@ struct BitShiftLeftImpl static inline NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) { if constexpr (is_big_int_v) - throw Exception("BitShiftLeft is not implemented for big integers as second argument", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftLeft is not implemented for big integers as second argument"); else if constexpr (is_big_int_v) return static_cast(a) << static_cast(b); else @@ -34,7 +34,7 @@ struct BitShiftLeftImpl static ALWAYS_INLINE NO_SANITIZE_UNDEFINED void apply(const UInt8 * pos [[maybe_unused]], const UInt8 * end [[maybe_unused]], const B & b [[maybe_unused]], ColumnString::Chars & out_vec, ColumnString::Offsets & out_offsets) { if constexpr (is_big_int_v) - throw Exception("BitShiftLeft is not implemented for big integers as second argument", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftLeft is not implemented for big integers as second argument"); else { UInt8 word_size = 8; @@ -99,7 +99,7 @@ struct BitShiftLeftImpl static ALWAYS_INLINE NO_SANITIZE_UNDEFINED void apply(const UInt8 * pos [[maybe_unused]], const UInt8 * end [[maybe_unused]], const B & b [[maybe_unused]], ColumnFixedString::Chars & out_vec) { if constexpr (is_big_int_v) - throw Exception("BitShiftLeft is not implemented for big integers as second argument", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftLeft is not implemented for big integers as second argument"); else { UInt8 word_size = 8; @@ -148,7 +148,7 @@ struct BitShiftLeftImpl static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) { if (!left->getType()->isIntegerTy()) - throw Exception("BitShiftLeftImpl expected an integral type", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "BitShiftLeftImpl expected an integral type"); return b.CreateShl(left, right); } #endif diff 
--git a/src/Functions/bitShiftRight.cpp b/src/Functions/bitShiftRight.cpp index 8134a64ac53..108847f13ed 100644 --- a/src/Functions/bitShiftRight.cpp +++ b/src/Functions/bitShiftRight.cpp @@ -24,7 +24,7 @@ struct BitShiftRightImpl static inline NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) { if constexpr (is_big_int_v) - throw Exception("BitShiftRight is not implemented for big integers as second argument", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftRight is not implemented for big integers as second argument"); else if constexpr (is_big_int_v) return static_cast(a) >> static_cast(b); else @@ -50,7 +50,7 @@ struct BitShiftRightImpl static ALWAYS_INLINE NO_SANITIZE_UNDEFINED void apply(const UInt8 * pos [[maybe_unused]], const UInt8 * end [[maybe_unused]], const B & b [[maybe_unused]], ColumnString::Chars & out_vec, ColumnString::Offsets & out_offsets) { if constexpr (is_big_int_v) - throw Exception("BitShiftRight is not implemented for big integers as second argument", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftRight is not implemented for big integers as second argument"); else { UInt8 word_size = 8; @@ -87,7 +87,7 @@ struct BitShiftRightImpl static ALWAYS_INLINE NO_SANITIZE_UNDEFINED void apply(const UInt8 * pos [[maybe_unused]], const UInt8 * end [[maybe_unused]], const B & b [[maybe_unused]], ColumnFixedString::Chars & out_vec) { if constexpr (is_big_int_v) - throw Exception("BitShiftRight is not implemented for big integers as second argument", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "BitShiftRight is not implemented for big integers as second argument"); else { UInt8 word_size = 8; @@ -126,7 +126,7 @@ struct BitShiftRightImpl static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool is_signed) { if (!left->getType()->isIntegerTy()) - throw Exception("BitShiftRightImpl expected an integral type", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "BitShiftRightImpl expected an integral type"); return is_signed ? 
b.CreateAShr(left, right) : b.CreateLShr(left, right); } #endif diff --git a/src/Functions/bitSlice.cpp b/src/Functions/bitSlice.cpp index c2392760194..9b0ee4d5f1e 100644 --- a/src/Functions/bitSlice.cpp +++ b/src/Functions/bitSlice.cpp @@ -45,26 +45,23 @@ public: const size_t number_of_arguments = arguments.size(); if (number_of_arguments < 2 || number_of_arguments > 3) - throw Exception( - "Number of arguments for function " + getName() + " doesn't match: passed " + toString(number_of_arguments) - + ", should be 2 or 3", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 2 or 3", + getName(), number_of_arguments); if (!isString(arguments[0]) && !isStringOrFixedString(arguments[0])) - throw Exception( - "Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", + arguments[0]->getName(), getName()); if (arguments[0]->onlyNull()) return arguments[0]; if (!isNativeNumber(arguments[1])) - throw Exception( - "Illegal type " + arguments[1]->getName() + " of second argument of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of second argument of function {}", + arguments[1]->getName(), getName()); if (number_of_arguments == 3 && !isNativeNumber(arguments[2])) - throw Exception( - "Illegal type " + arguments[2]->getName() + " of second argument of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of second argument of function {}", + arguments[2]->getName(), getName()); return std::make_shared(); } @@ -105,9 +102,8 @@ public: return executeForSource( column_start, column_length, start_const, length_const, ConstSource(*col_const_fixed), input_rows_count); else - throw Exception( - "Illegal column " + arguments[0].column->getName() + " of first argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + arguments[0].column->getName(), getName()); } template @@ -133,7 +129,7 @@ public: bitSliceFromRightConstantOffsetUnbounded( source, StringSink(*col_res, input_rows_count), -static_cast(start_value)); else - throw Exception("Indices in strings are 1-based", ErrorCodes::ZERO_ARRAY_OR_TUPLE_INDEX); + throw Exception(ErrorCodes::ZERO_ARRAY_OR_TUPLE_INDEX, "Indices in strings are 1-based"); } else bitSliceDynamicOffsetUnbounded(source, StringSink(*col_res, input_rows_count), *column_start); @@ -151,7 +147,7 @@ public: bitSliceFromRightConstantOffsetBounded( source, StringSink(*col_res, input_rows_count), -static_cast(start_value), length_value); else - throw Exception("Indices in strings are 1-based", ErrorCodes::ZERO_ARRAY_OR_TUPLE_INDEX); + throw Exception(ErrorCodes::ZERO_ARRAY_OR_TUPLE_INDEX, "Indices in strings are 1-based"); } else bitSliceDynamicOffsetBounded(source, StringSink(*col_res, input_rows_count), *column_start, *column_length); diff --git a/src/Functions/bitSwapLastTwo.cpp b/src/Functions/bitSwapLastTwo.cpp index e7f07160693..4ca57f9b103 100644 --- a/src/Functions/bitSwapLastTwo.cpp +++ b/src/Functions/bitSwapLastTwo.cpp @@ -27,7 +27,7 @@ struct BitSwapLastTwoImpl if constexpr (!std::is_same_v) // Should 
be a logical error, but this function is callable from SQL. // Need to investigate this. - throw DB::Exception("It's a bug! Only UInt8 type is supported by __bitSwapLastTwo.", ErrorCodes::BAD_ARGUMENTS); + throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "It's a bug! Only UInt8 type is supported by __bitSwapLastTwo."); auto little_bits = littleBits(a); return static_cast(((little_bits & 1) << 1) | ((little_bits >> 1) & 1)); @@ -39,7 +39,7 @@ static constexpr bool compilable = true; static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * arg, bool) { if (!arg->getType()->isIntegerTy()) - throw Exception("__bitSwapLastTwo expected an integral type", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "__bitSwapLastTwo expected an integral type"); return b.CreateOr( b.CreateShl(b.CreateAnd(arg, 1), 1), b.CreateAnd(b.CreateLShr(arg, 1), 1) diff --git a/src/Functions/bitTest.cpp b/src/Functions/bitTest.cpp index ac21423ced6..4c9c6aa2dfb 100644 --- a/src/Functions/bitTest.cpp +++ b/src/Functions/bitTest.cpp @@ -24,7 +24,7 @@ struct BitTestImpl NO_SANITIZE_UNDEFINED static inline Result apply(A a [[maybe_unused]], B b [[maybe_unused]]) { if constexpr (is_big_int_v || is_big_int_v) - throw Exception("bitTest is not implemented for big integers as second argument", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "bitTest is not implemented for big integers as second argument"); else return (typename NumberTraits::ToInteger::Type(a) >> typename NumberTraits::ToInteger::Type(b)) & 1; } diff --git a/src/Functions/bitWrapperFunc.cpp b/src/Functions/bitWrapperFunc.cpp index 83c89c753fc..f0851176513 100644 --- a/src/Functions/bitWrapperFunc.cpp +++ b/src/Functions/bitWrapperFunc.cpp @@ -27,7 +27,7 @@ struct BitWrapperFuncImpl // Should be a logical error, but this function is callable from SQL. // Need to investigate this. if constexpr (!is_integer) - throw DB::Exception("It's a bug! Only integer types are supported by __bitWrapperFunc.", ErrorCodes::BAD_ARGUMENTS); + throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, "It's a bug! Only integer types are supported by __bitWrapperFunc."); return a == 0 ? static_cast(0b10) : static_cast(0b1); } diff --git a/src/Functions/bitXor.cpp b/src/Functions/bitXor.cpp index 89aaf5eadd1..78c4c64d06e 100644 --- a/src/Functions/bitXor.cpp +++ b/src/Functions/bitXor.cpp @@ -30,7 +30,7 @@ struct BitXorImpl static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) { if (!left->getType()->isIntegerTy()) - throw Exception("BitXorImpl expected an integral type", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "BitXorImpl expected an integral type"); return b.CreateXor(left, right); } #endif diff --git a/src/Functions/caseWithExpression.cpp b/src/Functions/caseWithExpression.cpp index b198d60eb7d..c8b8dbd76cf 100644 --- a/src/Functions/caseWithExpression.cpp +++ b/src/Functions/caseWithExpression.cpp @@ -31,8 +31,7 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & args) const override { if (args.empty()) - throw Exception{"Function " + getName() + " expects at least 1 arguments", - ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION}; + throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function {} expects at least 1 arguments", getName()); /// See the comments in executeImpl() to understand why we actually have to /// get the return type of a transform function. 
@@ -52,8 +51,7 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & args, const DataTypePtr & result_type, size_t input_rows_count) const override { if (args.empty()) - throw Exception{"Function " + getName() + " expects at least 1 argument", - ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION}; + throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function {} expects at least 1 argument", getName()); /// In the following code, we turn the construction: /// CASE expr WHEN val[0] THEN branch[0] ... WHEN val[N-1] then branch[N-1] ELSE branchN diff --git a/src/Functions/checkHyperscanRegexp.cpp b/src/Functions/checkHyperscanRegexp.cpp index ba9705208d5..4a1bc4f9031 100644 --- a/src/Functions/checkHyperscanRegexp.cpp +++ b/src/Functions/checkHyperscanRegexp.cpp @@ -17,12 +17,13 @@ void checkHyperscanRegexp(const std::vector & regexps, size_t for (const auto & regexp : regexps) { if (max_hyperscan_regexp_length > 0 && regexp.size() > max_hyperscan_regexp_length) - throw Exception("Regexp length too large", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Regexp length too large ({} > {})", regexp.size(), max_hyperscan_regexp_length); total_regexp_length += regexp.size(); } if (max_hyperscan_regexp_total_length > 0 && total_regexp_length > max_hyperscan_regexp_total_length) - throw Exception("Total regexp lengths too large", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Total regexp lengths too large ({} > {})", + total_regexp_length, max_hyperscan_regexp_total_length); } } diff --git a/src/Functions/concat.cpp b/src/Functions/concat.cpp index 9f459711aa5..1bdd155aaa1 100644 --- a/src/Functions/concat.cpp +++ b/src/Functions/concat.cpp @@ -146,8 +146,8 @@ private: constant_strings[i] = const_col->getValue(); } else - throw Exception( - "Illegal column " + column->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", + column->getName(), getName()); } String pattern; @@ -213,10 +213,9 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (arguments.size() < 2) - throw Exception( - "Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size()) - + ", should be at least 2.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be at least 2.", + getName(), arguments.size()); /// We always return Strings from concat, even if arguments were fixed strings. 
return std::make_shared(); diff --git a/src/Functions/convertCharset.cpp b/src/Functions/convertCharset.cpp index dbe23fc4bcb..b3b7394acb9 100644 --- a/src/Functions/convertCharset.cpp +++ b/src/Functions/convertCharset.cpp @@ -67,8 +67,8 @@ private: &status); if (!U_SUCCESS(status)) - throw Exception("Cannot create UConverter with charset " + charset + ", error: " + String(u_errorName(status)), - ErrorCodes::CANNOT_CREATE_CHARSET_CONVERTER); + throw Exception(ErrorCodes::CANNOT_CREATE_CHARSET_CONVERTER, "Cannot create UConverter with charset {}, error: {}", + charset, String(u_errorName(status))); } ~Converter() @@ -123,8 +123,8 @@ private: &status); if (!U_SUCCESS(status)) - throw Exception("Cannot convert from charset " + from_charset + ", error: " + String(u_errorName(status)), - ErrorCodes::CANNOT_CONVERT_CHARSET); + throw Exception(ErrorCodes::CANNOT_CONVERT_CHARSET, "Cannot convert from charset {}, error: {}", + from_charset, String(u_errorName(status))); auto max_to_char_size = ucnv_getMaxCharSize(converter_to->impl); auto max_to_size = UCNV_GET_MAX_BYTES_FOR_STRING(res, max_to_char_size); @@ -138,8 +138,8 @@ private: &status); if (!U_SUCCESS(status)) - throw Exception("Cannot convert to charset " + to_charset + ", error: " + String(u_errorName(status)), - ErrorCodes::CANNOT_CONVERT_CHARSET); + throw Exception(ErrorCodes::CANNOT_CONVERT_CHARSET, "Cannot convert to charset {}, error: {}", + to_charset, String(u_errorName(status))); current_to_offset += res; } @@ -175,8 +175,8 @@ public: { for (size_t i : collections::range(0, 3)) if (!isString(arguments[i])) - throw Exception("Illegal type " + arguments[i]->getName() + " of argument of function " + getName() - + ", must be String", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}, must be String", + arguments[i]->getName(), getName()); return std::make_shared(); } @@ -194,8 +194,9 @@ public: const ColumnConst * col_charset_to = checkAndGetColumnConstStringOrFixedString(arg_charset_to.column.get()); if (!col_charset_from || !col_charset_to) - throw Exception("2nd and 3rd arguments of function " + getName() + " (source charset and destination charset) must be constant strings.", - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "2nd and 3rd arguments of function {} (source charset and destination charset) must " + "be constant strings.", getName()); String charset_from = col_charset_from->getValue(); String charset_to = col_charset_to->getValue(); @@ -207,8 +208,7 @@ public: return col_to; } else - throw Exception("Illegal column passed as first argument of function " + getName() + " (must be ColumnString).", - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column passed as first argument of function {} (must be ColumnString).", getName()); } }; diff --git a/src/Functions/countDigits.cpp b/src/Functions/countDigits.cpp index e5142a89689..aefe0d92d94 100644 --- a/src/Functions/countDigits.cpp +++ b/src/Functions/countDigits.cpp @@ -43,8 +43,8 @@ public: WhichDataType which_first(arguments[0]->getTypeId()); if (!which_first.isInt() && !which_first.isUInt() && !which_first.isDecimal()) - throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", + arguments[0]->getName(), getName()); return 
std::make_shared(); /// Up to 255 decimal digits. } @@ -53,7 +53,7 @@ public: { const auto & src_column = arguments[0]; if (!src_column.column) - throw Exception("Illegal column while execute function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal column while execute function {}", getName()); auto result_column = ColumnUInt8::create(); @@ -69,13 +69,12 @@ public: return true; } - throw Exception("Illegal column while execute function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal column while execute function {}", getName()); }; TypeIndex dec_type_idx = src_column.type->getTypeId(); if (!callOnBasicType(dec_type_idx, call)) - throw Exception("Wrong call for " + getName() + " with " + src_column.type->getName(), - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Wrong call for {} with {}", getName(), src_column.type->getName()); return result_column; } diff --git a/src/Functions/dateDiff.cpp b/src/Functions/dateDiff.cpp index 60668f81edf..457b77b9843 100644 --- a/src/Functions/dateDiff.cpp +++ b/src/Functions/dateDiff.cpp @@ -225,8 +225,8 @@ public: } else if constexpr (std::is_same_v>>) { - auto x_day_of_week = TransformDateTime64(transform_x.getScaleMultiplier()).execute(x, timezone_x); - auto y_day_of_week = TransformDateTime64(transform_y.getScaleMultiplier()).execute(y, timezone_y); + auto x_day_of_week = TransformDateTime64(transform_x.getScaleMultiplier()).execute(x, 0, timezone_x); + auto y_day_of_week = TransformDateTime64(transform_y.getScaleMultiplier()).execute(y, 0, timezone_y); if ((x_day_of_week > y_day_of_week) || ((x_day_of_week == y_day_of_week) && (a_comp.time.hour > b_comp.time.hour)) || ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute) @@ -406,9 +406,9 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (arguments.size() != 2) - throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " - + toString(arguments.size()) + ", should be 2", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 2", + getName(), arguments.size()); if (!isDate(arguments[0]) && !isDate32(arguments[0]) && !isDateTime(arguments[0]) && !isDateTime64(arguments[0])) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, diff --git a/src/Functions/dateName.cpp b/src/Functions/dateName.cpp index 36c0be49190..bfb190b9a08 100644 --- a/src/Functions/dateName.cpp +++ b/src/Functions/dateName.cpp @@ -276,7 +276,7 @@ private: { static inline void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) { - const auto day = ToDayOfWeekImpl::execute(source, timezone); + const auto day = ToDayOfWeekImpl::execute(source, 0, timezone); static constexpr std::string_view day_names[] = { "Monday", diff --git a/src/Functions/date_trunc.cpp b/src/Functions/date_trunc.cpp index 1c2475cf56a..016b8f4da5e 100644 --- a/src/Functions/date_trunc.cpp +++ b/src/Functions/date_trunc.cpp @@ -44,17 +44,16 @@ public: auto check_first_argument = [&] { const ColumnConst * datepart_column = checkAndGetColumnConst(arguments[0].column.get()); if (!datepart_column) - throw Exception("First argument for function " + getName() + " must be constant string: name of datepart", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + 
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be constant string: " + "name of datepart", getName()); datepart_param = datepart_column->getValue(); if (datepart_param.empty()) - throw Exception("First argument (name of datepart) for function " + getName() + " cannot be empty", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "First argument (name of datepart) for function {} cannot be empty", + getName()); if (!IntervalKind::tryParseString(datepart_param, datepart_kind)) - throw Exception(datepart_param + " doesn't look like datepart name in " + getName(), - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "{} doesn't look like datepart name in {}", datepart_param, getName()); result_type_is_date = (datepart_kind == IntervalKind::Year) || (datepart_kind == IntervalKind::Quarter) || (datepart_kind == IntervalKind::Month) @@ -64,30 +63,27 @@ public: bool second_argument_is_date = false; auto check_second_argument = [&] { if (!isDate(arguments[1].type) && !isDateTime(arguments[1].type) && !isDateTime64(arguments[1].type)) - throw Exception( - "Illegal type " + arguments[1].type->getName() + " of 2nd argument of function " + getName() - + ". Should be a date or a date with time", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of 2nd argument of function {}. " + "Should be a date or a date with time", arguments[1].type->getName(), getName()); second_argument_is_date = isDate(arguments[1].type); if (second_argument_is_date && ((datepart_kind == IntervalKind::Hour) || (datepart_kind == IntervalKind::Minute) || (datepart_kind == IntervalKind::Second))) - throw Exception("Illegal type Date of argument for function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type Date of argument for function {}", getName()); }; auto check_timezone_argument = [&] { if (!WhichDataType(arguments[2].type).isString()) - throw Exception( - "Illegal type " + arguments[2].type->getName() + " of argument of function " + getName() - + ". This argument is optional and must be a constant string with timezone name", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}. 
" + "This argument is optional and must be a constant string with timezone name", + arguments[2].type->getName(), getName()); if (second_argument_is_date && result_type_is_date) - throw Exception( - "The timezone argument of function " + getName() + " with datepart '" + datepart_param - + "' is allowed only when the 2nd argument has the type DateTime", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "The timezone argument of function {} with datepart '{}' " + "is allowed only when the 2nd argument has the type DateTime", + getName(), datepart_param); }; if (arguments.size() == 2) @@ -103,10 +99,9 @@ public: } else { - throw Exception( - "Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size()) - + ", should be 2 or 3", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 2 or 3", + getName(), arguments.size()); } if (result_type_is_date) diff --git a/src/Functions/decodeXMLComponent.cpp b/src/Functions/decodeXMLComponent.cpp index 80d1b5c3d79..8b84bb1194e 100644 --- a/src/Functions/decodeXMLComponent.cpp +++ b/src/Functions/decodeXMLComponent.cpp @@ -56,7 +56,7 @@ namespace [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) { - throw Exception("Function decodeXMLComponent cannot work with FixedString argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function decodeXMLComponent cannot work with FixedString argument"); } private: diff --git a/src/Functions/defaultValueOfTypeName.cpp b/src/Functions/defaultValueOfTypeName.cpp index b20048bbde7..4941340ab1e 100644 --- a/src/Functions/defaultValueOfTypeName.cpp +++ b/src/Functions/defaultValueOfTypeName.cpp @@ -48,8 +48,8 @@ public: { const ColumnConst * col_type_const = typeid_cast(arguments.front().column.get()); if (!col_type_const || !isString(arguments.front().type)) - throw Exception("The argument of function " + getName() + " must be a constant string describing type.", - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "The argument of function {} must be a constant string describing type.", + getName()); return DataTypeFactory::instance().get(col_type_const->getValue()); } diff --git a/src/Functions/demange.cpp b/src/Functions/demange.cpp index a7c3d8e52bf..301bea3ab37 100644 --- a/src/Functions/demange.cpp +++ b/src/Functions/demange.cpp @@ -49,14 +49,14 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { if (arguments.size() != 1) - throw Exception("Function " + getName() + " needs exactly one argument; passed " - + toString(arguments.size()) + ".", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} needs exactly one argument; passed {}.", + getName(), arguments.size()); const auto & type = arguments[0].type; if (!WhichDataType(type.get()).isString()) - throw Exception("The only argument for function " + getName() + " must be String. Found " - + type->getName() + " instead.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The only argument for function {} must be String. 
" + "Found {} instead.", getName(), type->getName()); return std::make_shared(); } @@ -72,7 +72,7 @@ public: const ColumnString * column_concrete = checkAndGetColumn(column.get()); if (!column_concrete) - throw Exception("Illegal column " + column->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", column->getName(), getName()); auto result_column = ColumnString::create(); diff --git a/src/Functions/divide.cpp b/src/Functions/divide.cpp index 1d042e19b9f..ca552256cd1 100644 --- a/src/Functions/divide.cpp +++ b/src/Functions/divide.cpp @@ -27,7 +27,7 @@ struct DivideFloatingImpl static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * left, llvm::Value * right, bool) { if (left->getType()->isIntegerTy()) - throw Exception("DivideFloatingImpl expected a floating-point type", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "DivideFloatingImpl expected a floating-point type"); return b.CreateFDiv(left, right); } #endif diff --git a/src/Functions/divideDecimal.cpp b/src/Functions/divideDecimal.cpp index 3c3fb03813b..b1e1296e790 100644 --- a/src/Functions/divideDecimal.cpp +++ b/src/Functions/divideDecimal.cpp @@ -49,7 +49,7 @@ struct DivideDecimalsImpl std::vector divided = DecimalOpHelpers::divide(a_digits, b.value * sign_b); if (divided.size() > DecimalUtils::max_precision) - throw DB::Exception("Numeric overflow: result bigger that Decimal256", ErrorCodes::DECIMAL_OVERFLOW); + throw DB::Exception(ErrorCodes::DECIMAL_OVERFLOW, "Numeric overflow: result bigger that Decimal256"); return Decimal256(sign_a * sign_b * DecimalOpHelpers::fromDigits(divided)); } }; diff --git a/src/Functions/encodeXMLComponent.cpp b/src/Functions/encodeXMLComponent.cpp index 5d31d7f463d..64d85ecaeb8 100644 --- a/src/Functions/encodeXMLComponent.cpp +++ b/src/Functions/encodeXMLComponent.cpp @@ -51,7 +51,7 @@ namespace [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) { - throw Exception("Function encodeXML cannot work with FixedString argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function encodeXML cannot work with FixedString argument"); } private: diff --git a/src/Functions/evalMLMethod.cpp b/src/Functions/evalMLMethod.cpp index 003aaa2d312..346c8249905 100644 --- a/src/Functions/evalMLMethod.cpp +++ b/src/Functions/evalMLMethod.cpp @@ -58,12 +58,13 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (arguments.empty()) - throw Exception("Function " + getName() + " requires at least one argument", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function {} requires at least one argument", getName()); const auto * type = checkAndGetDataType(arguments[0].get()); if (!type) - throw Exception("Argument for function " + getName() + " must have type AggregateFunction - state of aggregate function.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Argument for function {} must have type AggregateFunction - state " + "of aggregate function.", getName()); return type->getReturnTypeToPredict(); } @@ -71,7 +72,7 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override { if (arguments.empty()) - throw Exception("Function " + getName() + " requires at least one 
argument", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function {} requires at least one argument", getName()); const auto * model = arguments[0].column.get(); @@ -81,8 +82,8 @@ public: const auto * agg_function = typeid_cast(model); if (!agg_function) - throw Exception("Illegal column " + arguments[0].column->getName() - + " of first argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + arguments[0].column->getName(), getName()); return agg_function->predictValues(arguments, context); } diff --git a/src/Functions/extractAllGroups.h b/src/Functions/extractAllGroups.h index e077086a359..faee25aa0ab 100644 --- a/src/Functions/extractAllGroups.h +++ b/src/Functions/extractAllGroups.h @@ -92,24 +92,23 @@ public: const auto needle = typeid_cast(*column_needle).getValue(); if (needle.empty()) - throw Exception("Length of 'needle' argument must be greater than 0.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Length of 'needle' argument must be greater than 0."); using StringPiece = typename Regexps::Regexp::StringPieceType; const Regexps::Regexp holder = Regexps::createRegexp(needle); const auto & regexp = holder.getRE2(); if (!regexp) - throw Exception("There are no groups in regexp: " + needle, ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "There are no groups in regexp: {}", needle); const size_t groups_count = regexp->NumberOfCapturingGroups(); if (!groups_count) - throw Exception("There are no groups in regexp: " + needle, ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "There are no groups in regexp: {}", needle); if (groups_count > MAX_GROUPS_COUNT - 1) - throw Exception("Too many groups in regexp: " + std::to_string(groups_count) - + ", max: " + std::to_string(MAX_GROUPS_COUNT - 1), - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Too many groups in regexp: {}, max: {}", + groups_count, std::to_string(MAX_GROUPS_COUNT - 1)); // Including 0-group, which is the whole regexp. PODArrayWithStackMemory matched_groups(groups_count + 1); diff --git a/src/Functions/extractGroups.cpp b/src/Functions/extractGroups.cpp index 8ec389827db..6744edda922 100644 --- a/src/Functions/extractGroups.cpp +++ b/src/Functions/extractGroups.cpp @@ -61,18 +61,18 @@ public: const auto needle = typeid_cast(*column_needle).getValue(); if (needle.empty()) - throw Exception(getName() + " length of 'needle' argument must be greater than 0.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "{} length of 'needle' argument must be greater than 0.", getName()); const Regexps::Regexp regexp = Regexps::createRegexp(needle); const auto & re2 = regexp.getRE2(); if (!re2) - throw Exception("There are no groups in regexp: " + needle, ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "There are no groups in regexp: {}", needle); const size_t groups_count = re2->NumberOfCapturingGroups(); if (!groups_count) - throw Exception("There are no groups in regexp: " + needle, ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "There are no groups in regexp: {}", needle); // Including 0-group, which is the whole regexp. 
PODArrayWithStackMemory matched_groups(groups_count + 1); diff --git a/src/Functions/extractTextFromHTML.cpp b/src/Functions/extractTextFromHTML.cpp index 3c95431452f..a15611579bb 100644 --- a/src/Functions/extractTextFromHTML.cpp +++ b/src/Functions/extractTextFromHTML.cpp @@ -305,8 +305,8 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (!isString(arguments[0])) - throw Exception( - "Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", + arguments[0]->getName(), getName()); return arguments[0]; } @@ -314,7 +314,7 @@ public: { const ColumnString * src = checkAndGetColumn(arguments[0].column.get()); if (!src) - throw Exception("First argument for function " + getName() + " must be string.", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "First argument for function {} must be string.", getName()); const ColumnString::Chars & src_chars = src->getChars(); const ColumnString::Offsets & src_offsets = src->getOffsets(); diff --git a/src/Functions/extractTimeZoneFromFunctionArguments.cpp b/src/Functions/extractTimeZoneFromFunctionArguments.cpp index 88e1d664bf0..5a20ca7e962 100644 --- a/src/Functions/extractTimeZoneFromFunctionArguments.cpp +++ b/src/Functions/extractTimeZoneFromFunctionArguments.cpp @@ -22,9 +22,9 @@ std::string extractTimeZoneNameFromColumn(const IColumn & column) const ColumnConst * time_zone_column = checkAndGetColumnConst(&column); if (!time_zone_column) - throw Exception("Illegal column " + column.getName() - + " of time zone argument of function, must be constant string", - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {} of time zone argument of function, must be constant string", + column.getName()); return time_zone_column->getValue(); } @@ -59,7 +59,7 @@ const DateLUTImpl & extractTimeZoneFromFunctionArguments(const ColumnsWithTypeAn { std::string time_zone = extractTimeZoneNameFromColumn(*arguments[time_zone_arg_num].column); if (time_zone.empty()) - throw Exception("Provided time zone must be non-empty and be a valid time zone", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Provided time zone must be non-empty and be a valid time zone"); return DateLUT::instance(time_zone); } else diff --git a/src/Functions/filesystem.cpp b/src/Functions/filesystem.cpp index 7af1c61d3b8..1eb1c27211c 100644 --- a/src/Functions/filesystem.cpp +++ b/src/Functions/filesystem.cpp @@ -65,12 +65,12 @@ public: { if (arguments.size() > 1) { - throw Exception("Arguments size of function " + getName() + " should be 0 or 1", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Arguments size of function {} should be 0 or 1", getName()); } if (arguments.size() == 1 && !isStringOrFixedString(arguments[0])) { - throw Exception( - "Arguments of function " + getName() + " should be String or FixedString", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Arguments of function {} should be String or FixedString", + getName()); } return std::make_shared(); } @@ -97,13 +97,12 @@ public: if (auto it = disk_map.find(disk_name); it != disk_map.end()) data[i] = Impl::get(it->second); else - throw Exception( - "Unknown disk name " + disk_name + " while execute function " + 
getName(), ErrorCodes::UNKNOWN_DISK); + throw Exception(ErrorCodes::UNKNOWN_DISK, "Unknown disk name {} while execute function {}", disk_name, getName()); } return col_res; } - throw Exception( - "Illegal column " + arguments[0].column->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", + arguments[0].column->getName(), getName()); } } diff --git a/src/Functions/finalizeAggregation.cpp b/src/Functions/finalizeAggregation.cpp index 61604a3abc0..24561255339 100644 --- a/src/Functions/finalizeAggregation.cpp +++ b/src/Functions/finalizeAggregation.cpp @@ -60,10 +60,8 @@ public: { auto column = arguments.at(0).column; if (!typeid_cast(column.get())) - throw Exception("Illegal column " + arguments.at(0).column->getName() - + " of first argument of function " - + getName(), - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + arguments.at(0).column->getName(), getName()); /// Column is copied here, because there is no guarantee that we own it. auto mut_column = IColumn::mutate(std::move(column)); diff --git a/src/Functions/formatDateTime.cpp b/src/Functions/formatDateTime.cpp index e7c9a1b5103..630add20835 100644 --- a/src/Functions/formatDateTime.cpp +++ b/src/Functions/formatDateTime.cpp @@ -344,13 +344,13 @@ private: static size_t mysqlDayOfWeek(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) { - *dest = '0' + ToDayOfWeekImpl::execute(source, timezone); + *dest = '0' + ToDayOfWeekImpl::execute(source, 0, timezone); return 1; } static size_t mysqlDayOfWeek0To6(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) { - auto day = ToDayOfWeekImpl::execute(source, timezone); + auto day = ToDayOfWeekImpl::execute(source, 0, timezone); *dest = '0' + (day == 7 ? 0 : day); return 1; } @@ -478,7 +478,7 @@ private: return res.size(); } - static size_t jodaCentryOfEra(size_t min_represent_digits, char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) + static size_t jodaCenturyOfEra(size_t min_represent_digits, char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) { auto year = static_cast(ToYearImpl::execute(source, timezone)); year = (year < 0 ? -year : year); @@ -499,13 +499,13 @@ private: static size_t jodaDayOfWeek1Based(size_t min_represent_digits, char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) { - auto week_day = ToDayOfWeekImpl::execute(source, timezone); + auto week_day = ToDayOfWeekImpl::execute(source, 0, timezone); return writeNumberWithPadding(dest, week_day, min_represent_digits); } static size_t jodaDayOfWeekText(size_t min_represent_digits, char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) { - auto week_day = ToDayOfWeekImpl::execute(source, timezone); + auto week_day = ToDayOfWeekImpl::execute(source, 0, timezone); if (week_day == 7) week_day = 0; @@ -641,8 +641,9 @@ public: arguments[0].type->getName(), getName()); if (arguments.size() > 1 && !(isInteger(arguments[0].type) || isDate(arguments[0].type) || isDateTime(arguments[0].type) || isDate32(arguments[0].type) || isDateTime64(arguments[0].type))) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "Illegal type {} of first argument of function {} when arguments size is 2 or 3. 
Should be a integer or a date with time", - arguments[0].type->getName(), getName()); + "Illegal type {} of first argument of function {} when arguments size is 2 or 3. " + "Should be a integer or a date with time", + arguments[0].type->getName(), getName()); } else { @@ -686,8 +687,9 @@ public: })) { throw Exception(ErrorCodes::ILLEGAL_COLUMN, - "Illegal column {} of function {}, must be Integer, Date, Date32, DateTime or DateTime64 when arguments size is 1.", - arguments[0].column->getName(), getName()); + "Illegal column {} of function {}, must be Integer, Date, Date32, DateTime " + "or DateTime64 when arguments size is 1.", + arguments[0].column->getName(), getName()); } } else @@ -867,7 +869,7 @@ public: pos = percent_pos + 1; if (pos >= end) - throw Exception("Sign '%' is the last in format, if you need it, use '%%'", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Sign '%' is the last in format, if you need it, use '%%'"); switch (*pos) { @@ -1103,7 +1105,7 @@ public: // Case 2: find closing single quote Int64 count = numLiteralChars(cur_token + 1, end); if (count == -1) - throw Exception("No closing single quote for literal", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "No closing single quote for literal"); else { for (Int64 i = 1; i <= count; i++) @@ -1135,7 +1137,7 @@ public: reserve_size += repetitions <= 3 ? 2 : 13; break; case 'C': - instructions.emplace_back(std::bind_front(&Action::jodaCentryOfEra, repetitions)); + instructions.emplace_back(std::bind_front(&Action::jodaCenturyOfEra, repetitions)); /// Year range [1900, 2299] reserve_size += std::max(repetitions, 2); break; @@ -1145,9 +1147,9 @@ public: reserve_size += repetitions == 2 ? 2 : std::max(repetitions, 4); break; case 'x': - throw Exception("format is not supported for WEEK_YEAR", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "format is not supported for WEEK_YEAR"); case 'w': - throw Exception("format is not supported for WEEK_OF_WEEK_YEAR", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "format is not supported for WEEK_OF_WEEK_YEAR"); case 'e': instructions.emplace_back(std::bind_front(&Action::jodaDayOfWeek1Based, repetitions)); /// Day of week range [1, 7] @@ -1232,7 +1234,7 @@ public: reserve_size += std::max(repetitions, 2); break; case 'S': - throw Exception("format is not supported for FRACTION_OF_SECOND", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "format is not supported for FRACTION_OF_SECOND"); case 'z': if (repetitions <= 3) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Short name time zone is not yet supported"); @@ -1242,7 +1244,7 @@ public: reserve_size += 32; break; case 'Z': - throw Exception("format is not supported for TIMEZONE_OFFSET_ID", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "format is not supported for TIMEZONE_OFFSET_ID"); default: if (isalpha(*cur_token)) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "format is not supported for {}", String(cur_token, repetitions)); diff --git a/src/Functions/formatReadable.h b/src/Functions/formatReadable.h index 0378e1f82f2..487ec9d79d0 100644 --- a/src/Functions/formatReadable.h +++ b/src/Functions/formatReadable.h @@ -48,7 +48,7 @@ public: const IDataType & type = *arguments[0]; if (!isNativeNumber(type)) - throw Exception("Cannot format " + type.getName() + " because it's not a native numeric type", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw 
Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot format {} because it's not a native numeric type", type.getName()); return std::make_shared(); } @@ -68,9 +68,8 @@ public: || (res = executeType(arguments)) || (res = executeType(arguments)) || (res = executeType(arguments)))) - throw Exception("Illegal column " + arguments[0].column->getName() - + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", + arguments[0].column->getName(), getName()); return res; } diff --git a/src/Functions/formatReadableTimeDelta.cpp b/src/Functions/formatReadableTimeDelta.cpp index ec83c0e306f..5fd48c01e8f 100644 --- a/src/Functions/formatReadableTimeDelta.cpp +++ b/src/Functions/formatReadableTimeDelta.cpp @@ -51,28 +51,26 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (arguments.empty()) - throw Exception( - "Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size()) - + ", should be at least 1.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be at least 1.", + getName(), arguments.size()); if (arguments.size() > 2) - throw Exception( - "Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size()) - + ", should be at most 2.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be at most 2.", + getName(), arguments.size()); const IDataType & type = *arguments[0]; if (!isNativeNumber(type)) - throw Exception("Cannot format " + type.getName() + " as time delta", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot format {} as time delta", type.getName()); if (arguments.size() == 2) { const auto * maximum_unit_arg = arguments[1].get(); if (!isStringOrFixedString(maximum_unit_arg)) - throw Exception("Illegal type " + maximum_unit_arg->getName() + " of argument maximum_unit of function " - + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument maximum_unit of function {}", + maximum_unit_arg->getName(), getName()); } return std::make_shared(); diff --git a/src/Functions/formatRow.cpp b/src/Functions/formatRow.cpp index abfe2ce53c4..53fb5d61472 100644 --- a/src/Functions/formatRow.cpp +++ b/src/Functions/formatRow.cpp @@ -38,7 +38,7 @@ public: FunctionFormatRow(const String & format_name_, ContextPtr context_) : format_name(format_name_), context(context_) { if (!FormatFactory::instance().getAllFormats().contains(format_name)) - throw Exception("Unknown format " + format_name, ErrorCodes::UNKNOWN_FORMAT); + throw Exception(ErrorCodes::UNKNOWN_FORMAT, "Unknown format {}", format_name); } String getName() const override { return name; } @@ -65,7 +65,9 @@ public: /// This function make sense only for row output formats. auto * row_output_format = dynamic_cast(out.get()); if (!row_output_format) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot turn rows into a {} format strings. {} function supports only row output formats", format_name, getName()); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Cannot turn rows into a {} format strings. 
{} function supports only row output formats", + format_name, getName()); auto columns = arg_columns.getColumns(); for (size_t i = 0; i != input_rows_count; ++i) @@ -110,9 +112,8 @@ public: FunctionBasePtr buildImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & return_type) const override { if (arguments.size() < 2) - throw Exception( - "Function " + getName() + " requires at least two arguments: the format name and its output expression(s)", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Function {} requires at least two arguments: the format name and its output expression(s)", getName()); if (const auto * name_col = checkAndGetColumnConst(arguments.at(0).column.get())) return std::make_unique( @@ -120,7 +121,7 @@ public: collections::map(arguments, [](const auto & elem) { return elem.type; }), return_type); else - throw Exception("First argument to " + getName() + " must be a format name", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument to {} must be a format name", getName()); } DataTypePtr getReturnTypeImpl(const DataTypes &) const override { return std::make_shared(); } diff --git a/src/Functions/formatString.cpp b/src/Functions/formatString.cpp index 3d04d6e94a1..ee6e26b775a 100644 --- a/src/Functions/formatString.cpp +++ b/src/Functions/formatString.cpp @@ -73,7 +73,7 @@ public: const ColumnConst * c0_const_string = typeid_cast(&*c0); if (!c0_const_string) - throw Exception("First argument of function " + getName() + " must be constant string", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "First argument of function {} must be constant string", getName()); String pattern = c0_const_string->getValue(); @@ -106,8 +106,8 @@ public: constant_strings[i - 1] = const_col->getValue(); } else - throw Exception( - "Illegal column " + column->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", + column->getName(), getName()); } FormatImpl::formatExecute( diff --git a/src/Functions/fromModifiedJulianDay.cpp b/src/Functions/fromModifiedJulianDay.cpp index b22711da811..53e5fbffe52 100644 --- a/src/Functions/fromModifiedJulianDay.cpp +++ b/src/Functions/fromModifiedJulianDay.cpp @@ -192,16 +192,14 @@ namespace DB return std::make_unique>(argument_types, return_type); else // Should not happen. 
- throw Exception( - "The argument of function " + getName() + " must be integral", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The argument of function {} must be integral", getName()); } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (!isInteger(arguments[0])) { - throw Exception( - "The argument of function " + getName() + " must be integral", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The argument of function {} must be integral", getName()); } DataTypePtr base_type = std::make_shared(); diff --git a/src/Functions/fuzzBits.cpp b/src/Functions/fuzzBits.cpp index d97e8aff7ff..cd47b332219 100644 --- a/src/Functions/fuzzBits.cpp +++ b/src/Functions/fuzzBits.cpp @@ -69,11 +69,11 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { if (!isStringOrFixedString(arguments[0].type)) - throw Exception( - "First argument of function " + getName() + " must be String or FixedString", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument of function {} must be String or FixedString", + getName()); if (!arguments[1].column || !isFloat(arguments[1].type)) - throw Exception("Second argument of function " + getName() + " must be constant float", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument of function {} must be constant float", getName()); return arguments[0].type; } @@ -88,7 +88,7 @@ public: if (inverse_probability < 0.0 || 1.0 < inverse_probability) { - throw Exception("Second argument of function " + getName() + " must be from `0.0` to `1.0`", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Second argument of function {} must be from `0.0` to `1.0`", getName()); } if (const ColumnConst * col_in_untyped_const = checkAndGetColumnConstStringOrFixedString(col_in_untyped.get())) @@ -149,7 +149,7 @@ public: size_t total_size; if (common::mulOverflow(input_rows_count, n, total_size)) - throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW); + throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "Decimal math overflow"); chars_to.resize(total_size); @@ -170,9 +170,8 @@ public: } else { - throw Exception( - "Illegal column " + arguments[0].column->getName() + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", + arguments[0].column->getName(), getName()); } } }; diff --git a/src/Functions/generateUUIDv4.cpp b/src/Functions/generateUUIDv4.cpp index da4e324182a..1e89d9b5167 100644 --- a/src/Functions/generateUUIDv4.cpp +++ b/src/Functions/generateUUIDv4.cpp @@ -36,9 +36,9 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (arguments.size() > 1) - throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " - + toString(arguments.size()) + ", should be 0 or 1.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 0 or 1.", + getName(), arguments.size()); return std::make_shared(); } diff --git a/src/Functions/geoToH3.cpp b/src/Functions/geoToH3.cpp index 91c0e5b2361..52008f2e165 100644 --- a/src/Functions/geoToH3.cpp +++ 
b/src/Functions/geoToH3.cpp @@ -118,7 +118,7 @@ public: if (res > MAX_H3_RES) throw Exception( ErrorCodes::ARGUMENT_OUT_OF_BOUND, - "The argument 'resolution' ({}) of function {} is out of bounds because the maximum resolution in H3 library is ", + "The argument 'resolution' ({}) of function {} is out of bounds because the maximum resolution in H3 library is {}", toString(res), getName(), MAX_H3_RES); diff --git a/src/Functions/geohashDecode.cpp b/src/Functions/geohashDecode.cpp index b8a38c46dbf..b2454f5dffc 100644 --- a/src/Functions/geohashDecode.cpp +++ b/src/Functions/geohashDecode.cpp @@ -85,9 +85,8 @@ public: tryExecute(encoded, res_column)) return res_column; - throw Exception("Unsupported argument type:" + arguments[0].column->getName() - + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unsupported argument type:{} of argument of function {}", + arguments[0].column->getName(), getName()); } }; diff --git a/src/Functions/geohashEncode.cpp b/src/Functions/geohashEncode.cpp index 2f05a0b902a..bc0c8b8fc5f 100644 --- a/src/Functions/geohashEncode.cpp +++ b/src/Functions/geohashEncode.cpp @@ -51,9 +51,8 @@ public: } if (arguments.size() > 3) { - throw Exception("Too many arguments for function " + getName() + - " expected at most 3", - ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION); + throw Exception(ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION, "Too many arguments for function {} expected at most 3", + getName()); } return std::make_shared(); @@ -93,7 +92,7 @@ public: out_vec.resize(pos - begin); if (!out_offsets.empty() && out_offsets.back() != out_vec.size()) - throw Exception("Column size mismatch (internal logical error)", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Column size mismatch (internal logical error)"); result = std::move(col_str); @@ -125,9 +124,8 @@ public: arguments_description += arguments[i].column->getName(); } - throw Exception("Unsupported argument types: " + arguments_description + - + " for function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unsupported argument types: {} for function {}", + arguments_description, getName()); } }; diff --git a/src/Functions/geohashesInBox.cpp b/src/Functions/geohashesInBox.cpp index 9ba4a62a2a0..ac8d4a6ad8f 100644 --- a/src/Functions/geohashesInBox.cpp +++ b/src/Functions/geohashesInBox.cpp @@ -47,13 +47,10 @@ public: arguments[0]->equals(*arguments[2]) && arguments[0]->equals(*arguments[3]))) { - throw Exception("Illegal type of argument of " + getName() + - " all coordinate arguments must have the same type, instead they are:" + - arguments[0]->getName() + ", " + - arguments[1]->getName() + ", " + - arguments[2]->getName() + ", " + - arguments[3]->getName() + ".", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Illegal type of argument of {} all coordinate arguments must have the same type, " + "instead they are:{}, {}, {}, {}.", getName(), arguments[0]->getName(), + arguments[1]->getName(), arguments[2]->getName(), arguments[3]->getName()); } return std::make_shared(std::make_shared()); @@ -100,12 +97,9 @@ public: if (!lon_min || !lat_min || !lon_max || !lat_max || !precision) { - throw Exception("Unsupported argument types for function " + getName() + " : " + - lon_min_column->getName() + ", " + - lat_min_column->getName() + ", " + - lon_max_column->getName() + ", " + - lat_max_column->getName() + ".", - 
ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unsupported argument types for function {} : {}, {}, {}, {}.", + getName(), lon_min_column->getName(), + lat_min_column->getName(), lon_max_column->getName(), lat_max_column->getName()); } auto col_res = ColumnArray::create(ColumnString::create()); @@ -128,9 +122,9 @@ public: if (prepared_args.items_count > max_array_size) { - throw Exception(getName() + " would produce " + std::to_string(prepared_args.items_count) + - " array elements, which is bigger than the allowed maximum of " + std::to_string(max_array_size), - ErrorCodes::TOO_LARGE_ARRAY_SIZE); + throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "{} would produce {} array elements, " + "which is bigger than the allowed maximum of {}", + getName(), prepared_args.items_count, max_array_size); } res_strings_offsets.reserve(res_strings_offsets.size() + prepared_args.items_count); @@ -148,14 +142,13 @@ public: if (!res_strings_offsets.empty() && res_strings_offsets.back() != res_strings_chars.size()) { - throw Exception("String column size mismatch (internal logical error)", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "String column size mismatch (internal logical error)"); } if (!res_offsets.empty() && res_offsets.back() != res_strings.size()) { - throw Exception("Array column size mismatch (internal logical error)" + - std::to_string(res_offsets.back()) + " != " + std::to_string(res_strings.size()), - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Array column size mismatch (internal logical error){} != {}", + res_offsets.back(), std::to_string(res_strings.size())); } result = std::move(col_res); diff --git a/src/Functions/geometryConverters.h b/src/Functions/geometryConverters.h index 8bd44edac67..3dbf3763fdc 100644 --- a/src/Functions/geometryConverters.h +++ b/src/Functions/geometryConverters.h @@ -74,10 +74,10 @@ struct ColumnToPointsConverter const Float64 second = second_container[i]; if (isNaN(first) || isNaN(second)) - throw Exception("Point's component must not be NaN", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Point's component must not be NaN"); if (std::isinf(first) || std::isinf(second)) - throw Exception("Point's component must not be infinite", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Point's component must not be infinite"); answer[i] = Point(first, second); } @@ -339,7 +339,7 @@ static void callOnGeometryDataType(DataTypePtr type, F && f) return f(ConverterType>()); else if (factory.get("MultiPolygon")->equals(*type)) return f(ConverterType>()); - throw Exception(fmt::format("Unknown geometry type {}", type->getName()), ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown geometry type {}", type->getName()); } diff --git a/src/Functions/getMacro.cpp b/src/Functions/getMacro.cpp index 96c3acc7088..8172fc8ba2e 100644 --- a/src/Functions/getMacro.cpp +++ b/src/Functions/getMacro.cpp @@ -61,7 +61,7 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (!isString(arguments[0])) - throw Exception("The argument of function " + getName() + " must have String type", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The argument of function {} must have String type", getName()); return std::make_shared(); } @@ -71,7 +71,7 @@ public: const ColumnString * arg_string = 
checkAndGetColumnConstData(arg_column); if (!arg_string) - throw Exception("The argument of function " + getName() + " must be constant String", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "The argument of function {} must be constant String", getName()); return result_type->createColumnConst(input_rows_count, macros->getValue(arg_string->getDataAt(0).toString())); } diff --git a/src/Functions/getScalar.cpp b/src/Functions/getScalar.cpp index d7036255e10..1a26ac5f4da 100644 --- a/src/Functions/getScalar.cpp +++ b/src/Functions/getScalar.cpp @@ -52,7 +52,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { if (arguments.size() != 1 || !isString(arguments[0].type) || !arguments[0].column || !isColumnConst(*arguments[0].column)) - throw Exception("Function " + getName() + " accepts one const string argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function {} accepts one const string argument", getName()); auto scalar_name = assert_cast(*arguments[0].column).getValue(); ContextPtr query_context = getContext()->hasQueryContext() ? getContext()->getQueryContext() : getContext(); scalar = query_context->getScalar(scalar_name).getByPosition(0); diff --git a/src/Functions/getSetting.cpp b/src/Functions/getSetting.cpp index e7ea44aab28..d6b8946e760 100644 --- a/src/Functions/getSetting.cpp +++ b/src/Functions/getSetting.cpp @@ -49,12 +49,14 @@ private: Field getValue(const ColumnsWithTypeAndName & arguments) const { if (!isString(arguments[0].type)) - throw Exception{"The argument of function " + String{name} + " should be a constant string with the name of a setting", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "The argument of function {} should be a constant string with the name of a setting", + String{name}); const auto * column = arguments[0].column.get(); if (!column || !checkAndGetColumnConstStringOrFixedString(column)) - throw Exception{"The argument of function " + String{name} + " should be a constant string with the name of a setting", - ErrorCodes::ILLEGAL_COLUMN}; + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "The argument of function {} should be a constant string with the name of a setting", + String{name}); std::string_view setting_name{column->getDataAt(0).toView()}; return getContext()->getSettingsRef().get(setting_name); diff --git a/src/Functions/getSizeOfEnumType.cpp b/src/Functions/getSizeOfEnumType.cpp index 6128f5a44cb..7eabcb5150a 100644 --- a/src/Functions/getSizeOfEnumType.cpp +++ b/src/Functions/getSizeOfEnumType.cpp @@ -48,7 +48,7 @@ public: else if (which.isEnum16()) return std::make_shared(); - throw Exception("The argument for function " + getName() + " must be Enum", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The argument for function {} must be Enum", getName()); } ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override diff --git a/src/Functions/globalVariable.cpp b/src/Functions/globalVariable.cpp index 50e94fad614..9ec7e82bf6b 100644 --- a/src/Functions/globalVariable.cpp +++ b/src/Functions/globalVariable.cpp @@ -50,7 +50,7 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { if (!checkColumnConst(arguments[0].column.get())) - throw Exception("Argument of function " + getName() + " must be constant 
string", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Argument of function {} must be constant string", getName()); String variable_name = assert_cast(*arguments[0].column).getValue(); auto variable = global_variable_map.find(Poco::toLower(variable_name)); diff --git a/src/Functions/greatCircleDistance.cpp b/src/Functions/greatCircleDistance.cpp index dc13d7f2deb..db1f777fa91 100644 --- a/src/Functions/greatCircleDistance.cpp +++ b/src/Functions/greatCircleDistance.cpp @@ -259,9 +259,8 @@ private: { const auto * arg = arguments[arg_idx].get(); if (!isNumber(WhichDataType(arg))) - throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(arg_idx + 1) + " of function " + getName() + ". Must be numeric", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument {} of function {}. " + "Must be numeric", arg->getName(), std::to_string(arg_idx + 1), getName()); } return std::make_shared(); diff --git a/src/Functions/h3EdgeAngle.cpp b/src/Functions/h3EdgeAngle.cpp index bd6d5d2b47f..3cf8f653024 100644 --- a/src/Functions/h3EdgeAngle.cpp +++ b/src/Functions/h3EdgeAngle.cpp @@ -77,7 +77,7 @@ public: if (resolution > MAX_H3_RES) throw Exception( ErrorCodes::ARGUMENT_OUT_OF_BOUND, - "The argument 'resolution' ({}) of function {} is out of bounds because the maximum resolution in H3 library is ", + "The argument 'resolution' ({}) of function {} is out of bounds because the maximum resolution in H3 library is {}", toString(resolution), getName(), MAX_H3_RES); diff --git a/src/Functions/h3EdgeLengthKm.cpp b/src/Functions/h3EdgeLengthKm.cpp index 821b699e8bb..dbcdcbf6791 100644 --- a/src/Functions/h3EdgeLengthKm.cpp +++ b/src/Functions/h3EdgeLengthKm.cpp @@ -78,7 +78,7 @@ public: if (resolution > MAX_H3_RES) throw Exception( ErrorCodes::ARGUMENT_OUT_OF_BOUND, - "The argument 'resolution' ({}) of function {} is out of bounds because the maximum resolution in H3 library is ", + "The argument 'resolution' ({}) of function {} is out of bounds because the maximum resolution in H3 library is {}", toString(resolution), getName(), MAX_H3_RES); diff --git a/src/Functions/h3EdgeLengthM.cpp b/src/Functions/h3EdgeLengthM.cpp index 5544f8555ff..3e5659077d1 100644 --- a/src/Functions/h3EdgeLengthM.cpp +++ b/src/Functions/h3EdgeLengthM.cpp @@ -83,7 +83,7 @@ public: if (resolution > MAX_H3_RES) throw Exception( ErrorCodes::ARGUMENT_OUT_OF_BOUND, - "The argument 'resolution' ({}) of function {} is out of bounds because the maximum resolution in H3 library is ", + "The argument 'resolution' ({}) of function {} is out of bounds because the maximum resolution in H3 library is {}", toString(resolution), getName(), MAX_H3_RES); Float64 res = getHexagonEdgeLengthAvgM(resolution); diff --git a/src/Functions/h3GetPentagonIndexes.cpp b/src/Functions/h3GetPentagonIndexes.cpp index 098a577f05c..15b5f2ab68b 100644 --- a/src/Functions/h3GetPentagonIndexes.cpp +++ b/src/Functions/h3GetPentagonIndexes.cpp @@ -83,7 +83,7 @@ public: if (data[row] > MAX_H3_RES) throw Exception( ErrorCodes::ARGUMENT_OUT_OF_BOUND, - "The argument 'resolution' ({}) of function {} is out of bounds because the maximum resolution in H3 library is ", + "The argument 'resolution' ({}) of function {} is out of bounds because the maximum resolution in H3 library is {}", toString(data[row]), getName(), MAX_H3_RES); diff --git a/src/Functions/h3HexAreaKm2.cpp b/src/Functions/h3HexAreaKm2.cpp index b6c9434077f..e127096ac33 100644 --- 
a/src/Functions/h3HexAreaKm2.cpp +++ b/src/Functions/h3HexAreaKm2.cpp @@ -78,7 +78,7 @@ public: if (resolution > MAX_H3_RES) throw Exception( ErrorCodes::ARGUMENT_OUT_OF_BOUND, - "The argument 'resolution' ({}) of function {} is out of bounds because the maximum resolution in H3 library is ", + "The argument 'resolution' ({}) of function {} is out of bounds because the maximum resolution in H3 library is {}", toString(resolution), getName(), MAX_H3_RES); diff --git a/src/Functions/h3HexAreaM2.cpp b/src/Functions/h3HexAreaM2.cpp index 07b276fe155..c38211b49f8 100644 --- a/src/Functions/h3HexAreaM2.cpp +++ b/src/Functions/h3HexAreaM2.cpp @@ -78,7 +78,7 @@ public: if (resolution > MAX_H3_RES) throw Exception( ErrorCodes::ARGUMENT_OUT_OF_BOUND, - "The argument 'resolution' ({}) of function {} is out of bounds because the maximum resolution in H3 library is ", + "The argument 'resolution' ({}) of function {} is out of bounds because the maximum resolution in H3 library is {}", toString(resolution), getName(), MAX_H3_RES); diff --git a/src/Functions/h3NumHexagons.cpp b/src/Functions/h3NumHexagons.cpp index 5414d42b49c..677ea3cd170 100644 --- a/src/Functions/h3NumHexagons.cpp +++ b/src/Functions/h3NumHexagons.cpp @@ -77,7 +77,7 @@ public: if (resolution > MAX_H3_RES) throw Exception( ErrorCodes::ARGUMENT_OUT_OF_BOUND, - "The argument 'resolution' ({}) of function {} is out of bounds because the maximum resolution in H3 library is ", + "The argument 'resolution' ({}) of function {} is out of bounds because the maximum resolution in H3 library is {}", toString(resolution), getName(), MAX_H3_RES); Int64 res = getNumCells(resolution); dst_data[row] = res; diff --git a/src/Functions/h3ToChildren.cpp b/src/Functions/h3ToChildren.cpp index 8a17d014f3a..3c174ef4042 100644 --- a/src/Functions/h3ToChildren.cpp +++ b/src/Functions/h3ToChildren.cpp @@ -115,7 +115,7 @@ public: throw Exception( ErrorCodes::TOO_LARGE_ARRAY_SIZE, "The result of function {} (array of {} elements) will be too large with resolution argument = {}", - getName(), toString(vec_size), toString(child_resolution)); + getName(), vec_size, toString(child_resolution)); std::vector hindex_vec; hindex_vec.resize(vec_size); diff --git a/src/Functions/hasColumnInTable.cpp b/src/Functions/hasColumnInTable.cpp index 4c3008dd516..824056a452b 100644 --- a/src/Functions/hasColumnInTable.cpp +++ b/src/Functions/hasColumnInTable.cpp @@ -65,8 +65,7 @@ public: DataTypePtr FunctionHasColumnInTable::getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const { if (arguments.size() < 3 || arguments.size() > 6) - throw Exception{"Invalid number of arguments for function " + getName(), - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Invalid number of arguments for function {}", getName()); static const std::string arg_pos_description[] = {"First", "Second", "Third", "Fourth", "Fifth", "Sixth"}; for (size_t i = 0; i < arguments.size(); ++i) @@ -75,8 +74,8 @@ DataTypePtr FunctionHasColumnInTable::getReturnTypeImpl(const ColumnsWithTypeAnd if (!checkColumnConst(argument.column.get())) { - throw Exception(arg_pos_description[i] + " argument for function " + getName() + " must be const String.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "{} argument for function {} must be const String.", + arg_pos_description[i], getName()); } } @@ -111,7 +110,7 @@ ColumnPtr FunctionHasColumnInTable::executeImpl(const ColumnsWithTypeAndName & a String 
column_name = get_string_from_columns(arguments[arg++]); if (table_name.empty()) - throw Exception("Table name is empty", ErrorCodes::UNKNOWN_TABLE); + throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table name is empty"); bool has_column; if (host_name.empty()) diff --git a/src/Functions/hasToken.cpp b/src/Functions/hasToken.cpp index 646ff0b54f7..ba1ff9b3ab5 100644 --- a/src/Functions/hasToken.cpp +++ b/src/Functions/hasToken.cpp @@ -1,26 +1,32 @@ +#include "FunctionFactory.h" #include "FunctionsStringSearch.h" -#include #include "HasTokenImpl.h" + #include - -namespace DB -{ namespace { - struct NameHasToken { static constexpr auto name = "hasToken"; }; -using FunctionHasToken = FunctionsStringSearch>; +struct NameHasTokenOrNull +{ + static constexpr auto name = "hasTokenOrNull"; +}; +using FunctionHasToken = DB::FunctionsStringSearch>; +using FunctionHasTokenOrNull = DB:: + FunctionsStringSearch, DB::ExecutionErrorPolicy::Null>; } REGISTER_FUNCTION(HasToken) { - factory.registerFunction(); -} + factory.registerFunction( + {"Performs lookup of needle in haystack using tokenbf_v1 index."}, DB::FunctionFactory::CaseSensitive); + factory.registerFunction( + {"Performs lookup of needle in haystack using tokenbf_v1 index. Returns null if needle is ill-formed."}, + DB::FunctionFactory::CaseSensitive); } diff --git a/src/Functions/hasTokenCaseInsensitive.cpp b/src/Functions/hasTokenCaseInsensitive.cpp index 0012ea3e148..4fc19ca0784 100644 --- a/src/Functions/hasTokenCaseInsensitive.cpp +++ b/src/Functions/hasTokenCaseInsensitive.cpp @@ -1,27 +1,34 @@ +#include "FunctionFactory.h" #include "FunctionsStringSearch.h" -#include #include "HasTokenImpl.h" + #include - -namespace DB -{ namespace { - struct NameHasTokenCaseInsensitive { static constexpr auto name = "hasTokenCaseInsensitive"; }; -using FunctionHasTokenCaseInsensitive - = FunctionsStringSearch>; +struct NameHasTokenCaseInsensitiveOrNull +{ + static constexpr auto name = "hasTokenCaseInsensitiveOrNull"; +}; +using FunctionHasTokenCaseInsensitive + = DB::FunctionsStringSearch>; +using FunctionHasTokenCaseInsensitiveOrNull = DB::FunctionsStringSearch< + DB::HasTokenImpl, + DB::ExecutionErrorPolicy::Null>; } REGISTER_FUNCTION(HasTokenCaseInsensitive) { - factory.registerFunction(); -} + factory.registerFunction( + {"Performs case insensitive lookup of needle in haystack using tokenbf_v1 index."}, DB::FunctionFactory::CaseInsensitive); + factory.registerFunction( + {"Performs case insensitive lookup of needle in haystack using tokenbf_v1 index. 
Returns null if needle is ill-formed."}, + DB::FunctionFactory::CaseInsensitive); } diff --git a/src/Functions/if.cpp b/src/Functions/if.cpp index 049e6d24920..93bdf406f9d 100644 --- a/src/Functions/if.cpp +++ b/src/Functions/if.cpp @@ -226,7 +226,7 @@ private: UInt32 left_scale = getDecimalScale(*arguments[1].type); UInt32 right_scale = getDecimalScale(*arguments[2].type); if (left_scale != right_scale) - throw Exception("Conditional functions with different Decimal scales", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Conditional functions with different Decimal scales"); return left_scale; } else @@ -754,8 +754,7 @@ private: new_cond_column = ColumnConst::create(new_cond_column, column_size); } else - throw Exception("Illegal column " + arg_cond.column->getName() + " of " + getName() + " condition", - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of {} condition", arg_cond.column->getName(), getName()); ColumnsWithTypeAndName temporary_columns { @@ -924,9 +923,8 @@ private: return makeNullableColumnIfNot(arg_else_column); } else - throw Exception("Illegal column " + arg_cond.column->getName() + " of first argument of function " + getName() - + ". Must be ColumnUInt8 or ColumnConstUInt8.", - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}. " + "Must be ColumnUInt8 or ColumnConstUInt8.", arg_cond.column->getName(), getName()); } /// If else is NULL, we create Nullable column with null mask OR-ed with negated condition. @@ -975,9 +973,8 @@ private: return result_type->createColumn()->cloneResized(input_rows_count); } else - throw Exception("Illegal column " + arg_cond.column->getName() + " of first argument of function " + getName() - + ". Must be ColumnUInt8 or ColumnConstUInt8.", - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}. " + "Must be ColumnUInt8 or ColumnConstUInt8.", arg_cond.column->getName(), getName()); } return nullptr; @@ -1039,8 +1036,8 @@ public: removeNullable(arguments[0]), arguments[1], arguments[2]}); if (!WhichDataType(arguments[0]).isUInt8()) - throw Exception("Illegal type " + arguments[0]->getName() + " of first argument (condition) of function if. Must be UInt8.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument (condition) of function if. " + "Must be UInt8.", arguments[0]->getName()); return getLeastSupertype(DataTypes{arguments[1], arguments[2]}); } @@ -1086,9 +1083,8 @@ public: } if (!cond_col) - throw Exception("Illegal column " + arg_cond.column->getName() + " of first argument of function " + getName() - + ". Must be ColumnUInt8 or ColumnConstUInt8.", - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}. 
" + "Must be ColumnUInt8 or ColumnConstUInt8.", arg_cond.column->getName(), getName()); auto call = [&](const auto & types) -> bool { diff --git a/src/Functions/in.cpp b/src/Functions/in.cpp index 1de8371cf90..53dc25b55af 100644 --- a/src/Functions/in.cpp +++ b/src/Functions/in.cpp @@ -104,8 +104,8 @@ public: if (!column_set) column_set = checkAndGetColumn(column_set_ptr.get()); if (!column_set) - throw Exception("Second argument for function '" + getName() + "' must be Set; found " + column_set_ptr->getName(), - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument for function '{}' must be Set; found {}", + getName(), column_set_ptr->getName()); ColumnsWithTypeAndName columns_of_key_columns; diff --git a/src/Functions/initializeAggregation.cpp b/src/Functions/initializeAggregation.cpp index b782cd04f75..cec9f0d94a3 100644 --- a/src/Functions/initializeAggregation.cpp +++ b/src/Functions/initializeAggregation.cpp @@ -55,14 +55,14 @@ private: DataTypePtr FunctionInitializeAggregation::getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const { if (arguments.size() < 2) - throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " - + toString(arguments.size()) + ", should be at least 2.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be at least 2.", + getName(), arguments.size()); const ColumnConst * aggregate_function_name_column = checkAndGetColumnConst(arguments[0].column.get()); if (!aggregate_function_name_column) - throw Exception("First argument for function " + getName() + " must be constant string: name of aggregate function.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be constant string: " + "name of aggregate function.", getName()); DataTypes argument_types(arguments.size() - 1); for (size_t i = 1, size = arguments.size(); i < size; ++i) @@ -75,8 +75,7 @@ DataTypePtr FunctionInitializeAggregation::getReturnTypeImpl(const ColumnsWithTy String aggregate_function_name_with_params = aggregate_function_name_column->getValue(); if (aggregate_function_name_with_params.empty()) - throw Exception("First argument for function " + getName() + " (name of aggregate function) cannot be empty.", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "First argument for function {} (name of aggregate function) cannot be empty.", getName()); String aggregate_function_name; Array params_row; diff --git a/src/Functions/intExp10.cpp b/src/Functions/intExp10.cpp index 6e59a41c50b..909afc4df17 100644 --- a/src/Functions/intExp10.cpp +++ b/src/Functions/intExp10.cpp @@ -23,7 +23,7 @@ struct IntExp10Impl static inline ResultType apply([[maybe_unused]] A a) { if constexpr (is_big_int_v || std::is_same_v) - throw DB::Exception("IntExp10 is not implemented for big integers", ErrorCodes::NOT_IMPLEMENTED); + throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "IntExp10 is not implemented for big integers"); else return intExp10(static_cast(a)); } diff --git a/src/Functions/intExp2.cpp b/src/Functions/intExp2.cpp index e39647b6c38..7d04f329e3f 100644 --- a/src/Functions/intExp2.cpp +++ b/src/Functions/intExp2.cpp @@ -24,7 +24,7 @@ struct IntExp2Impl static inline ResultType apply([[maybe_unused]] A a) { if constexpr (is_big_int_v) - throw DB::Exception("intExp2 not implemented for big 
integers", ErrorCodes::NOT_IMPLEMENTED); + throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "intExp2 not implemented for big integers"); else return intExp2(static_cast(a)); } @@ -35,7 +35,7 @@ struct IntExp2Impl static inline llvm::Value * compile(llvm::IRBuilder<> & b, llvm::Value * arg, bool) { if (!arg->getType()->isIntegerTy()) - throw Exception("IntExp2Impl expected an integral type", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "IntExp2Impl expected an integral type"); return b.CreateShl(llvm::ConstantInt::get(arg->getType(), 1), arg); } #endif diff --git a/src/Functions/isDecimalOverflow.cpp b/src/Functions/isDecimalOverflow.cpp index fcba8c1f64b..504ece7794f 100644 --- a/src/Functions/isDecimalOverflow.cpp +++ b/src/Functions/isDecimalOverflow.cpp @@ -41,21 +41,22 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (arguments.empty() || arguments.size() > 2) - throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " + - toString(arguments.size()) + ", should be 1 or 2.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 1 or 2.", + getName(), arguments.size()); WhichDataType which_first(arguments[0]->getTypeId()); if (!which_first.isDecimal()) - throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", + arguments[0]->getName(), getName()); if (arguments.size() == 2) { WhichDataType which_second(arguments[1]->getTypeId()); if (!which_second.isUInt8()) - throw Exception("Illegal type " + arguments[1]->getName() + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", + arguments[1]->getName(), getName()); } return std::make_shared(); @@ -65,19 +66,19 @@ public: { const auto & src_column = arguments[0]; if (!src_column.column) - throw Exception("Illegal column while execute function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal column while execute function {}", getName()); UInt32 precision = 0; if (arguments.size() == 2) { const auto & precision_column = arguments[1]; if (!precision_column.column) - throw Exception("Illegal column while execute function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal column while execute function {}", getName()); const ColumnConst * const_column = checkAndGetColumnConst(precision_column.column.get()); if (!const_column) - throw Exception("Second argument for function " + getName() + " must be constant UInt8: precision.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument for function {} must be constant UInt8: " + "precision.", getName()); precision = const_column->getValue(); } @@ -105,13 +106,12 @@ public: return true; } - throw Exception("Illegal column while execute function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal column while execute function {}", getName()); }; TypeIndex dec_type_idx = src_column.type->getTypeId(); if 
(!callOnBasicType(dec_type_idx, call)) - throw Exception("Wrong call for " + getName() + " with " + src_column.type->getName(), - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Wrong call for {} with {}", getName(), src_column.type->getName()); return result_column; } diff --git a/src/Functions/isIPAddressContainedIn.cpp b/src/Functions/isIPAddressContainedIn.cpp index 23cac4dbef0..abbcb0a5e37 100644 --- a/src/Functions/isIPAddressContainedIn.cpp +++ b/src/Functions/isIPAddressContainedIn.cpp @@ -75,9 +75,9 @@ IPAddressCIDR parseIPWithCIDR(std::string_view cidr_str) size_t pos_slash = cidr_str.find('/'); if (pos_slash == 0) - throw DB::Exception("Error parsing IP address with prefix: " + std::string(cidr_str), DB::ErrorCodes::CANNOT_PARSE_TEXT); + throw DB::Exception(DB::ErrorCodes::CANNOT_PARSE_TEXT, "Error parsing IP address with prefix: {}", std::string(cidr_str)); if (pos_slash == std::string_view::npos) - throw DB::Exception("The text does not contain '/': " + std::string(cidr_str), DB::ErrorCodes::CANNOT_PARSE_TEXT); + throw DB::Exception(DB::ErrorCodes::CANNOT_PARSE_TEXT, "The text does not contain '/': {}", std::string(cidr_str)); std::string_view addr_str = cidr_str.substr(0, pos_slash); IPAddressVariant addr(addr_str); @@ -90,7 +90,7 @@ IPAddressCIDR parseIPWithCIDR(std::string_view cidr_str) uint8_t max_prefix = (addr.asV6() ? IPV6_BINARY_LENGTH : IPV4_BINARY_LENGTH) * 8; bool has_error = parse_error != std::errc() || parse_end != prefix_str_end || prefix > max_prefix; if (has_error) - throw DB::Exception("The CIDR has a malformed prefix bits: " + std::string(cidr_str), DB::ErrorCodes::CANNOT_PARSE_TEXT); + throw DB::Exception(DB::ErrorCodes::CANNOT_PARSE_TEXT, "The CIDR has a malformed prefix bits: {}", std::string(cidr_str)); return {addr, static_cast(prefix)}; } @@ -146,16 +146,15 @@ namespace DB DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (arguments.size() != 2) - throw Exception( - "Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size()) + ", should be 2", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 2", + getName(), arguments.size()); const DataTypePtr & addr_type = arguments[0]; const DataTypePtr & prefix_type = arguments[1]; if (!isString(addr_type) || !isString(prefix_type)) - throw Exception("The arguments of function " + getName() + " must be String", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The arguments of function {} must be String", getName()); return std::make_shared(); } diff --git a/src/Functions/isValidUTF8.cpp b/src/Functions/isValidUTF8.cpp index 3ecf69cc364..0871e82adb8 100644 --- a/src/Functions/isValidUTF8.cpp +++ b/src/Functions/isValidUTF8.cpp @@ -241,12 +241,22 @@ SOFTWARE. 
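// ---------------------------------------------------------------------------
// [Editorial aside - illustrative sketch only, not part of the patch.]
// The mechanical change applied throughout this diff is the same everywhere:
// the old Exception constructor that built its message by string concatenation
// (with the error code passed last) is replaced by the fmt-style overload that
// takes the error code first and a format string with {} placeholders, so the
// manual toString()/std::to_string() conversions of the arguments can be
// dropped. A minimal before/after sketch of the pattern, using the same
// DB::Exception and ErrorCodes names that appear in the surrounding hunks:
//
//     // Old spelling: message concatenated by hand, error code last.
//     throw Exception(
//         "Illegal type " + arguments[0]->getName() + " of argument of function " + getName(),
//         ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
//
//     // New spelling: error code first, fmt format string, raw arguments.
//     throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
//         "Illegal type {} of argument of function {}",
//         arguments[0]->getName(), getName());
// ---------------------------------------------------------------------------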
[[noreturn]] static void array(const ColumnString::Offsets &, PaddedPODArray &) { - throw Exception("Cannot apply function isValidUTF8 to Array argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function isValidUTF8 to Array argument"); } [[noreturn]] static void uuid(const ColumnUUID::Container &, size_t &, PaddedPODArray &) { - throw Exception("Cannot apply function isValidUTF8 to UUID argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function isValidUTF8 to UUID argument"); + } + + [[noreturn]] static void ipv6(const ColumnIPv6::Container &, size_t &, PaddedPODArray &) + { + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function isValidUTF8 to IPv6 argument"); + } + + [[noreturn]] static void ipv4(const ColumnIPv4::Container &, size_t &, PaddedPODArray &) + { + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function isValidUTF8 to IPv4 argument"); } }; diff --git a/src/Functions/lemmatize.cpp b/src/Functions/lemmatize.cpp index 4a44c3a2509..e1168e077bb 100644 --- a/src/Functions/lemmatize.cpp +++ b/src/Functions/lemmatize.cpp @@ -61,7 +61,9 @@ public: static FunctionPtr create(ContextPtr context) { if (!context->getSettingsRef().allow_experimental_nlp_functions) - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Natural language processing function '{}' is experimental. Set `allow_experimental_nlp_functions` setting to enable it", name); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, + "Natural language processing function '{}' is experimental. " + "Set `allow_experimental_nlp_functions` setting to enable it", name); return std::make_shared(context->getLemmatizers()); } @@ -82,11 +84,11 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (!isString(arguments[0])) - throw Exception( - "Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", + arguments[0]->getName(), getName()); if (!isString(arguments[1])) - throw Exception( - "Illegal type " + arguments[1]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", + arguments[1]->getName(), getName()); return arguments[1]; } @@ -103,11 +105,11 @@ public: const ColumnString * words_col = checkAndGetColumn(strcolumn.get()); if (!lang_col) - throw Exception( - "Illegal column " + arguments[0].column->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", + arguments[0].column->getName(), getName()); if (!words_col) - throw Exception( - "Illegal column " + arguments[1].column->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", + arguments[1].column->getName(), getName()); String language = lang_col->getValue(); auto lemmatizer = lemmatizers.getLemmatizer(language); diff --git a/src/Functions/lengthUTF8.cpp b/src/Functions/lengthUTF8.cpp index 9e5b5d04dd2..5a4af4934df 100644 --- a/src/Functions/lengthUTF8.cpp +++ b/src/Functions/lengthUTF8.cpp @@ -51,12 +51,22 @@ struct 
LengthUTF8Impl [[noreturn]] static void array(const ColumnString::Offsets &, PaddedPODArray &) { - throw Exception("Cannot apply function lengthUTF8 to Array argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function lengthUTF8 to Array argument"); } [[noreturn]] static void uuid(const ColumnUUID::Container &, size_t &, PaddedPODArray &) { - throw Exception("Cannot apply function lengthUTF8 to UUID argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function lengthUTF8 to UUID argument"); + } + + [[noreturn]] static void ipv6(const ColumnIPv6::Container &, size_t &, PaddedPODArray &) + { + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function lengthUTF8 to IPv6 argument"); + } + + [[noreturn]] static void ipv4(const ColumnIPv4::Container &, size_t &, PaddedPODArray &) + { + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot apply function lengthUTF8 to IPv4 argument"); } }; diff --git a/src/Functions/logTrace.cpp b/src/Functions/logTrace.cpp index e1e4c5eb2b0..55f387cbfeb 100644 --- a/src/Functions/logTrace.cpp +++ b/src/Functions/logTrace.cpp @@ -32,9 +32,8 @@ namespace DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (!isString(arguments[0])) - throw Exception( - "Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", + arguments[0]->getName(), getName()); return std::make_shared(); } @@ -44,8 +43,8 @@ namespace if (const ColumnConst * col = checkAndGetColumnConst(arguments[0].column.get())) message = col->getDataAt(0).data; else - throw Exception( - "First argument for function " + getName() + " must be Constant string", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be Constant string", + getName()); static auto * log = &Poco::Logger::get("FunctionLogTrace"); LOG_TRACE(log, fmt::runtime(message)); diff --git a/src/Functions/lowCardinalityIndices.cpp b/src/Functions/lowCardinalityIndices.cpp index 3c9d618b9ba..a9f15aaf7bb 100644 --- a/src/Functions/lowCardinalityIndices.cpp +++ b/src/Functions/lowCardinalityIndices.cpp @@ -36,8 +36,9 @@ public: { const auto * type = typeid_cast(arguments[0].get()); if (!type) - throw Exception("First first argument of function lowCardinalityIndexes must be ColumnLowCardinality, but got " - + arguments[0]->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First first argument of function lowCardinalityIndexes must be ColumnLowCardinality, " + "but got {}", arguments[0]->getName()); return std::make_shared(); } diff --git a/src/Functions/lowCardinalityKeys.cpp b/src/Functions/lowCardinalityKeys.cpp index 963034f0830..e57e6b8fa13 100644 --- a/src/Functions/lowCardinalityKeys.cpp +++ b/src/Functions/lowCardinalityKeys.cpp @@ -34,8 +34,9 @@ public: { const auto * type = typeid_cast(arguments[0].get()); if (!type) - throw Exception("First first argument of function lowCardinalityKeys must be ColumnLowCardinality, but got " - + arguments[0]->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First first argument of function lowCardinalityKeys must be ColumnLowCardinality, " + "but got {}", 
arguments[0]->getName()); return type->getDictionaryType(); } diff --git a/src/Functions/map.cpp b/src/Functions/map.cpp index cdb68a142a0..3160c5ddb43 100644 --- a/src/Functions/map.cpp +++ b/src/Functions/map.cpp @@ -201,15 +201,14 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { if (arguments.size() != 1) - throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " - + toString(arguments.size()) + ", should be 1", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 1", + getName(), arguments.size()); const DataTypeMap * map_type = checkAndGetDataType(arguments[0].type.get()); if (!map_type) - throw Exception{"First argument for function " + getName() + " must be a map", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be a map", getName()); auto key_type = map_type->getKeyType(); @@ -250,15 +249,14 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { if (arguments.size() != 1) - throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " - + toString(arguments.size()) + ", should be 1", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 1", + getName(), arguments.size()); const DataTypeMap * map_type = checkAndGetDataType(arguments[0].type.get()); if (!map_type) - throw Exception{"First argument for function " + getName() + " must be a map", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be a map", getName()); auto value_type = map_type->getValueType(); @@ -295,7 +293,7 @@ public: : checkAndGetColumn(arguments[0].column.get()); const DataTypeMap * map_type = checkAndGetDataType(arguments[0].type.get()); if (!col_map || !map_type) - throw Exception{"First argument for function " + getName() + " must be a map", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be a map", getName()); auto col_res = ColumnVector::create(); typename ColumnVector::Container & vec_res = col_res->getData(); @@ -359,23 +357,20 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { if (arguments.size() != 2) - throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " - + toString(arguments.size()) + ", should be 2", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 2", + getName(), arguments.size()); const DataTypeMap * map_type = checkAndGetDataType(arguments[0].type.get()); const DataTypeString * pattern_type = checkAndGetDataType(arguments[1].type.get()); if (!map_type) - throw Exception{"First argument for function " + getName() + " must be a Map", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be a Map", getName()); if (!pattern_type) - throw Exception{"Second argument for function " + getName() + " must be String", - 
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument for function {} must be String", getName()); if (!isStringOrFixedString(map_type->getKeyType())) - throw Exception{"Key type of map for function " + getName() + " must be `String` or `FixedString`", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Key type of map for function {} must be `String` or `FixedString`", getName()); return std::make_shared(); } @@ -403,16 +398,15 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { if (arguments.size() != 2) - throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " - + toString(arguments.size()) + ", should be 2", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 2", + getName(), arguments.size()); const DataTypeMap * map_type = checkAndGetDataType(arguments[0].type.get()); if (!map_type) - throw Exception{"First argument for function " + getName() + " must be a map", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be a map", getName()); auto key_type = map_type->getKeyType(); @@ -420,12 +414,11 @@ public: WhichDataType which(key_type); if (!which.isStringOrFixedString()) - throw Exception{"Function " + getName() + "only support the map with String or FixedString key", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function {}only support the map with String or FixedString key", + getName()); if (!isStringOrFixedString(arguments[1].type)) - throw Exception{"Second argument passed to function " + getName() + " must be String or FixedString", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument passed to function {} must be String or FixedString", getName()); return std::make_shared(map_type->getKeyType(), map_type->getValueType()); } @@ -539,19 +532,19 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { if (arguments.size() != 2) - throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " - + toString(arguments.size()) + ", should be 2", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 2", + getName(), arguments.size()); const DataTypeMap * left = checkAndGetDataType(arguments[0].type.get()); const DataTypeMap * right = checkAndGetDataType(arguments[1].type.get()); if (!left || !right) - throw Exception{"The two arguments for function " + getName() + " must be both Map type", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The two arguments for function {} must be both Map type", + getName()); if (!left->getKeyType()->equals(*right->getKeyType()) || !left->getValueType()->equals(*right->getValueType())) - throw Exception{"The Key And Value type of Map for function " + getName() + " must be the same", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The Key And Value type of Map for function {} must be the same", + getName()); return 
std::make_shared(left->getKeyType(), left->getValueType()); } diff --git a/src/Functions/mapFilter.cpp b/src/Functions/mapFilter.cpp index 639cee7ce0f..038e3bb3ce0 100644 --- a/src/Functions/mapFilter.cpp +++ b/src/Functions/mapFilter.cpp @@ -51,7 +51,7 @@ struct MapFilterImpl const auto * column_filter_const = checkAndGetColumnConst(&*mapped); if (!column_filter_const) - throw Exception("Unexpected type of filter column", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unexpected type of filter column"); if (column_filter_const->getValue()) return map_column.clone(); diff --git a/src/Functions/modulo.cpp b/src/Functions/modulo.cpp index 379869bc6ec..be791bdeda0 100644 --- a/src/Functions/modulo.cpp +++ b/src/Functions/modulo.cpp @@ -82,7 +82,7 @@ struct ModuloByConstantImpl /// Division by min negative value. if (std::is_signed_v && b == std::numeric_limits::lowest()) - throw Exception("Division by the most negative number", ErrorCodes::ILLEGAL_DIVISION); + throw Exception(ErrorCodes::ILLEGAL_DIVISION, "Division by the most negative number"); /// Modulo of division by negative number is the same as the positive number. if (b < 0) diff --git a/src/Functions/monthName.cpp b/src/Functions/monthName.cpp index f782ac647cc..f49f77bd6e7 100644 --- a/src/Functions/monthName.cpp +++ b/src/Functions/monthName.cpp @@ -40,7 +40,7 @@ public: ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Number of arguments for function {} doesn't match: passed {}, should be 1", getName(), - toString(arguments.size())); + arguments.size()); WhichDataType argument_type(arguments[0].type); if (!argument_type.isDate() && !argument_type.isDateTime() && !argument_type.isDateTime64()) diff --git a/src/Functions/multiIf.cpp b/src/Functions/multiIf.cpp index 37301037c0e..f26d35c4f6e 100644 --- a/src/Functions/multiIf.cpp +++ b/src/Functions/multiIf.cpp @@ -81,8 +81,7 @@ public: }; if (!(args.size() >= 3 && args.size() % 2 == 1)) - throw Exception{"Invalid number of arguments for function " + getName(), - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Invalid number of arguments for function {}", getName()); for_conditions([&](const DataTypePtr & arg) { @@ -101,9 +100,8 @@ public: } if (!WhichDataType(nested_type).isUInt8()) - throw Exception{"Illegal type " + arg->getName() + " of argument (condition) " - "of function " + getName() + ". Must be UInt8.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument (condition) of function {}. 
" + "Must be UInt8.", arg->getName(), getName()); }); DataTypes types_of_branches; diff --git a/src/Functions/multiplyDecimal.cpp b/src/Functions/multiplyDecimal.cpp index 101c6bc528d..1ec2d4acc9b 100644 --- a/src/Functions/multiplyDecimal.cpp +++ b/src/Functions/multiplyDecimal.cpp @@ -48,7 +48,7 @@ struct MultiplyDecimalsImpl return Decimal256(0); if (multiplied.size() > DecimalUtils::max_precision) - throw DB::Exception("Numeric overflow: result bigger that Decimal256", ErrorCodes::DECIMAL_OVERFLOW); + throw DB::Exception(ErrorCodes::DECIMAL_OVERFLOW, "Numeric overflow: result bigger that Decimal256"); return Decimal256(sign_a * sign_b * DecimalOpHelpers::fromDigits(multiplied)); } diff --git a/src/Functions/neighbor.cpp b/src/Functions/neighbor.cpp index 07a7a028604..d367695448a 100644 --- a/src/Functions/neighbor.cpp +++ b/src/Functions/neighbor.cpp @@ -63,7 +63,7 @@ public: if (number_of_arguments < 2 || number_of_arguments > 3) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Number of arguments for function {} doesn't match: passed {}, should be from 2 to 3", - getName(), toString(number_of_arguments)); + getName(), number_of_arguments); // second argument must be an integer if (!isInteger(arguments[1])) diff --git a/src/Functions/normalizeQuery.cpp b/src/Functions/normalizeQuery.cpp index ee1688fe197..b01dac3909a 100644 --- a/src/Functions/normalizeQuery.cpp +++ b/src/Functions/normalizeQuery.cpp @@ -45,7 +45,7 @@ struct Impl [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) { - throw Exception("Cannot apply function normalizeQuery to fixed string.", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot apply function normalizeQuery to fixed string."); } }; diff --git a/src/Functions/normalizeString.cpp b/src/Functions/normalizeString.cpp index 3f704ee0613..92411490eaa 100644 --- a/src/Functions/normalizeString.cpp +++ b/src/Functions/normalizeString.cpp @@ -159,7 +159,7 @@ struct NormalizeUTF8Impl [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) { - throw Exception("Cannot apply function normalizeUTF8 to fixed string.", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot apply function normalizeUTF8 to fixed string."); } }; diff --git a/src/Functions/normalizedQueryHash.cpp b/src/Functions/normalizedQueryHash.cpp index b6efa4e18cd..d2ccb1c016d 100644 --- a/src/Functions/normalizedQueryHash.cpp +++ b/src/Functions/normalizedQueryHash.cpp @@ -69,7 +69,7 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (!isString(arguments[0])) - throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", arguments[0]->getName(), getName()); return std::make_shared(); } @@ -90,8 +90,8 @@ public: return col_res; } else - throw Exception("Illegal column " + arguments[0].column->getName() + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", + arguments[0].column->getName(), getName()); } }; diff --git a/src/Functions/now.cpp b/src/Functions/now.cpp index 694e6bc97cb..901ce369031 100644 --- a/src/Functions/now.cpp +++ b/src/Functions/now.cpp @@ -96,12 +96,12 @@ public: { if (arguments.size() > 1) { - throw 
Exception("Arguments size of function " + getName() + " should be 0 or 1", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Arguments size of function {} should be 0 or 1", getName()); } if (arguments.size() == 1 && !isStringOrFixedString(arguments[0].type)) { - throw Exception( - "Arguments of function " + getName() + " should be String or FixedString", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Arguments of function {} should be String or FixedString", + getName()); } if (arguments.size() == 1) { @@ -114,12 +114,12 @@ public: { if (arguments.size() > 1) { - throw Exception("Arguments size of function " + getName() + " should be 0 or 1", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Arguments size of function {} should be 0 or 1", getName()); } if (arguments.size() == 1 && !isStringOrFixedString(arguments[0].type)) { - throw Exception( - "Arguments of function " + getName() + " should be String or FixedString", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Arguments of function {} should be String or FixedString", + getName()); } if (arguments.size() == 1) return std::make_unique( diff --git a/src/Functions/now64.cpp b/src/Functions/now64.cpp index e075dab6b0f..f29b73061d9 100644 --- a/src/Functions/now64.cpp +++ b/src/Functions/now64.cpp @@ -124,17 +124,14 @@ public: if (arguments.size() > 2) { - throw Exception("Arguments size of function " + getName() + " should be 0, or 1, or 2", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Arguments size of function {} should be 0, or 1, or 2", getName()); } if (!arguments.empty()) { const auto & argument = arguments[0]; if (!isInteger(argument.type) || !argument.column || !isColumnConst(*argument.column)) - throw Exception("Illegal type " + argument.type->getName() + - " of 0" + - " argument of function " + getName() + - ". Expected const integer.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of 0 argument of function {}. 
" + "Expected const integer.", argument.type->getName(), getName()); scale = static_cast(argument.column->get64(0)); } diff --git a/src/Functions/nowInBlock.cpp b/src/Functions/nowInBlock.cpp index b1764590fda..7b8d3d983ad 100644 --- a/src/Functions/nowInBlock.cpp +++ b/src/Functions/nowInBlock.cpp @@ -58,12 +58,12 @@ public: { if (arguments.size() > 1) { - throw Exception("Arguments size of function " + getName() + " should be 0 or 1", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Arguments size of function {} should be 0 or 1", getName()); } if (arguments.size() == 1 && !isStringOrFixedString(arguments[0].type)) { - throw Exception( - "Arguments of function " + getName() + " should be String or FixedString", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Arguments of function {} should be String or FixedString", + getName()); } if (arguments.size() == 1) { diff --git a/src/Functions/padString.cpp b/src/Functions/padString.cpp index 486fa328fa0..815d44a356b 100644 --- a/src/Functions/padString.cpp +++ b/src/Functions/padString.cpp @@ -164,7 +164,7 @@ namespace ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Number of arguments for function {} doesn't match: passed {}, should be 2 or 3", getName(), - std::to_string(number_of_arguments)); + number_of_arguments); if (!isStringOrFixedString(arguments[0])) throw Exception( @@ -271,9 +271,8 @@ namespace new_length = new_length_slice.elements->getUInt(new_length_slice.position); if (new_length > MAX_NEW_LENGTH) { - throw Exception( - "New padded length (" + std::to_string(new_length) + ") is too big, maximum is: " + std::to_string(MAX_NEW_LENGTH), - ErrorCodes::TOO_LARGE_STRING_SIZE); + throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "New padded length ({}) is too big, maximum is: {}", + std::to_string(new_length), std::to_string(MAX_NEW_LENGTH)); } if (is_const_new_length) { diff --git a/src/Functions/parseTimeDelta.cpp b/src/Functions/parseTimeDelta.cpp index 6842997a71c..7743a0cb664 100644 --- a/src/Functions/parseTimeDelta.cpp +++ b/src/Functions/parseTimeDelta.cpp @@ -120,14 +120,14 @@ namespace ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Number of arguments for function {} doesn't match: passed {}, should be 1.", getName(), - toString(arguments.size())); + arguments.size()); if (arguments.size() > 1) throw Exception( ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Number of arguments for function {} doesn't match: passed {}, should be 1.", getName(), - toString(arguments.size())); + arguments.size()); const IDataType & type = *arguments[0]; diff --git a/src/Functions/partitionId.cpp b/src/Functions/partitionId.cpp index 22c3910feaf..e2e9038cd8b 100644 --- a/src/Functions/partitionId.cpp +++ b/src/Functions/partitionId.cpp @@ -40,7 +40,7 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (arguments.empty()) - throw Exception("Function " + getName() + " requires at least one argument.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least one argument.", getName()); return std::make_shared(); } diff --git a/src/Functions/pointInEllipses.cpp b/src/Functions/pointInEllipses.cpp index 07b7f013cac..208f2ad2f82 100644 --- a/src/Functions/pointInEllipses.cpp +++ b/src/Functions/pointInEllipses.cpp @@ -63,16 +63,16 @@ private: { if (arguments.size() < 6 || arguments.size() % 4 != 2) { - throw Exception( - 
"Incorrect number of arguments of function " + getName() + ". Must be 2 for your point plus 4 * N for ellipses (x_i, y_i, a_i, b_i).", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Incorrect number of arguments of function {}. " + "Must be 2 for your point plus 4 * N for ellipses (x_i, y_i, a_i, b_i).", getName()); } /// For array on stack, see below. if (arguments.size() > 10000) { - throw Exception( - "Number of arguments of function " + getName() + " is too large.", ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION); + throw Exception(ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION, "Number of arguments of function {} is too large.", + getName()); } for (const auto arg_idx : collections::range(0, arguments.size())) @@ -80,9 +80,8 @@ private: const auto * arg = arguments[arg_idx].get(); if (!WhichDataType(arg).isFloat64()) { - throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(arg_idx + 1) + " of function " + getName() + ". Must be Float64", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument {} of function {}. " + "Must be Float64", arg->getName(), std::to_string(arg_idx + 1), getName()); } } @@ -110,9 +109,8 @@ private: } else { - throw Exception( - "Illegal type " + column->getName() + " of argument " + std::to_string(arg_idx + 1) + " of function " + getName() + ". Must be const Float64", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument {} of function {}. " + "Must be const Float64", column->getName(), std::to_string(arg_idx + 1), getName()); } } ellipses[ellipse_idx] = Ellipse{ellipse_data[0], ellipse_data[1], ellipse_data[2], ellipse_data[3]}; @@ -128,8 +126,8 @@ private: } else if (!typeid_cast *> (column)) { - throw Exception("Illegal column " + column->getName() + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", + column->getName(), getName()); } } @@ -162,9 +160,8 @@ private: } else { - throw Exception( - "Illegal types " + col_x->getName() + ", " + col_y->getName() + " of arguments 1, 2 of function " + getName() + ". Both must be either const or vector", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal types {}, {} of arguments 1, 2 of function {}. 
" + "Both must be either const or vector", col_x->getName(), col_y->getName(), getName()); } } diff --git a/src/Functions/pointInPolygon.cpp b/src/Functions/pointInPolygon.cpp index ee2f566e619..24ad1d20611 100644 --- a/src/Functions/pointInPolygon.cpp +++ b/src/Functions/pointInPolygon.cpp @@ -87,7 +87,7 @@ public: { if (arguments.size() < 2) { - throw Exception("Too few arguments", ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION); + throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Too few arguments"); } /** We allow function invocation in one of the following forms: @@ -103,19 +103,19 @@ public: auto validate_tuple = [this](size_t i, const DataTypeTuple * tuple) { if (tuple == nullptr) - throw Exception(getMessagePrefix(i) + " must contain a tuple", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "{} must contain a tuple", getMessagePrefix(i)); const DataTypes & elements = tuple->getElements(); if (elements.size() != 2) - throw Exception(getMessagePrefix(i) + " must have exactly two elements", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "{} must have exactly two elements", getMessagePrefix(i)); for (auto j : collections::range(0, elements.size())) { if (!isNativeNumber(elements[j])) { - throw Exception(getMessagePrefix(i) + " must contain numeric tuple at position " + toString(j + 1), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "{} must contain numeric tuple at position {}", + getMessagePrefix(i), j + 1); } } }; @@ -126,8 +126,7 @@ public: { const auto * array = checkAndGetDataType(arguments[1].get()); if (array == nullptr) - throw Exception(getMessagePrefix(1) + " must contain an array of tuples or an array of arrays of tuples.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "{} must contain an array of tuples or an array of arrays of tuples.", getMessagePrefix(1)); const auto * nested_array = checkAndGetDataType(array->getNestedType().get()); if (nested_array != nullptr) @@ -143,8 +142,7 @@ public: { const auto * array = checkAndGetDataType(arguments[i].get()); if (array == nullptr) - throw Exception(getMessagePrefix(i) + " must contain an array of tuples", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "{} must contain an array of tuples", getMessagePrefix(i)); validate_tuple(i, checkAndGetDataType(array->getNestedType().get())); } @@ -162,8 +160,8 @@ public: const auto * tuple_col = checkAndGetColumn(point_col); if (!tuple_col) - throw Exception("First argument for function " + getName() + " must be constant array of tuples.", - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "First argument for function {} must be constant array of tuples.", + getName()); const auto & tuple_columns = tuple_col->getColumns(); @@ -216,8 +214,8 @@ public: else { if (arguments.size() != 2) - throw Exception("Multi-argument version of function " + getName() + " works only with const polygon", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Multi-argument version of function {} works only with const polygon", + getName()); auto res_column = ColumnVector::create(input_rows_count); auto & data = res_column->getData(); @@ -431,8 +429,9 @@ private: { Int64 result = 0; if (common::mulOverflow(static_cast(x_data[i]), static_cast(y_data[i]), result)) - throw Exception("The coordinates of the point are such that subsequent 
calculations cannot be performed correctly. " \ - "Most likely they are very large in modulus.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The coordinates of the point are such that " + "subsequent calculations cannot be performed correctly. " + "Most likely they are very large in modulus."); out_container.emplace_back(x_data[i], y_data[i]); } @@ -485,14 +484,14 @@ private: { const auto * const_col = checkAndGetColumn(arguments[i].column.get()); if (!const_col) - throw Exception("Multi-argument version of function " + getName() + " works only with const polygon", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Multi-argument version of function {} works only with const polygon", + getName()); const auto * array_col = checkAndGetColumn(&const_col->getDataColumn()); const auto * tuple_col = array_col ? checkAndGetColumn(&array_col->getData()) : nullptr; if (!tuple_col) - throw Exception(getMessagePrefix(i) + " must be constant array of tuples", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "{} must be constant array of tuples", getMessagePrefix(i)); const auto & tuple_columns = tuple_col->getColumns(); const auto & column_x = tuple_columns[0]; @@ -506,7 +505,7 @@ private: auto size = column_x->size(); if (size == 0) - throw Exception(getMessagePrefix(i) + " shouldn't be empty.", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "{} shouldn't be empty.", getMessagePrefix(i)); for (auto j : collections::range(0, size)) { @@ -566,7 +565,7 @@ private: std::string failure_message; auto is_valid = boost::geometry::is_valid(out_polygon, failure_message); if (!is_valid) - throw Exception("Polygon is not valid: " + failure_message, ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Polygon is not valid: {}", failure_message); } #endif } diff --git a/src/Functions/polygonArea.cpp b/src/Functions/polygonArea.cpp index a83e960dc0d..e49a4eb9fb3 100644 --- a/src/Functions/polygonArea.cpp +++ b/src/Functions/polygonArea.cpp @@ -73,7 +73,7 @@ public: using Converter = typename TypeConverter::Type; if constexpr (std::is_same_v, Converter>) - throw Exception(fmt::format("The argument of function {} must not be Point", getName()), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The argument of function {} must not be Point", getName()); else { auto geometries = Converter::convert(arguments[0].column->convertToFullColumnIfConst()); diff --git a/src/Functions/polygonConvexHull.cpp b/src/Functions/polygonConvexHull.cpp index c0b079d3014..d7fca45bd1e 100644 --- a/src/Functions/polygonConvexHull.cpp +++ b/src/Functions/polygonConvexHull.cpp @@ -70,7 +70,7 @@ public: using Converter = typename TypeConverter::Type; if constexpr (std::is_same_v>) - throw Exception(fmt::format("The argument of function {} must not be a Point", getName()), ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The argument of function {} must not be a Point", getName()); else { auto geometries = Converter::convert(arguments[0].column->convertToFullColumnIfConst()); diff --git a/src/Functions/polygonPerimeter.cpp b/src/Functions/polygonPerimeter.cpp index e128bf0c03b..2d89d4e4f5b 100644 --- a/src/Functions/polygonPerimeter.cpp +++ b/src/Functions/polygonPerimeter.cpp @@ -72,7 +72,7 @@ public: using Converter = typename TypeConverter::Type; if constexpr (std::is_same_v, Converter>) - throw Exception(fmt::format("The argument of function 
{} must not be Point", getName()), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The argument of function {} must not be Point", getName()); else { auto geometries = Converter::convert(arguments[0].column->convertToFullColumnIfConst()); diff --git a/src/Functions/polygonsDistance.cpp b/src/Functions/polygonsDistance.cpp index 3b01d01f311..d2c58105eae 100644 --- a/src/Functions/polygonsDistance.cpp +++ b/src/Functions/polygonsDistance.cpp @@ -77,7 +77,7 @@ public: using RightConverter = typename RightConverterType::Type; if constexpr (std::is_same_v, LeftConverter> || std::is_same_v, RightConverter>) - throw Exception(fmt::format("Any argument of function {} must not be Point", getName()), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be Point", getName()); else { auto first = LeftConverter::convert(arguments[0].column->convertToFullColumnIfConst()); diff --git a/src/Functions/polygonsEquals.cpp b/src/Functions/polygonsEquals.cpp index 2793df7efac..3c80ae1e4c5 100644 --- a/src/Functions/polygonsEquals.cpp +++ b/src/Functions/polygonsEquals.cpp @@ -76,7 +76,7 @@ public: using RightConverter = typename RightConverterType::Type; if constexpr (std::is_same_v, LeftConverter> || std::is_same_v, RightConverter>) - throw Exception(fmt::format("Any argument of function {} must not be Point", getName()), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be Point", getName()); else { auto first = LeftConverter::convert(arguments[0].column->convertToFullColumnIfConst()); diff --git a/src/Functions/polygonsIntersection.cpp b/src/Functions/polygonsIntersection.cpp index dd17665edba..84e5fe0d4b7 100644 --- a/src/Functions/polygonsIntersection.cpp +++ b/src/Functions/polygonsIntersection.cpp @@ -75,7 +75,7 @@ public: using RightConverter = typename RightConverterType::Type; if constexpr (std::is_same_v, LeftConverter> || std::is_same_v, RightConverter>) - throw Exception(fmt::format("Any argument of function {} must not be Point", getName()), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be Point", getName()); else { auto first = LeftConverter::convert(arguments[0].column->convertToFullColumnIfConst()); diff --git a/src/Functions/polygonsSymDifference.cpp b/src/Functions/polygonsSymDifference.cpp index b8f4443f1ed..ceb39547427 100644 --- a/src/Functions/polygonsSymDifference.cpp +++ b/src/Functions/polygonsSymDifference.cpp @@ -74,7 +74,7 @@ public: using RightConverter = typename RightConverterType::Type; if constexpr (std::is_same_v, LeftConverter> || std::is_same_v, RightConverter>) - throw Exception(fmt::format("Any argument of function {} must not be Point", getName()), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be Point", getName()); else { auto first = LeftConverter::convert(arguments[0].column->convertToFullColumnIfConst()); diff --git a/src/Functions/polygonsUnion.cpp b/src/Functions/polygonsUnion.cpp index ea770f461f6..4a604d0f810 100644 --- a/src/Functions/polygonsUnion.cpp +++ b/src/Functions/polygonsUnion.cpp @@ -74,7 +74,7 @@ public: using RightConverter = typename RightConverterType::Type; if constexpr (std::is_same_v, LeftConverter> || std::is_same_v, RightConverter>) - throw 
Exception(fmt::format("Any argument of function {} must not be Point", getName()), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be Point", getName()); else { auto first = LeftConverter::convert(arguments[0].column->convertToFullColumnIfConst()); diff --git a/src/Functions/polygonsWithin.cpp b/src/Functions/polygonsWithin.cpp index 6cc6a8cb2ef..1b094f42060 100644 --- a/src/Functions/polygonsWithin.cpp +++ b/src/Functions/polygonsWithin.cpp @@ -78,7 +78,7 @@ public: using RightConverter = typename RightConverterType::Type; if constexpr (std::is_same_v, LeftConverter> || std::is_same_v, RightConverter>) - throw Exception(fmt::format("Any argument of function {} must not be Point", getName()), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Any argument of function {} must not be Point", getName()); else { auto first = LeftConverter::convert(arguments[0].column->convertToFullColumnIfConst()); diff --git a/src/Functions/randConstant.cpp b/src/Functions/randConstant.cpp index d19518ce765..dd629538e85 100644 --- a/src/Functions/randConstant.cpp +++ b/src/Functions/randConstant.cpp @@ -92,9 +92,9 @@ public: { size_t number_of_arguments = data_types.size(); if (number_of_arguments > 1) - throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " - + toString(number_of_arguments) + ", should be 0 or 1.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 0 or 1.", + getName(), number_of_arguments); return std::make_shared>(); } diff --git a/src/Functions/randDistribution.cpp b/src/Functions/randDistribution.cpp index a4b878f863e..f92a2c7c034 100644 --- a/src/Functions/randDistribution.cpp +++ b/src/Functions/randDistribution.cpp @@ -210,12 +210,14 @@ private: { if (parameter_number >= arguments.size()) throw Exception( - ErrorCodes::LOGICAL_ERROR, "Parameter number ({}) is greater than the size of arguments ({}). This is a bug", parameter_number, arguments.size()); + ErrorCodes::LOGICAL_ERROR, + "Parameter number ({}) is greater than the size of arguments ({}). This is a bug", + parameter_number, arguments.size()); const IColumn * col = arguments[parameter_number].column.get(); if (!isColumnConst(*col)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Parameter number {} of function must be constant.", parameter_number, getName()); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Parameter number {} of function {} must be constant.", parameter_number, getName()); auto parameter = applyVisitor(FieldVisitorConvertToNumber(), assert_cast(*col).getField()); @@ -243,7 +245,9 @@ public: { auto desired = Distribution::getNumberOfArguments(); if (arguments.size() != desired && arguments.size() != desired + 1) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong number of arguments for function {}. Should be {} or {}", getName(), desired, desired + 1); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Wrong number of arguments for function {}. 
Should be {} or {}", + getName(), desired, desired + 1); for (size_t i = 0; i < Distribution::getNumberOfArguments(); ++i) { diff --git a/src/Functions/randomFixedString.cpp b/src/Functions/randomFixedString.cpp index a0b7a999d36..508fae3e824 100644 --- a/src/Functions/randomFixedString.cpp +++ b/src/Functions/randomFixedString.cpp @@ -42,10 +42,10 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { if (!isUnsignedInteger(arguments[0].type)) - throw Exception("First argument for function " + getName() + " must be unsigned integer", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be unsigned integer", getName()); if (!arguments[0].column || !isColumnConst(*arguments[0].column)) - throw Exception("First argument for function " + getName() + " must be constant", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "First argument for function {} must be constant", getName()); const size_t n = assert_cast(*arguments[0].column).getValue(); return std::make_shared(n); @@ -66,7 +66,7 @@ public: size_t total_size; if (common::mulOverflow(input_rows_count, n, total_size)) - throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW); + throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "Decimal math overflow"); /// Fill random bytes. data_to.resize(total_size); diff --git a/src/Functions/randomPrintableASCII.cpp b/src/Functions/randomPrintableASCII.cpp index cd46f175977..33c4c1405fd 100644 --- a/src/Functions/randomPrintableASCII.cpp +++ b/src/Functions/randomPrintableASCII.cpp @@ -41,16 +41,16 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (arguments.empty()) - throw Exception("Function " + getName() + " requires at least one argument: the size of resulting string", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Function {} requires at least one argument: the size of resulting string", getName()); if (arguments.size() > 2) - throw Exception("Function " + getName() + " requires at most two arguments: the size of resulting string and optional disambiguation tag", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Function {} requires at most two arguments: the size of resulting string and optional disambiguation tag", getName()); const IDataType & length_type = *arguments[0]; if (!isNumber(length_type)) - throw Exception("First argument of function " + getName() + " must have numeric type", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument of function {} must have numeric type", getName()); return std::make_shared(); } @@ -74,7 +74,7 @@ public: { size_t length = length_column.getUInt(row_num); if (length > (1 << 30)) - throw Exception("Too large string size in function " + getName(), ErrorCodes::TOO_LARGE_STRING_SIZE); + throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Too large string size in function {}", getName()); IColumn::Offset next_offset = offset + length + 1; data_to.resize(next_offset); diff --git a/src/Functions/randomString.cpp b/src/Functions/randomString.cpp index 16b0254c536..4afd0799d29 100644 --- a/src/Functions/randomString.cpp +++ b/src/Functions/randomString.cpp @@ -40,18 +40,16 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if 
(arguments.empty()) - throw Exception( - "Function " + getName() + " requires at least one argument: the size of resulting string", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Function {} requires at least one argument: the size of resulting string", getName()); if (arguments.size() > 2) - throw Exception( - "Function " + getName() + " requires at most two arguments: the size of resulting string and optional disambiguation tag", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Function {} requires at most two arguments: the size of resulting string and optional disambiguation tag", getName()); const IDataType & length_type = *arguments[0]; if (!isNumber(length_type)) - throw Exception("First argument of function " + getName() + " must have numeric type", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument of function {} must have numeric type", getName()); return std::make_shared(); } @@ -77,7 +75,7 @@ public: { size_t length = length_column.getUInt(row_num); if (length > (1 << 30)) - throw Exception("Too large string size in function " + getName(), ErrorCodes::TOO_LARGE_STRING_SIZE); + throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Too large string size in function {}", getName()); offset += length + 1; offsets_to[row_num] = offset; diff --git a/src/Functions/randomStringUTF8.cpp b/src/Functions/randomStringUTF8.cpp index bcaa603b85d..14860338d4a 100644 --- a/src/Functions/randomStringUTF8.cpp +++ b/src/Functions/randomStringUTF8.cpp @@ -41,7 +41,7 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (!isNumber(*arguments[0])) - throw Exception("First argument of function " + getName() + " must have numeric type", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument of function {} must have numeric type", getName()); return std::make_shared(); } @@ -74,7 +74,7 @@ public: */ if (summary_utf8_len > (1 << 29)) - throw Exception("Too large string size in function " + getName(), ErrorCodes::TOO_LARGE_STRING_SIZE); + throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Too large string size in function {}", getName()); size_t size_in_bytes_with_margin = summary_utf8_len * 4 + input_rows_count; data_to.resize(size_in_bytes_with_margin); diff --git a/src/Functions/readWkt.cpp b/src/Functions/readWkt.cpp index 28ed9a64ea8..ec20cdf3723 100644 --- a/src/Functions/readWkt.cpp +++ b/src/Functions/readWkt.cpp @@ -41,8 +41,7 @@ public: { if (checkAndGetDataType(arguments[0].get()) == nullptr) { - throw Exception("First argument should be String", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument should be String"); } return DataTypeFactory::instance().get(DataTypeName().getName()); diff --git a/src/Functions/regexpQuoteMeta.cpp b/src/Functions/regexpQuoteMeta.cpp index d0ec572ac59..bd8cf161518 100644 --- a/src/Functions/regexpQuoteMeta.cpp +++ b/src/Functions/regexpQuoteMeta.cpp @@ -46,9 +46,8 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { if (!WhichDataType(arguments[0].type).isString()) - throw Exception( - "Illegal type " + arguments[0].type->getName() + " of 1 argument of function " + getName() + ". 
Must be String.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of 1 argument of function {}. Must be String.", + arguments[0].type->getName(), getName()); return std::make_shared(); } @@ -59,9 +58,8 @@ public: const ColumnString * input = checkAndGetColumn(column_string.get()); if (!input) - throw Exception( - "Illegal column " + arguments[0].column->getName() + " of first argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + arguments[0].column->getName(), getName()); auto dst_column = ColumnString::create(); auto & dst_data = dst_column->getChars(); diff --git a/src/Functions/repeat.cpp b/src/Functions/repeat.cpp index 748615f9ce5..dcd05f373fc 100644 --- a/src/Functions/repeat.cpp +++ b/src/Functions/repeat.cpp @@ -27,16 +27,16 @@ struct RepeatImpl { static constexpr UInt64 max_repeat_times = 1000000; if (repeat_time > max_repeat_times) - throw Exception("Too many times to repeat (" + std::to_string(repeat_time) + "), maximum is: " + std::to_string(max_repeat_times), - ErrorCodes::TOO_LARGE_STRING_SIZE); + throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Too many times to repeat ({}), maximum is: {}", + std::to_string(repeat_time), std::to_string(max_repeat_times)); } static inline void checkStringSize(UInt64 size) { static constexpr UInt64 max_string_size = 1 << 30; if (size > max_string_size) - throw Exception("Too large string size (" + std::to_string(size) + ") in function repeat, maximum is: " + std::to_string(max_string_size), - ErrorCodes::TOO_LARGE_STRING_SIZE); + throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Too large string size ({}) in function repeat, maximum is: {}", + size, max_string_size); } static void vectorStrConstRepeat( @@ -184,11 +184,11 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (!isString(arguments[0])) - throw Exception( - "Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", + arguments[0]->getName(), getName()); if (!isUnsignedInteger(arguments[1])) - throw Exception( - "Illegal type " + arguments[1]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", + arguments[1]->getName(), getName()); return arguments[0]; } @@ -244,9 +244,8 @@ public: } } - throw Exception( - "Illegal column " + arguments[0].column->getName() + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", + arguments[0].column->getName(), getName()); } }; diff --git a/src/Functions/replicate.cpp b/src/Functions/replicate.cpp index f55f89bcac1..ca33571c448 100644 --- a/src/Functions/replicate.cpp +++ b/src/Functions/replicate.cpp @@ -46,7 +46,7 @@ ColumnPtr FunctionReplicate::executeImpl(const ColumnsWithTypeAndName & argument { const auto * const_array_column = checkAndGetColumnConst(arguments[i].column.get()); if (!const_array_column) - throw Exception("Unexpected column for replicate", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unexpected column for replicate"); temp_column = 
const_array_column->convertToFullColumn(); array_column = checkAndGetColumn(temp_column.get()); } diff --git a/src/Functions/reverse.cpp b/src/Functions/reverse.cpp index 08234afaff0..32b998523c7 100644 --- a/src/Functions/reverse.cpp +++ b/src/Functions/reverse.cpp @@ -83,8 +83,8 @@ public: { if (!isStringOrFixedString(arguments[0]) && !isArray(arguments[0])) - throw Exception( - "Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", + arguments[0]->getName(), getName()); return arguments[0]; } @@ -107,9 +107,8 @@ public: return col_res; } else - throw Exception( - "Illegal column " + arguments[0].column->getName() + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", + arguments[0].column->getName(), getName()); } }; diff --git a/src/Functions/reverseUTF8.cpp b/src/Functions/reverseUTF8.cpp index 06567f0b325..8a76af05d86 100644 --- a/src/Functions/reverseUTF8.cpp +++ b/src/Functions/reverseUTF8.cpp @@ -64,7 +64,7 @@ struct ReverseUTF8Impl [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) { - throw Exception("Cannot apply function reverseUTF8 to fixed string.", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot apply function reverseUTF8 to fixed string."); } }; diff --git a/src/Functions/roundToExp2.cpp b/src/Functions/roundToExp2.cpp index 7893773fb61..ef29b5eaf4a 100644 --- a/src/Functions/roundToExp2.cpp +++ b/src/Functions/roundToExp2.cpp @@ -45,7 +45,7 @@ template requires is_big_int_v inline T roundDownToPowerOfTwo(T) { - throw Exception("roundToExp2() for big integers is not implemented", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "roundToExp2() for big integers is not implemented"); } /** For integer data types: diff --git a/src/Functions/runningAccumulate.cpp b/src/Functions/runningAccumulate.cpp index 877ad877f56..b6fc92a7eb9 100644 --- a/src/Functions/runningAccumulate.cpp +++ b/src/Functions/runningAccumulate.cpp @@ -67,13 +67,14 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (arguments.empty() || arguments.size() > 2) - throw Exception("Incorrect number of arguments of function " + getName() + ". Must be 1 or 2.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Incorrect number of arguments of function {}. 
Must be 1 or 2.", getName()); const DataTypeAggregateFunction * type = checkAndGetDataType(arguments[0].get()); if (!type) - throw Exception("Argument for function " + getName() + " must have type AggregateFunction - state of aggregate function.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Argument for function {} must have type AggregateFunction - state " + "of aggregate function.", getName()); return type->getReturnType(); } @@ -84,10 +85,8 @@ public: = typeid_cast(&*arguments.at(0).column); if (!column_with_states) - throw Exception("Illegal column " + arguments.at(0).column->getName() - + " of first argument of function " - + getName(), - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + arguments.at(0).column->getName(), getName()); ColumnPtr column_with_groups; diff --git a/src/Functions/runningConcurrency.cpp b/src/Functions/runningConcurrency.cpp index c112165fda7..61ee077d218 100644 --- a/src/Functions/runningConcurrency.cpp +++ b/src/Functions/runningConcurrency.cpp @@ -37,8 +37,7 @@ namespace DB const ColVecArg * col_begin = checkAndGetColumn(arguments[0].column.get()); const ColVecArg * col_end = checkAndGetColumn(arguments[1].column.get()); if (!col_begin || !col_end) - throw Exception("Constant columns are not supported at the moment", - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Constant columns are not supported at the moment"); const typename ColVecArg::Container & vec_begin = col_begin->getData(); const typename ColVecArg::Container & vec_end = col_end->getData(); @@ -61,9 +60,7 @@ namespace DB WriteBufferFromOwnString buf_begin, buf_end; begin_serializaion->serializeTextQuoted(*(arguments[0].column), i, buf_begin, default_format); end_serialization->serializeTextQuoted(*(arguments[1].column), i, buf_end, default_format); - throw Exception( - "Incorrect order of events: " + buf_begin.str() + " > " + buf_end.str(), - ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect order of events: {} > {}", buf_begin.str(), buf_end.str()); } ongoing_until.insert(end); @@ -148,9 +145,7 @@ namespace DB case TypeIndex::DateTime: f(TypeTag()); break; case TypeIndex::DateTime64: f(TypeTag()); break; default: - throw Exception( - "Arguments for function " + getName() + " must be Date, DateTime, or DateTime64.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Arguments for function {} must be Date, DateTime, or DateTime64.", getName()); } } @@ -172,9 +167,7 @@ namespace DB // The type of the second argument must match with that of the first one. 
if (unlikely(!arguments[1].type->equals(*(arguments[0].type)))) { - throw Exception( - "Function " + getName() + " must be called with two arguments having the same type.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function {} must be called with two arguments having the same type.", getName()); } DataTypes argument_types = { arguments[0].type, arguments[1].type }; diff --git a/src/Functions/runningDifference.h b/src/Functions/runningDifference.h index 5e58d0d8aaf..154370d4cd9 100644 --- a/src/Functions/runningDifference.h +++ b/src/Functions/runningDifference.h @@ -121,7 +121,7 @@ private: else if (which.isDateTime()) f(DataTypeDateTime::FieldType()); else - throw Exception("Argument for function " + getName() + " must have numeric type.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Argument for function {} must have numeric type.", getName()); } public: diff --git a/src/Functions/sleep.h b/src/Functions/sleep.h index 487a0909f10..8edd3b73288 100644 --- a/src/Functions/sleep.h +++ b/src/Functions/sleep.h @@ -72,8 +72,8 @@ public: if (!which.isFloat() && !which.isNativeUInt()) - throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName() + ", expected Float64", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}, expected Float64", + arguments[0]->getName(), getName()); return std::make_shared(); } @@ -83,12 +83,12 @@ public: const IColumn * col = arguments[0].column.get(); if (!isColumnConst(*col)) - throw Exception("The argument of function " + getName() + " must be constant.", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "The argument of function {} must be constant.", getName()); Float64 seconds = applyVisitor(FieldVisitorConvertToNumber(), assert_cast(*col).getField()); if (seconds < 0 || !std::isfinite(seconds)) - throw Exception("Cannot sleep infinite or negative amount of time (not implemented)", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot sleep infinite or negative amount of time (not implemented)"); size_t size = col->size(); @@ -97,7 +97,7 @@ public: { /// When sleeping, the query cannot be cancelled. For ability to cancel query, we limit sleep time. if (seconds > 3.0) /// The choice is arbitrary - throw Exception("The maximum sleep time is 3 seconds. Requested: " + toString(seconds), ErrorCodes::TOO_SLOW); + throw Exception(ErrorCodes::TOO_SLOW, "The maximum sleep time is 3 seconds. Requested: {}", toString(seconds)); UInt64 count = (variant == FunctionSleepVariant::PerBlock ? 1 : size); UInt64 microseconds = static_cast(seconds * count * 1e6); diff --git a/src/Functions/stem.cpp b/src/Functions/stem.cpp index 91c98ec9b82..d326d17a7a8 100644 --- a/src/Functions/stem.cpp +++ b/src/Functions/stem.cpp @@ -37,9 +37,7 @@ struct StemImpl if (stemmer == nullptr) { - throw Exception( - "Language " + language + " is not supported for function stem", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Language {} is not supported for function stem", language); } res_data.resize(data.size()); @@ -74,7 +72,9 @@ public: static FunctionPtr create(ContextPtr context) { if (!context->getSettingsRef().allow_experimental_nlp_functions) - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Natural language processing function '{}' is experimental. 
Set `allow_experimental_nlp_functions` setting to enable it", name); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, + "Natural language processing function '{}' is experimental. " + "Set `allow_experimental_nlp_functions` setting to enable it", name); return std::make_shared(); } @@ -88,11 +88,11 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (!isString(arguments[0])) - throw Exception( - "Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", + arguments[0]->getName(), getName()); if (!isString(arguments[1])) - throw Exception( - "Illegal type " + arguments[1]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", + arguments[1]->getName(), getName()); return arguments[1]; } @@ -109,11 +109,11 @@ public: const ColumnString * words_col = checkAndGetColumn(strcolumn.get()); if (!lang_col) - throw Exception( - "Illegal column " + arguments[0].column->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", + arguments[0].column->getName(), getName()); if (!words_col) - throw Exception( - "Illegal column " + arguments[1].column->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", + arguments[1].column->getName(), getName()); String language = lang_col->getValue(); diff --git a/src/Functions/stringCutToZero.cpp b/src/Functions/stringCutToZero.cpp index caeedaceae7..b9f742cd8bc 100644 --- a/src/Functions/stringCutToZero.cpp +++ b/src/Functions/stringCutToZero.cpp @@ -32,8 +32,8 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (!isStringOrFixedString(arguments[0])) - throw Exception("Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", + arguments[0]->getName(), getName()); return std::make_shared(); } @@ -76,7 +76,7 @@ public: out_vec.resize(pos - begin); if (!out_offsets.empty() && out_offsets.back() != out_vec.size()) - throw Exception("Column size mismatch (internal logical error)", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Column size mismatch (internal logical error)"); col_res = std::move(col_str); return true; @@ -122,7 +122,7 @@ public: out_vec.resize(pos - begin); if (!out_offsets.empty() && out_offsets.back() != out_vec.size()) - throw Exception("Column size mismatch (internal logical error)", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Column size mismatch (internal logical error)"); col_res = std::move(col_str); return true; @@ -141,9 +141,8 @@ public: if (tryExecuteFixedString(column, res_column) || tryExecuteString(column, res_column)) return res_column; - throw Exception("Illegal column " + arguments[0].column->getName() - + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", + 
arguments[0].column->getName(), getName()); } }; diff --git a/src/Functions/stringToH3.cpp b/src/Functions/stringToH3.cpp index 21da96b41a9..d8728b346d0 100644 --- a/src/Functions/stringToH3.cpp +++ b/src/Functions/stringToH3.cpp @@ -44,9 +44,8 @@ public: { const auto * arg = arguments[0].get(); if (!WhichDataType(arg).isStringOrFixedString()) - throw Exception( - "Illegal type " + arg->getName() + " of argument " + std::to_string(1) + " of function " + getName() + ". Must be String or FixedString", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument {} of function {}. " + "Must be String or FixedString", arg->getName(), std::to_string(1), getName()); return std::make_shared(); } @@ -68,7 +67,7 @@ public: else if (const ColumnConst * h3index_const_fixed = checkAndGetColumnConst(col_hindex)) execute>(ConstSource(*h3index_const_fixed), dst_data); else - throw Exception("Illegal column as argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column as argument of function {}", getName()); return dst; } @@ -89,7 +88,7 @@ private: if (res_data[row_num] == 0) { - throw Exception("Invalid H3 index: " + h3index_str, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Invalid H3 index: {}", h3index_str); } h3index_source.next(); diff --git a/src/Functions/substring.cpp b/src/Functions/substring.cpp index 9f0bfa8211c..7678692f612 100644 --- a/src/Functions/substring.cpp +++ b/src/Functions/substring.cpp @@ -111,7 +111,7 @@ public: sliceFromRightConstantOffsetBounded( source, StringSink(*col_res, input_rows_count), -static_cast(start_value), length_value); else - throw Exception("Indices in strings are 1-based", ErrorCodes::ZERO_ARRAY_OR_TUPLE_INDEX); + throw Exception(ErrorCodes::ZERO_ARRAY_OR_TUPLE_INDEX, "Indices in strings are 1-based"); } else sliceDynamicOffsetBounded(source, StringSink(*col_res, input_rows_count), *column_start, *column_length); @@ -154,9 +154,8 @@ public: return executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, length_value, ConstSource(*col_const), input_rows_count); else - throw Exception( - "Illegal column " + arguments[0].column->getName() + " of first argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + arguments[0].column->getName(), getName()); } else { @@ -173,9 +172,8 @@ public: return executeForSource(column_start, column_length, column_start_const, column_length_const, start_value, length_value, ConstSource(*col_const_fixed), input_rows_count); else - throw Exception( - "Illegal column " + arguments[0].column->getName() + " of first argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + arguments[0].column->getName(), getName()); } } }; diff --git a/src/Functions/svg.cpp b/src/Functions/svg.cpp index 4cf1598857b..69e619df901 100644 --- a/src/Functions/svg.cpp +++ b/src/Functions/svg.cpp @@ -50,15 +50,15 @@ public: { if (arguments.size() > 2) { - throw Exception("Too many arguments", ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION); + throw Exception(ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION, "Too many arguments"); } else if (arguments.empty()) { - throw Exception("Too few arguments", 
ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION); + throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Too few arguments"); } else if (arguments.size() == 2 && checkAndGetDataType(arguments[1].get()) == nullptr) { - throw Exception("Second argument should be String", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Second argument should be String"); } return std::make_shared(); diff --git a/src/Functions/synonyms.cpp b/src/Functions/synonyms.cpp index 4ebe61e4b2c..6b3e205785c 100644 --- a/src/Functions/synonyms.cpp +++ b/src/Functions/synonyms.cpp @@ -32,7 +32,9 @@ public: static FunctionPtr create(ContextPtr context) { if (!context->getSettingsRef().allow_experimental_nlp_functions) - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Natural language processing function '{}' is experimental. Set `allow_experimental_nlp_functions` setting to enable it", name); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, + "Natural language processing function '{}' is experimental. " + "Set `allow_experimental_nlp_functions` setting to enable it", name); return std::make_shared(context->getSynonymsExtensions()); } @@ -53,11 +55,11 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (!isString(arguments[0])) - throw Exception( - "Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", + arguments[0]->getName(), getName()); if (!isString(arguments[1])) - throw Exception( - "Illegal type " + arguments[1]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}", + arguments[1]->getName(), getName()); return std::make_shared(std::make_shared()); } @@ -74,13 +76,11 @@ public: const ColumnString * word_col = checkAndGetColumn(strcolumn.get()); if (!ext_col) - throw Exception( - "Illegal column " + arguments[0].column->getName() + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", + arguments[0].column->getName(), getName()); if (!word_col) - throw Exception( - "Illegal column " + arguments[1].column->getName() + " of argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of argument of function {}", + arguments[1].column->getName(), getName()); String ext_name = ext_col->getValue(); auto extension = extensions.getExtension(ext_name); diff --git a/src/Functions/throwIf.cpp b/src/Functions/throwIf.cpp index 357c5e0651a..c6ff5b9151f 100644 --- a/src/Functions/throwIf.cpp +++ b/src/Functions/throwIf.cpp @@ -43,7 +43,7 @@ public: if (number_of_arguments < 1 || number_of_arguments > (allow_custom_error_code_argument ? 3 : 2)) throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Number of arguments for function {} doesn't match: passed {}, should be {}", - getName(), toString(number_of_arguments), allow_custom_error_code_argument ? "1 or 2 or 3" : "1 or 2"); + getName(), number_of_arguments, allow_custom_error_code_argument ? 
"1 or 2 or 3" : "1 or 2"); if (!isNativeNumber(arguments[0])) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, @@ -132,9 +132,14 @@ private: const auto & in_data = in->getData(); if (!memoryIsZero(in_data.data(), 0, in_data.size() * sizeof(in_data[0]))) { - throw Exception( - error_code.value_or(ErrorCodes::FUNCTION_THROW_IF_VALUE_IS_NON_ZERO), - message.value_or("Value passed to '" + getName() + "' function is non-zero")); + if (message.has_value()) + throw Exception::createRuntime( + error_code.value_or(ErrorCodes::FUNCTION_THROW_IF_VALUE_IS_NON_ZERO), + *message); + else + throw Exception( + error_code.value_or(ErrorCodes::FUNCTION_THROW_IF_VALUE_IS_NON_ZERO), + "Value passed to '{}' function is non-zero", getName()); } size_t result_size = in_untyped->size(); diff --git a/src/Functions/timeSlots.cpp b/src/Functions/timeSlots.cpp index 72d6059e0a1..568ab5e5a47 100644 --- a/src/Functions/timeSlots.cpp +++ b/src/Functions/timeSlots.cpp @@ -44,7 +44,7 @@ struct TimeSlotsImpl PaddedPODArray & result_values, ColumnArray::Offsets & result_offsets) { if (time_slot_size == 0) - throw Exception("Time slot size cannot be zero", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Time slot size cannot be zero"); size_t size = starts.size(); @@ -69,7 +69,7 @@ struct TimeSlotsImpl PaddedPODArray & result_values, ColumnArray::Offsets & result_offsets) { if (time_slot_size == 0) - throw Exception("Time slot size cannot be zero", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Time slot size cannot be zero"); size_t size = starts.size(); @@ -94,7 +94,7 @@ struct TimeSlotsImpl PaddedPODArray & result_values, ColumnArray::Offsets & result_offsets) { if (time_slot_size == 0) - throw Exception("Time slot size cannot be zero", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Time slot size cannot be zero"); size_t size = durations.size(); @@ -137,7 +137,7 @@ struct TimeSlotsImpl ColumnArray::Offset current_offset = 0; time_slot_size = time_slot_size.value * ts_multiplier; if (time_slot_size == 0) - throw Exception("Time slot size cannot be zero", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Time slot size cannot be zero"); for (size_t i = 0; i < size; ++i) { @@ -170,7 +170,7 @@ struct TimeSlotsImpl duration = duration * dur_multiplier; time_slot_size = time_slot_size.value * ts_multiplier; if (time_slot_size == 0) - throw Exception("Time slot size cannot be zero", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Time slot size cannot be zero"); for (size_t i = 0; i < size; ++i) { @@ -203,7 +203,7 @@ struct TimeSlotsImpl start = dt_multiplier * start; time_slot_size = time_slot_size.value * ts_multiplier; if (time_slot_size == 0) - throw Exception("Time slot size cannot be zero", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Time slot size cannot be zero"); for (size_t i = 0; i < size; ++i) { @@ -238,37 +238,33 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { if (arguments.size() != 2 && arguments.size() != 3) - throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " - + toString(arguments.size()) + ", should be 2 or 3", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 2 or 3", + getName(), arguments.size()); if 
(WhichDataType(arguments[0].type).isDateTime()) { if (!WhichDataType(arguments[1].type).isUInt32()) - throw Exception( - "Illegal type " + arguments[1].type->getName() + " of second argument of function " + getName() + ". Must be UInt32 when first argument is DateTime.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of second argument of function {}. " + "Must be UInt32 when first argument is DateTime.", arguments[1].type->getName(), getName()); if (arguments.size() == 3 && !WhichDataType(arguments[2].type).isNativeUInt()) - throw Exception( - "Illegal type " + arguments[2].type->getName() + " of third argument of function " + getName() + ". Must be UInt32 when first argument is DateTime.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of third argument of function {}. " + "Must be UInt32 when first argument is DateTime.", arguments[2].type->getName(), getName()); } else if (WhichDataType(arguments[0].type).isDateTime64()) { if (!WhichDataType(arguments[1].type).isDecimal64()) - throw Exception( - "Illegal type " + arguments[1].type->getName() + " of second argument of function " + getName() + ". Must be Decimal64 when first argument is DateTime64.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of second argument of function {}. " + "Must be Decimal64 when first argument is DateTime64.", arguments[1].type->getName(), getName()); if (arguments.size() == 3 && !WhichDataType(arguments[2].type).isDecimal64()) - throw Exception( - "Illegal type " + arguments[2].type->getName() + " of third argument of function " + getName() + ". Must be Decimal64 when first argument is DateTime64.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of third argument of function {}. " + "Must be Decimal64 when first argument is DateTime64.", arguments[2].type->getName(), getName()); } else - throw Exception("Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName() - + ". Must be DateTime or DateTime64.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}. " + "Must be DateTime or DateTime64.", arguments[0].type->getName(), getName()); /// If time zone is specified for source data type, attach it to the resulting type. /// Note that there is no explicit time zone argument for this function (we specify 2 as an argument number with explicit time zone). 
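One hunk above, in src/Functions/throwIf.cpp, is deliberately different: the optional custom message there arrives from the query at runtime, so it is routed through Exception::createRuntime rather than the format-string constructor. The sketch below shows why, using plain {fmt}, which the removed lines in this diff already call directly; the strings and the demonstration itself are invented for illustration only.

#include <fmt/format.h>
#include <iostream>
#include <string>

int main()
{
    /// A trusted, compile-time format string: the placeholder is substituted.
    std::cout << fmt::format("Value passed to '{}' function is non-zero", "throwIf") << '\n';

    /// Text supplied at runtime may legitimately contain '{' or '}'.
    const std::string user_message = "custom {not a placeholder} message";

    try
    {
        /// Treating it as a format string is wrong: fmt::runtime() defers the
        /// check to run time, and the stray braces are then rejected.
        std::cout << fmt::format(fmt::runtime(user_message)) << '\n';
    }
    catch (const fmt::format_error & e)
    {
        std::cout << "format_error: " << e.what() << '\n';
    }

    /// The safe path for runtime text is to keep it verbatim, which is what the
    /// separate createRuntime branch in the throwIf hunk does.
    std::cout << user_message << '\n';
    return 0;
}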
@@ -295,10 +291,10 @@ public: { const auto * time_slot_column = checkAndGetColumn(arguments[2].column.get()); if (!time_slot_column) - throw Exception("Third argument for function " + getName() + " must be constant UInt32", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Third argument for function {} must be constant UInt32", getName()); if (time_slot_size = time_slot_column->getValue(); time_slot_size <= 0) - throw Exception("Third argument for function " + getName() + " must be greater than zero", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Third argument for function {} must be greater than zero", getName()); } const auto * dt_starts = checkAndGetColumn(arguments[0].column.get()); @@ -335,10 +331,10 @@ public: { const auto * time_slot_column = checkAndGetColumn(arguments[2].column.get()); if (!time_slot_column) - throw Exception("Third argument for function " + getName() + " must be constant Decimal64", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Third argument for function {} must be constant Decimal64", getName()); if (time_slot_size = time_slot_column->getValue(); time_slot_size <= 0) - throw Exception("Third argument for function " + getName() + " must be greater than zero", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Third argument for function {} must be greater than zero", getName()); time_slot_scale = assert_cast(arguments[2].type.get())->getScale(); } @@ -378,17 +374,13 @@ public: if (arguments.size() == 3) { - throw Exception( - "Illegal columns " + arguments[0].column->getName() + ", " + arguments[1].column->getName() + ", " - + arguments[2].column->getName() + " of arguments of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal columns {}, {}, {} of arguments of function {}", + arguments[0].column->getName(), arguments[1].column->getName(), arguments[2].column->getName(), getName()); } else { - throw Exception( - "Illegal columns " + arguments[0].column->getName() + ", " + arguments[1].column->getName() - + " of arguments of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal columns {}, {} of arguments of function {}", + arguments[0].column->getName(), arguments[1].column->getName(), getName()); } } }; diff --git a/src/Functions/toDayOfWeek.cpp b/src/Functions/toDayOfWeek.cpp index 354d4dea894..06343714b9d 100644 --- a/src/Functions/toDayOfWeek.cpp +++ b/src/Functions/toDayOfWeek.cpp @@ -1,13 +1,12 @@ #include #include -#include #include - +#include namespace DB { -using FunctionToDayOfWeek = FunctionDateOrDateTimeToSomething; +using FunctionToDayOfWeek = FunctionCustomWeekToSomething; REGISTER_FUNCTION(ToDayOfWeek) { diff --git a/src/Functions/toFixedString.h b/src/Functions/toFixedString.h index 200d25283d5..6d14f0f1380 100644 --- a/src/Functions/toFixedString.h +++ b/src/Functions/toFixedString.h @@ -48,11 +48,11 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { if (!isUnsignedInteger(arguments[1].type)) - throw Exception("Second argument for function " + getName() + " must be unsigned integer", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument for function {} must be unsigned integer", getName()); if (!arguments[1].column) - throw Exception("Second argument for function " + getName() + " must be 
constant", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument for function {} must be constant", getName()); if (!isStringOrFixedString(arguments[0].type)) - throw Exception(getName() + " is only implemented for types String and FixedString", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{} is only implemented for types String and FixedString", getName()); const size_t n = arguments[1].column->getUInt(0); return std::make_shared(n); @@ -98,8 +98,7 @@ public: { if constexpr (exception_mode == ConvertToFixedStringExceptionMode::Throw) { - throw Exception("String too long for type FixedString(" + toString(n) + ")", - ErrorCodes::TOO_LARGE_STRING_SIZE); + throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "String too long for type FixedString({})", toString(n)); } else { @@ -122,7 +121,7 @@ public: { if constexpr (exception_mode == ConvertToFixedStringExceptionMode::Throw) { - throw Exception{"String too long for type FixedString(" + toString(n) + ")", ErrorCodes::TOO_LARGE_STRING_SIZE}; + throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "String too long for type FixedString({})", toString(n)); } else { @@ -147,7 +146,7 @@ public: else { if constexpr (exception_mode == ConvertToFixedStringExceptionMode::Throw) - throw Exception("Unexpected column: " + column->getName(), ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unexpected column: {}", column->getName()); else { auto column_fixed = ColumnFixedString::create(n); diff --git a/src/Functions/toModifiedJulianDay.cpp b/src/Functions/toModifiedJulianDay.cpp index 6c6eda1673a..0d854bcc110 100644 --- a/src/Functions/toModifiedJulianDay.cpp +++ b/src/Functions/toModifiedJulianDay.cpp @@ -52,9 +52,8 @@ namespace DB } else { - throw Exception("Illegal column " + col_from->getName() - + " of first argument of function " + Name::name, - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + col_from->getName(), Name::name); } using ColVecTo = typename ToDataType::ColumnType; @@ -192,8 +191,8 @@ namespace DB { if (!isStringOrFixedString(arguments[0])) { - throw Exception( - "The argument of function " + getName() + " must be String or FixedString", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The argument of function {} must be String or FixedString", + getName()); } DataTypePtr base_type = std::make_shared(); diff --git a/src/Functions/toStartOfInterval.cpp b/src/Functions/toStartOfInterval.cpp index 3054cf280d9..c0220f1aed2 100644 --- a/src/Functions/toStartOfInterval.cpp +++ b/src/Functions/toStartOfInterval.cpp @@ -222,7 +222,7 @@ namespace { Int64 t_milliseconds = 0; if (common::mulOverflow(t, static_cast(1000) / scale_multiplier, t_milliseconds)) - throw DB::Exception("Numeric overflow", ErrorCodes::DECIMAL_OVERFLOW); + throw DB::Exception(ErrorCodes::DECIMAL_OVERFLOW, "Numeric overflow"); if (likely(t >= 0)) return t_milliseconds / milliseconds * milliseconds; else @@ -259,7 +259,7 @@ namespace { Int64 t_microseconds = 0; if (common::mulOverflow(t, static_cast(1000000) / scale_multiplier, t_microseconds)) - throw DB::Exception("Numeric overflow", ErrorCodes::DECIMAL_OVERFLOW); + throw DB::Exception(ErrorCodes::DECIMAL_OVERFLOW, "Numeric overflow"); if (likely(t >= 0)) return t_microseconds / microseconds * microseconds; else @@ -296,7 +296,7 @@ namespace { Int64 t_nanoseconds = 0; if (common::mulOverflow(t, 
(static_cast(1000000000) / scale_multiplier), t_nanoseconds)) - throw DB::Exception("Numeric overflow", ErrorCodes::DECIMAL_OVERFLOW); + throw DB::Exception(ErrorCodes::DECIMAL_OVERFLOW, "Numeric overflow"); if (likely(t >= 0)) return t_nanoseconds / nanoseconds * nanoseconds; else @@ -329,10 +329,8 @@ public: auto check_first_argument = [&] { if (!isDate(arguments[0].type) && !isDateTime(arguments[0].type) && !isDateTime64(arguments[0].type)) - throw Exception( - "Illegal type " + arguments[0].type->getName() + " of argument of function " + getName() - + ". Should be a date or a date with time", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}. " + "Should be a date or a date with time", arguments[0].type->getName(), getName()); first_argument_is_date = isDate(arguments[0].type); }; @@ -343,10 +341,8 @@ public: { interval_type = checkAndGetDataType(arguments[1].type.get()); if (!interval_type) - throw Exception( - "Illegal type " + arguments[1].type->getName() + " of argument of function " + getName() - + ". Should be an interval of time", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}. " + "Should be an interval of time", arguments[1].type->getName(), getName()); result_type_is_date = (interval_type->getKind() == IntervalKind::Year) || (interval_type->getKind() == IntervalKind::Quarter) || (interval_type->getKind() == IntervalKind::Month) || (interval_type->getKind() == IntervalKind::Week); @@ -357,10 +353,9 @@ public: auto check_timezone_argument = [&] { if (!WhichDataType(arguments[2].type).isString()) - throw Exception( - "Illegal type " + arguments[2].type->getName() + " of argument of function " + getName() - + ". This argument is optional and must be a constant string with timezone name", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}. " + "This argument is optional and must be a constant string with timezone name", + arguments[2].type->getName(), getName()); if (first_argument_is_date && result_type_is_date) throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The timezone argument of function {} with interval type {} is allowed only when the 1st argument " @@ -381,10 +376,9 @@ public: } else { - throw Exception( - "Number of arguments for function " + getName() + " doesn't match: passed " + toString(arguments.size()) - + ", should be 2 or 3", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 2 or 3", + getName(), arguments.size()); } if (result_type_is_date) @@ -462,9 +456,8 @@ private: if (time_column_vec) return dispatchForIntervalColumn(assert_cast(from_datatype), *time_column_vec, interval_column, result_type, time_zone); } - throw Exception( - "Illegal column for first argument of function " + getName() + ". Must contain dates or dates with time", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal column for first argument of function {}. 
" + "Must contain dates or dates with time", getName()); } template @@ -474,16 +467,15 @@ private: { const auto * interval_type = checkAndGetDataType(interval_column.type.get()); if (!interval_type) - throw Exception( - "Illegal column for second argument of function " + getName() + ", must be an interval of time.", - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column for second argument of function {}, must be an interval of time.", getName()); const auto * interval_column_const_int64 = checkAndGetColumnConst(interval_column.column.get()); if (!interval_column_const_int64) - throw Exception( - "Illegal column for second argument of function " + getName() + ", must be a const interval of time.", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Illegal column for second argument of function {}, must be a const interval of time.", + getName()); Int64 num_units = interval_column_const_int64->getValue(); if (num_units <= 0) - throw Exception("Value for second argument of function " + getName() + " must be positive.", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Value for second argument of function {} must be positive.", getName()); switch (interval_type->getKind()) { diff --git a/src/Functions/toTimezone.cpp b/src/Functions/toTimezone.cpp index 3a16c8756fb..ddac22d3173 100644 --- a/src/Functions/toTimezone.cpp +++ b/src/Functions/toTimezone.cpp @@ -90,14 +90,14 @@ public: DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override { if (arguments.size() != 2) - throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " - + toString(arguments.size()) + ", should be 2", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 2", + getName(), arguments.size()); const auto which_type = WhichDataType(arguments[0].type); if (!which_type.isDateTime() && !which_type.isDateTime64()) - throw Exception{"Illegal type " + arguments[0].type->getName() + " of argument of function " + getName() + - ". Should be DateTime or DateTime64", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument of function {}. 
" + "Should be DateTime or DateTime64", arguments[0].type->getName(), getName()); String time_zone_name = extractTimeZoneNameFromFunctionArguments(arguments, 1, 0); if (which_type.isDateTime()) diff --git a/src/Functions/toValidUTF8.cpp b/src/Functions/toValidUTF8.cpp index 4b79bc0bbda..e509b59a23e 100644 --- a/src/Functions/toValidUTF8.cpp +++ b/src/Functions/toValidUTF8.cpp @@ -154,7 +154,7 @@ struct ToValidUTF8Impl [[noreturn]] static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) { - throw Exception("Column of type FixedString is not supported by toValidUTF8 function", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Column of type FixedString is not supported by toValidUTF8 function"); } }; diff --git a/src/Functions/transform.cpp b/src/Functions/transform.cpp index c3ddd431dc0..c8a94bcffa6 100644 --- a/src/Functions/transform.cpp +++ b/src/Functions/transform.cpp @@ -77,36 +77,41 @@ public: { const auto args_size = arguments.size(); if (args_size != 3 && args_size != 4) - throw Exception{"Number of arguments for function " + getName() + " doesn't match: passed " + toString(args_size) + ", should be 3 or 4", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH}; + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Number of arguments for function {} doesn't match: " + "passed {}, should be 3 or 4", getName(), args_size); const DataTypePtr & type_x = arguments[0]; if (!type_x->isValueRepresentedByNumber() && !isString(type_x)) - throw Exception{"Unsupported type " + type_x->getName() - + " of first argument of function " + getName() - + ", must be numeric type or Date/DateTime or String", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Unsupported type {} of first argument " + "of function {}, must be numeric type or Date/DateTime or String", + type_x->getName(), getName()); const DataTypeArray * type_arr_from = checkAndGetDataType(arguments[1].get()); if (!type_arr_from) - throw Exception{"Second argument of function " + getName() - + ", must be array of source values to transform from.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Second argument of function {}, must be array of source values to transform from.", + getName()); const auto type_arr_from_nested = type_arr_from->getNestedType(); if ((type_x->isValueRepresentedByNumber() != type_arr_from_nested->isValueRepresentedByNumber()) || (isString(type_x) != isString(type_arr_from_nested))) { - throw Exception{"First argument and elements of array of second argument of function " + getName() - + " must have compatible types: both numeric or both strings.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "First argument and elements of array " + "of second argument of function {} must have compatible types: " + "both numeric or both strings.", getName()); } const DataTypeArray * type_arr_to = checkAndGetDataType(arguments[2].get()); if (!type_arr_to) - throw Exception{"Third argument of function " + getName() - + ", must be array of destination values to transform to.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Third argument of function {}, must be array of destination values to transform to.", + getName()); const DataTypePtr & type_arr_to_nested = type_arr_to->getNestedType(); @@ -114,9 +119,9 @@ public: { if ((type_x->isValueRepresentedByNumber() != 
type_arr_to_nested->isValueRepresentedByNumber()) || (isString(type_x) != isString(type_arr_to_nested))) - throw Exception{"Function " + getName() - + " has signature: transform(T, Array(T), Array(U), U) -> U; or transform(T, Array(T), Array(T)) -> T; where T and U are types.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function {} has signature: " + "transform(T, Array(T), Array(U), U) -> U; " + "or transform(T, Array(T), Array(T)) -> T; where T and U are types.", getName()); return getLeastSupertype(DataTypes{type_x, type_arr_to_nested}); } @@ -125,18 +130,19 @@ public: const DataTypePtr & type_default = arguments[3]; if (!type_default->isValueRepresentedByNumber() && !isString(type_default)) - throw Exception{"Unsupported type " + type_default->getName() - + " of fourth argument (default value) of function " + getName() - + ", must be numeric type or Date/DateTime or String", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Unsupported type {} of fourth argument (default value) " + "of function {}, must be numeric type or Date/DateTime or String", + type_default->getName(), getName()); bool default_is_string = WhichDataType(type_default).isString(); bool nested_is_string = WhichDataType(type_arr_to_nested).isString(); if ((type_default->isValueRepresentedByNumber() != type_arr_to_nested->isValueRepresentedByNumber()) || (default_is_string != nested_is_string)) - throw Exception{"Function " + getName() - + " have signature: transform(T, Array(T), Array(U), U) -> U; or transform(T, Array(T), Array(T)) -> T; where T and U are types.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT}; + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Function {} have signature: " + "transform(T, Array(T), Array(U), U) -> U; " + "or transform(T, Array(T), Array(T)) -> T; where T and U are types.", getName()); if (type_arr_to_nested->isValueRepresentedByNumber() && type_default->isValueRepresentedByNumber()) { @@ -155,7 +161,7 @@ public: const ColumnConst * array_to = checkAndGetColumnConst(arguments[2].column.get()); if (!array_from || !array_to) - throw Exception{"Second and third arguments of function " + getName() + " must be constant arrays.", ErrorCodes::ILLEGAL_COLUMN}; + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second and third arguments of function {} must be constant arrays.", getName()); initialize(array_from->getValue(), array_to->getValue(), arguments); @@ -185,7 +191,7 @@ public: && !executeDecimal(in, out, default_column) && !executeString(in, out, default_column)) { - throw Exception{"Illegal column " + in->getName() + " of first argument of function " + getName(), ErrorCodes::ILLEGAL_COLUMN}; + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", in->getName(), getName()); } return column_result; @@ -214,8 +220,10 @@ private: auto out = typeid_cast *>(out_untyped); if (!out) { - throw Exception{"Illegal column " + out_untyped->getName() + " of elements of array of third argument of function " + getName() - + ", must be " + in->getName(), ErrorCodes::ILLEGAL_COLUMN}; + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {} of elements " + "of array of third argument of function {}, must be {}", + out_untyped->getName(), getName(), in->getName()); } executeImplNumToNum(in->getData(), out->getData()); @@ -236,8 +244,8 @@ private: && !executeNumToDecimalWithConstDefault(in, out_untyped) && !executeNumToStringWithConstDefault(in, 
out_untyped)) { - throw Exception{"Illegal column " + in->getName() + " of elements of array of second argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN}; + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of elements of array of second argument of function {}", + in->getName(), getName()); } } else @@ -256,8 +264,8 @@ private: && !executeNumToDecimalWithNonConstDefault(in, out_untyped, default_untyped) && !executeNumToStringWithNonConstDefault(in, out_untyped, default_untyped)) { - throw Exception{"Illegal column " + in->getName() + " of elements of array of second argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN}; + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of elements of array of second argument of function {}", + in->getName(), getName()); } } @@ -277,8 +285,10 @@ private: auto out = typeid_cast *>(out_untyped); if (!out) { - throw Exception{"Illegal column " + out_untyped->getName() + " of elements of array of third argument of function " + getName() - + ", must be " + in->getName(), ErrorCodes::ILLEGAL_COLUMN}; + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Illegal column {} of elements " + "of array of third argument of function {}, must be {}", + out_untyped->getName(), getName(), in->getName()); } executeImplNumToNum(in->getData(), out->getData()); @@ -299,8 +309,8 @@ private: && !executeDecimalToDecimalWithConstDefault(in, out_untyped) && !executeDecimalToStringWithConstDefault(in, out_untyped)) { - throw Exception{"Illegal column " + in->getName() + " of elements of array of second argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN}; + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of elements of array of second argument of function {}", + in->getName(), getName()); } } else @@ -319,8 +329,8 @@ private: && !executeDecimalToDecimalWithNonConstDefault(in, out_untyped, default_untyped) && !executeDecimalToStringWithNonConstDefault(in, out_untyped, default_untyped)) { - throw Exception{"Illegal column " + in->getName() + " of elements of array of second argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN}; + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of elements of array of second argument of function {}", + in->getName(), getName()); } } @@ -337,8 +347,8 @@ private: if (!default_untyped) { if (!executeStringToString(in, out_untyped)) - throw Exception{"Illegal column " + in->getName() + " of elements of array of second argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN}; + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of elements of array of second argument of function {}", + in->getName(), getName()); } else if (isColumnConst(*default_untyped)) { @@ -356,8 +366,8 @@ private: && !executeStringToDecimalWithConstDefault(in, out_untyped) && !executeStringToStringWithConstDefault(in, out_untyped)) { - throw Exception{"Illegal column " + in->getName() + " of elements of array of second argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN}; + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of elements of array of second argument of function {}", + in->getName(), getName()); } } else @@ -377,8 +387,8 @@ private: && !executeStringToStringWithNonConstDefault(in, out_untyped, default_untyped)) { - throw Exception{"Illegal column " + in->getName() + " of elements of array of second argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN}; + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal 
column {} of elements of array of second argument of function {}", + in->getName(), getName()); } } @@ -451,9 +461,8 @@ private: && !executeNumToNumWithNonConstDefault2(in, out, default_untyped) && !executeNumToNumWithNonConstDefault2(in, out, default_untyped)) { - throw Exception( - "Illegal column " + default_untyped->getName() + " of fourth argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of fourth argument of function {}", + default_untyped->getName(), getName()); } return true; @@ -479,9 +488,8 @@ private: && !executeNumToDecimalWithNonConstDefaultDecimal2(in, out, default_untyped) && !executeNumToDecimalWithNonConstDefaultDecimal2(in, out, default_untyped)) { - throw Exception( - "Illegal column " + default_untyped->getName() + " of fourth argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of fourth argument of function {}", + default_untyped->getName(), getName()); } return true; @@ -507,9 +515,8 @@ private: && !executeDecimalToNumWithNonConstDefaultDecimal2(in, out, default_untyped) && !executeDecimalToNumWithNonConstDefaultDecimal2(in, out, default_untyped)) { - throw Exception( - "Illegal column " + default_untyped->getName() + " of fourth argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of fourth argument of function {}", + default_untyped->getName(), getName()); } return true; @@ -535,9 +542,8 @@ private: && !executeDecimalToDecimalWithNonConstDefaultDecimal2(in, out, default_untyped) && !executeDecimalToDecimalWithNonConstDefaultDecimal2(in, out, default_untyped)) { - throw Exception( - "Illegal column " + default_untyped->getName() + " of fourth argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of fourth argument of function {}", + default_untyped->getName(), getName()); } return true; @@ -656,8 +662,8 @@ private: const auto * default_col = checkAndGetColumn(default_untyped); if (!default_col) { - throw Exception{"Illegal column " + default_untyped->getName() + " of fourth argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN}; + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of fourth argument of function {}", + default_untyped->getName(), getName()); } executeImplNumToStringWithNonConstDefault( @@ -678,8 +684,8 @@ private: const auto * default_col = checkAndGetColumn(default_untyped); if (!default_col) { - throw Exception{"Illegal column " + default_untyped->getName() + " of fourth argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN}; + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of fourth argument of function {}", + default_untyped->getName(), getName()); } executeImplNumToStringWithNonConstDefault( @@ -731,8 +737,8 @@ private: && !executeStringToNumWithNonConstDefault2(in, out, default_untyped) && !executeStringToNumWithNonConstDefault2(in, out, default_untyped)) { - throw Exception{"Illegal column " + default_untyped->getName() + " of fourth argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN}; + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of fourth argument of function {}", + default_untyped->getName(), getName()); } return true; @@ -758,8 +764,8 @@ private: && !executeStringToDecimalWithNonConstDefaultDecimal2(in, out, default_untyped) && 
!executeStringToDecimalWithNonConstDefaultDecimal2(in, out, default_untyped)) { - throw Exception{"Illegal column " + default_untyped->getName() + " of fourth argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN}; + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of fourth argument of function {}", + default_untyped->getName(), getName()); } return true; @@ -830,8 +836,8 @@ private: const auto * default_col = checkAndGetColumn(default_untyped); if (!default_col) { - throw Exception{"Illegal column " + default_untyped->getName() + " of fourth argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN}; + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of fourth argument of function {}", + default_untyped->getName(), getName()); } executeImplStringToStringWithNonConstDefault( @@ -853,7 +859,12 @@ private: { const auto * it = table.find(bit_cast(src[i])); if (it) - memcpy(&dst[i], &it->getMapped(), sizeof(dst[i])); /// little endian. + { + if (std::endian::native == std::endian::little) + memcpy(&dst[i], &it->getMapped(), sizeof(dst[i])); + else + memcpy(&dst[i], reinterpret_cast(&it->getMapped()) + sizeof(UInt64) - sizeof(dst[i]), sizeof(dst[i])); + } else dst[i] = dst_default; } @@ -869,7 +880,12 @@ private: { const auto * it = table.find(bit_cast(src[i])); if (it) - memcpy(&dst[i], &it->getMapped(), sizeof(dst[i])); /// little endian. + { + if (std::endian::native == std::endian::little) + memcpy(&dst[i], &it->getMapped(), sizeof(dst[i])); + else + memcpy(&dst[i], reinterpret_cast(&it->getMapped()) + sizeof(UInt64) - sizeof(dst[i]), sizeof(dst[i])); + } else if constexpr (is_decimal) dst[i] = static_cast(dst_default[i]); else @@ -887,7 +903,12 @@ private: { const auto * it = table.find(bit_cast(src[i])); if (it) - memcpy(&dst[i], &it->getMapped(), sizeof(dst[i])); + { + if (std::endian::native == std::endian::little) + memcpy(&dst[i], &it->getMapped(), sizeof(dst[i])); + else + memcpy(&dst[i], reinterpret_cast(&it->getMapped()) + sizeof(UInt64) - sizeof(dst[i]), sizeof(dst[i])); + } else dst[i] = src[i]; } @@ -958,7 +979,12 @@ private: current_src_offset = src_offsets[i]; const auto * it = table.find(ref); if (it) - memcpy(&dst[i], &it->getMapped(), sizeof(dst[i])); + { + if (std::endian::native == std::endian::little) + memcpy(&dst[i], &it->getMapped(), sizeof(dst[i])); + else + memcpy(&dst[i], reinterpret_cast(&it->getMapped()) + sizeof(UInt64) - sizeof(dst[i]), sizeof(dst[i])); + } else dst[i] = dst_default; } @@ -979,7 +1005,12 @@ private: current_src_offset = src_offsets[i]; const auto * it = table.find(ref); if (it) - memcpy(&dst[i], &it->getMapped(), sizeof(dst[i])); + { + if (std::endian::native == std::endian::little) + memcpy(&dst[i], &it->getMapped(), sizeof(dst[i])); + else + memcpy(&dst[i], reinterpret_cast(&it->getMapped()) + sizeof(UInt64) - sizeof(dst[i]), sizeof(dst[i])); + } else if constexpr (is_decimal) dst[i] = static_cast(dst_default[i]); else @@ -1098,7 +1129,7 @@ private: case Field::Types::Decimal32: return x.get>().getValue(); case Field::Types::Decimal64: return x.get>().getValue(); default: - throw Exception("Unexpected type in function 'transform'", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected type in function 'transform'"); } } @@ -1110,7 +1141,7 @@ private: const size_t size = from.size(); if (0 == size) - throw Exception{"Empty arrays are illegal in function " + getName(), ErrorCodes::BAD_ARGUMENTS}; + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Empty arrays are illegal 
in function {}", getName()); std::lock_guard lock(cache.mutex); @@ -1118,7 +1149,7 @@ private: return; if (size != to.size()) - throw Exception{"Second and third arguments of function " + getName() + " must be arrays of same size", ErrorCodes::BAD_ARGUMENTS}; + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second and third arguments of function {} must be arrays of same size", getName()); Array converted_to; const Array * used_to = &to; diff --git a/src/Functions/translate.cpp b/src/Functions/translate.cpp index 7471fdacbb5..83779eee23c 100644 --- a/src/Functions/translate.cpp +++ b/src/Functions/translate.cpp @@ -27,14 +27,14 @@ struct TranslateImpl const std::string & map_to) { if (map_from.size() != map_to.size()) - throw Exception("Second and third arguments must be the same length", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second and third arguments must be the same length"); std::iota(map.begin(), map.end(), 0); for (size_t i = 0; i < map_from.size(); ++i) { if (!isASCII(map_from[i]) || !isASCII(map_to[i])) - throw Exception("Second and third arguments must be ASCII strings", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second and third arguments must be ASCII strings"); map[map_from[i]] = map_to[i]; } @@ -125,7 +125,7 @@ struct TranslateUTF8Impl auto map_to_size = UTF8::countCodePoints(reinterpret_cast(map_to.data()), map_to.size()); if (map_from_size != map_to_size) - throw Exception("Second and third arguments must be the same length", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second and third arguments must be the same length"); std::iota(map_ascii.begin(), map_ascii.end(), 0); @@ -148,10 +148,10 @@ struct TranslateUTF8Impl res_to = UTF8::convertUTF8ToCodePoint(map_to_ptr, len_to); if (!res_from) - throw Exception("Second argument must be a valid UTF-8 string", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument must be a valid UTF-8 string"); if (!res_to) - throw Exception("Third argument must be a valid UTF-8 string", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Third argument must be a valid UTF-8 string"); if (*map_from_ptr <= ascii_upper_bound) map_ascii[*map_from_ptr] = *res_to; @@ -257,7 +257,7 @@ struct TranslateUTF8Impl const std::string & /*map_to*/, ColumnString::Chars & /*res_data*/) { - throw Exception("Function translateUTF8 does not support FixedString argument", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Function translateUTF8 does not support FixedString argument"); } private: @@ -284,19 +284,16 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (!isStringOrFixedString(arguments[0])) - throw Exception( - "Illegal type " + arguments[0]->getName() + " of first argument of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {}", + arguments[0]->getName(), getName()); if (!isStringOrFixedString(arguments[1])) - throw Exception( - "Illegal type " + arguments[1]->getName() + " of second argument of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of second argument of function {}", + arguments[1]->getName(), getName()); if (!isStringOrFixedString(arguments[2])) - throw Exception( - "Illegal type " + arguments[2]->getName() + " of third 
argument of function " + getName(), - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of third argument of function {}", + arguments[2]->getName(), getName()); return std::make_shared(); } @@ -308,7 +305,7 @@ public: const ColumnPtr column_map_to = arguments[2].column; if (!isColumnConst(*column_map_from) || !isColumnConst(*column_map_to)) - throw Exception("2nd and 3rd arguments of function " + getName() + " must be constants.", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "2nd and 3rd arguments of function {} must be constants.", getName()); const IColumn * c1 = arguments[1].column.get(); const IColumn * c2 = arguments[2].column.get(); @@ -330,9 +327,8 @@ public: return col_res; } else - throw Exception( - "Illegal column " + arguments[0].column->getName() + " of first argument of function " + getName(), - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}", + arguments[0].column->getName(), getName()); } }; diff --git a/src/Functions/trap.cpp b/src/Functions/trap.cpp index 16fada0f67d..44deb901b0d 100644 --- a/src/Functions/trap.cpp +++ b/src/Functions/trap.cpp @@ -60,7 +60,7 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (!isString(arguments[0])) - throw Exception("The only argument for function " + getName() + " must be constant String", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The only argument for function {} must be constant String", getName()); return std::make_shared(); } @@ -175,10 +175,10 @@ public: throw Exception(ErrorCodes::CANNOT_DLOPEN, "Cannot dlopen: ({})", dlerror()); // NOLINT(concurrency-mt-unsafe) // MT-Safe on Linux, see man dlerror } else - throw Exception("Unknown trap mode", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown trap mode"); } else - throw Exception("The only argument for function " + getName() + " must be constant String", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "The only argument for function {} must be constant String", getName()); return result_type->createColumnConst(input_rows_count, 0ULL); } diff --git a/src/Functions/trim.cpp b/src/Functions/trim.cpp index 58760583e8d..acfab47a68b 100644 --- a/src/Functions/trim.cpp +++ b/src/Functions/trim.cpp @@ -71,7 +71,7 @@ public: static void vectorFixed(const ColumnString::Chars &, size_t, ColumnString::Chars &) { - throw Exception("Functions trimLeft, trimRight and trimBoth cannot work with FixedString argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Functions trimLeft, trimRight and trimBoth cannot work with FixedString argument"); } private: diff --git a/src/Functions/tuple.cpp b/src/Functions/tuple.cpp index 545c6597811..f6c2831365f 100644 --- a/src/Functions/tuple.cpp +++ b/src/Functions/tuple.cpp @@ -60,7 +60,7 @@ public: DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override { if (arguments.empty()) - throw Exception("Function " + getName() + " requires at least one argument.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} requires at least one argument.", getName()); return std::make_shared(arguments); } diff --git a/src/Functions/tupleElement.cpp b/src/Functions/tupleElement.cpp index 6ac36dc80ed..829262de30a 
100644 --- a/src/Functions/tupleElement.cpp +++ b/src/Functions/tupleElement.cpp @@ -68,9 +68,9 @@ public: const size_t number_of_arguments = arguments.size(); if (number_of_arguments < 2 || number_of_arguments > 3) - throw Exception("Number of arguments for function " + getName() + " doesn't match: passed " - + toString(number_of_arguments) + ", should be 2 or 3", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function {} doesn't match: passed {}, should be 2 or 3", + getName(), number_of_arguments); size_t count_arrays = 0; const IDataType * tuple_col = arguments[0].type.get(); @@ -108,7 +108,10 @@ public: if (count_arrays != default_argument_count_arrays) { - throw Exception(ErrorCodes::NUMBER_OF_DIMENSIONS_MISMATCHED, "Dimension of types mismatched between first argument and third argument. Dimension of 1st argument: {}. Dimension of 3rd argument: {}.",count_arrays, default_argument_count_arrays); + throw Exception(ErrorCodes::NUMBER_OF_DIMENSIONS_MISMATCHED, + "Dimension of types mismatched between first argument and third argument. " + "Dimension of 1st argument: {}. " + "Dimension of 3rd argument: {}.",count_arrays, default_argument_count_arrays); } return arguments[2].type; } @@ -198,7 +201,8 @@ private: const auto & array_y = *assert_cast(col_y.get()); if (!array_x.hasEqualOffsets(array_y)) { - throw Exception("The argument 1 and argument 3 of function " + getName() + " have different array sizes", ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH); + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, + "The argument 1 and argument 3 of function {} have different array sizes", getName()); } } } @@ -219,7 +223,8 @@ private: { if (unlikely(offsets_x[0] != offsets_y[row] - prev_offset)) { - throw Exception("The argument 1 and argument 3 of function " + getName() + " have different array sizes", ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH); + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, + "The argument 1 and argument 3 of function {} have different array sizes", getName()); } prev_offset = offsets_y[row]; } @@ -252,7 +257,7 @@ private: if (argument_size == 2) { - throw Exception("Tuple doesn't have element with name '" + name_col->getValue() + "'", ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); + throw Exception(ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK, "Tuple doesn't have element with name '{}'", name_col->getValue()); } return std::nullopt; } diff --git a/src/Functions/validateNestedArraySizes.cpp b/src/Functions/validateNestedArraySizes.cpp index 3b005f4d653..7e1dbc798d8 100644 --- a/src/Functions/validateNestedArraySizes.cpp +++ b/src/Functions/validateNestedArraySizes.cpp @@ -40,19 +40,17 @@ DataTypePtr FunctionValidateNestedArraySizes::getReturnTypeImpl(const DataTypes size_t num_args = arguments.size(); if (num_args < 3) - throw Exception( - "Function " + getName() + " needs more than two arguments; passed " + toString(arguments.size()) + ".", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function {} needs more than two arguments; passed {}.", + getName(), arguments.size()); if (!WhichDataType(arguments[0]).isUInt8()) - throw Exception("Illegal type " + arguments[0]->getName() + " of first argument of function " + getName() + " Must be UInt.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of first argument of function {} Must be UInt.", + 
arguments[0]->getName(), getName()); for (size_t i = 1; i < num_args; ++i) if (!WhichDataType(arguments[i]).isArray()) - throw Exception( - "Illegal type " + arguments[i]->getName() + " of " + toString(i) + " argument of function " + getName() + " Must be Array.", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of {} argument of function {} Must be Array.", + arguments[i]->getName(), i, getName()); return std::make_shared(); } diff --git a/src/Functions/vectorFunctions.cpp b/src/Functions/vectorFunctions.cpp index 20835f59cc1..a7ab09612cf 100644 --- a/src/Functions/vectorFunctions.cpp +++ b/src/Functions/vectorFunctions.cpp @@ -1127,7 +1127,7 @@ public: const auto & p_column = arguments[1]; if (!isColumnConst(*p_column.column) && p_column.column->size() != 1) - throw Exception{"Second argument for function " + getName() + " must be either constant Float64 or constant UInt", ErrorCodes::ILLEGAL_COLUMN}; + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument for function {} must be either constant Float64 or constant UInt", getName()); double p; if (isFloat(p_column.column->getDataType())) @@ -1135,10 +1135,12 @@ public: else if (isUnsignedInteger(p_column.column->getDataType())) p = p_column.column->getUInt(0); else - throw Exception{"Second argument for function " + getName() + " must be either constant Float64 or constant UInt", ErrorCodes::ILLEGAL_COLUMN}; + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument for function {} must be either constant Float64 or constant UInt", getName()); if (p < 1 || p >= HUGE_VAL) - throw Exception{"Second argument for function " + getName() + " must be not less than one and not be an infinity", ErrorCodes::ARGUMENT_OUT_OF_BOUND}; + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, + "Second argument for function {} must be not less than one and not be an infinity", + getName()); auto abs = FunctionFactory::instance().get("abs", context); auto pow = FunctionFactory::instance().get("pow", context); diff --git a/src/IO/Archives/ZipArchiveReader.cpp b/src/IO/Archives/ZipArchiveReader.cpp index a7c72c7b575..206c2c45ee5 100644 --- a/src/IO/Archives/ZipArchiveReader.cpp +++ b/src/IO/Archives/ZipArchiveReader.cpp @@ -218,13 +218,13 @@ public: else if (whence == SEEK_CUR) new_pos = off + current_pos; else - throw Exception("Only SEEK_SET and SEEK_CUR seek modes allowed.", ErrorCodes::SEEK_POSITION_OUT_OF_BOUND); + throw Exception(ErrorCodes::SEEK_POSITION_OUT_OF_BOUND, "Only SEEK_SET and SEEK_CUR seek modes allowed."); if (new_pos == current_pos) return current_pos; /// The position is the same. if (new_pos < 0) - throw Exception("Seek position is out of bound", ErrorCodes::SEEK_POSITION_OUT_OF_BOUND); + throw Exception(ErrorCodes::SEEK_POSITION_OUT_OF_BOUND, "Seek position is out of bound"); off_t working_buffer_start_pos = current_pos - offset(); off_t working_buffer_end_pos = current_pos + available(); @@ -241,7 +241,7 @@ public: /// Check that the new position is now beyond the end of the file. 
const auto & file_info = handle.getFileInfo(); if (new_pos > static_cast(file_info.uncompressed_size)) - throw Exception("Seek position is out of bound", ErrorCodes::SEEK_POSITION_OUT_OF_BOUND); + throw Exception(ErrorCodes::SEEK_POSITION_OUT_OF_BOUND, "Seek position is out of bound"); if (file_info.compression_method == MZ_COMPRESS_METHOD_STORE) { @@ -489,7 +489,7 @@ std::unique_ptr ZipArchiveReader::readFile(const String std::unique_ptr ZipArchiveReader::readFile(std::unique_ptr enumerator) { if (!dynamic_cast(enumerator.get())) - throw Exception("Wrong enumerator passed to readFile()", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong enumerator passed to readFile()"); auto enumerator_impl = std::unique_ptr(static_cast(enumerator.release())); auto handle = std::move(*enumerator_impl).releaseHandle(); return std::make_unique(std::move(handle)); @@ -498,7 +498,7 @@ std::unique_ptr ZipArchiveReader::readFile(std::unique_p std::unique_ptr ZipArchiveReader::nextFile(std::unique_ptr read_buffer) { if (!dynamic_cast(read_buffer.get())) - throw Exception("Wrong ReadBuffer passed to nextFile()", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong ReadBuffer passed to nextFile()"); auto read_buffer_from_zip = std::unique_ptr(static_cast(read_buffer.release())); auto handle = std::move(*read_buffer_from_zip).releaseHandle(); if (!handle.nextFile()) diff --git a/src/IO/Archives/ZipArchiveWriter.cpp b/src/IO/Archives/ZipArchiveWriter.cpp index d413783356d..088d83cd29e 100644 --- a/src/IO/Archives/ZipArchiveWriter.cpp +++ b/src/IO/Archives/ZipArchiveWriter.cpp @@ -343,7 +343,7 @@ void ZipArchiveWriter::checkCompressionMethodIsEnabled(int compression_method_) #if USE_BZIP2 return; #else - throw Exception("bzip2 compression method is disabled", ErrorCodes::SUPPORT_IS_DISABLED); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "bzip2 compression method is disabled"); #endif } } @@ -354,7 +354,7 @@ void ZipArchiveWriter::checkCompressionMethodIsEnabled(int compression_method_) void ZipArchiveWriter::checkEncryptionIsEnabled() { #if !USE_SSL - throw Exception("Encryption in zip archive is disabled", ErrorCodes::SUPPORT_IS_DISABLED); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Encryption in zip archive is disabled"); #endif } diff --git a/src/IO/Archives/createArchiveReader.cpp b/src/IO/Archives/createArchiveReader.cpp index ea51d44399e..3cb4802792b 100644 --- a/src/IO/Archives/createArchiveReader.cpp +++ b/src/IO/Archives/createArchiveReader.cpp @@ -28,7 +28,7 @@ std::shared_ptr createArchiveReader( #if USE_MINIZIP return std::make_shared(path_to_archive, archive_read_function, archive_size); #else - throw Exception("minizip library is disabled", ErrorCodes::SUPPORT_IS_DISABLED); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "minizip library is disabled"); #endif } else diff --git a/src/IO/Archives/createArchiveWriter.cpp b/src/IO/Archives/createArchiveWriter.cpp index 573e36861ed..807fe66e6a9 100644 --- a/src/IO/Archives/createArchiveWriter.cpp +++ b/src/IO/Archives/createArchiveWriter.cpp @@ -28,7 +28,7 @@ std::shared_ptr createArchiveWriter( #if USE_MINIZIP return std::make_shared(path_to_archive, std::move(archive_write_buffer)); #else - throw Exception("minizip library is disabled", ErrorCodes::SUPPORT_IS_DISABLED); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "minizip library is disabled"); #endif } else diff --git a/src/IO/AsynchronousReadBufferFromFile.cpp b/src/IO/AsynchronousReadBufferFromFile.cpp index 
8310d80b461..9788fd87309 100644 --- a/src/IO/AsynchronousReadBufferFromFile.cpp +++ b/src/IO/AsynchronousReadBufferFromFile.cpp @@ -91,7 +91,7 @@ void AsynchronousReadBufferFromFile::close() return; if (0 != ::close(fd)) - throw Exception("Cannot close file", ErrorCodes::CANNOT_CLOSE_FILE); + throw Exception(ErrorCodes::CANNOT_CLOSE_FILE, "Cannot close file"); fd = -1; } diff --git a/src/IO/AsynchronousReadBufferFromFileDescriptor.cpp b/src/IO/AsynchronousReadBufferFromFileDescriptor.cpp index e07e04dedb8..a89961dea45 100644 --- a/src/IO/AsynchronousReadBufferFromFileDescriptor.cpp +++ b/src/IO/AsynchronousReadBufferFromFileDescriptor.cpp @@ -186,7 +186,7 @@ off_t AsynchronousReadBufferFromFileDescriptor::seek(off_t offset, int whence) } else { - throw Exception("ReadBufferFromFileDescriptor::seek expects SEEK_SET or SEEK_CUR as whence", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "ReadBufferFromFileDescriptor::seek expects SEEK_SET or SEEK_CUR as whence"); } /// Position is unchanged. @@ -232,7 +232,9 @@ off_t AsynchronousReadBufferFromFileDescriptor::seek(off_t offset, int whence) bytes_to_ignore = new_pos - seek_pos; if (bytes_to_ignore >= internal_buffer.size()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error in AsynchronousReadBufferFromFileDescriptor, bytes_to_ignore ({}) >= internal_buffer.size() ({})", bytes_to_ignore, internal_buffer.size()); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Logical error in AsynchronousReadBufferFromFileDescriptor, bytes_to_ignore ({}" + ") >= internal_buffer.size() ({})", bytes_to_ignore, internal_buffer.size()); return seek_pos; } diff --git a/src/IO/BitHelpers.h b/src/IO/BitHelpers.h index c67b44bc325..34173ccd8f9 100644 --- a/src/IO/BitHelpers.h +++ b/src/IO/BitHelpers.h @@ -35,28 +35,26 @@ extern const int ATTEMPT_TO_READ_AFTER_EOF; class BitReader { - using BufferType = unsigned __int128; - - const char * source_begin; + const char * const source_begin; + const char * const source_end; const char * source_current; - const char * source_end; - BufferType bits_buffer; - UInt8 bits_count; + using BufferType = unsigned __int128; + BufferType bits_buffer = 0; + + UInt8 bits_count = 0; public: BitReader(const char * begin, size_t size) - : source_begin(begin), - source_current(begin), - source_end(begin + size), - bits_buffer(0), - bits_count(0) + : source_begin(begin) + , source_end(begin + size) + , source_current(begin) {} ~BitReader() = default; // reads bits_to_read high-bits from bits_buffer - ALWAYS_INLINE inline UInt64 readBits(UInt8 bits_to_read) + ALWAYS_INLINE UInt64 readBits(UInt8 bits_to_read) { if (bits_to_read > bits_count) fillBitBuffer(); @@ -64,7 +62,7 @@ public: return getBitsFromBitBuffer(bits_to_read); } - inline UInt8 peekByte() + UInt8 peekByte() { if (bits_count < 8) fillBitBuffer(); @@ -72,31 +70,31 @@ public: return getBitsFromBitBuffer(8); } - ALWAYS_INLINE inline UInt8 readBit() + ALWAYS_INLINE UInt8 readBit() { return static_cast(readBits(1)); } // skip bits from bits_buffer - inline void skipBufferedBits(UInt8 bits) + void skipBufferedBits(UInt8 bits) { bits_buffer <<= bits; bits_count -= bits; } - inline bool eof() const + bool eof() const { return bits_count == 0 && source_current >= source_end; } // number of bits that was already read by clients with readBits() - inline UInt64 count() const + UInt64 count() const { return (source_current - source_begin) * 8 - bits_count; } - inline UInt64 remaining() const + UInt64 remaining() const { return (source_end - 
source_current) * 8 + bits_count; } @@ -105,7 +103,7 @@ private: enum GetBitsMode {CONSUME, PEEK}; // read data from internal buffer, if it has not enough bits, result is undefined. template - inline UInt64 getBitsFromBitBuffer(UInt8 bits_to_read) + UInt64 getBitsFromBitBuffer(UInt8 bits_to_read) { assert(bits_to_read > 0); @@ -132,9 +130,8 @@ private: if (bytes_to_read == 0) return 0; - throw Exception("Buffer is empty, but requested to read " - + std::to_string(bytes_to_read) + " more bytes.", - ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF); + throw Exception(ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF, "Buffer is empty, but requested to read {} more bytes.", + bytes_to_read); } UInt64 tmp_buffer = 0; @@ -153,24 +150,22 @@ private: class BitWriter { - using BufferType = unsigned __int128; - char * dest_begin; - char * dest_current; char * dest_end; + char * dest_current; - BufferType bits_buffer; - UInt8 bits_count; + using BufferType = unsigned __int128; + BufferType bits_buffer = 0; + + UInt8 bits_count = 0; static constexpr UInt8 BIT_BUFFER_SIZE = sizeof(bits_buffer) * 8; public: BitWriter(char * begin, size_t size) - : dest_begin(begin), - dest_current(begin), - dest_end(begin + size), - bits_buffer(0), - bits_count(0) + : dest_begin(begin) + , dest_end(begin + size) + , dest_current(begin) {} ~BitWriter() @@ -179,7 +174,7 @@ public: } // write `bits_to_write` low-bits of `value` to the buffer - inline void writeBits(UInt8 bits_to_write, UInt64 value) + void writeBits(UInt8 bits_to_write, UInt64 value) { assert(bits_to_write > 0); @@ -200,14 +195,14 @@ public: } // flush contents of bits_buffer to the dest_current, partial bytes are completed with zeroes. - inline void flush() + void flush() { bits_count = (bits_count + 8 - 1) & ~(8 - 1); // align up to 8-bytes, so doFlush will write all data from bits_buffer while (bits_count != 0) doFlush(); } - inline UInt64 count() const + UInt64 count() const { return (dest_current - dest_begin) * 8 + bits_count; } diff --git a/src/IO/BoundedReadBuffer.cpp b/src/IO/BoundedReadBuffer.cpp index 995ee267699..f4467bb2c0c 100644 --- a/src/IO/BoundedReadBuffer.cpp +++ b/src/IO/BoundedReadBuffer.cpp @@ -1,4 +1,5 @@ #include "BoundedReadBuffer.h" +#include namespace DB { @@ -36,10 +37,12 @@ bool BoundedReadBuffer::nextImpl() if (read_until_position && file_offset_of_buffer_end == *read_until_position) return false; - swap(*impl); - auto result = impl->next(); - swap(*impl); - + bool result; + { + SwapHelper swap(*this, *impl); + result = impl->next(); + } + chassert(file_offset_of_buffer_end + available() == impl->getFileOffsetOfBufferEnd()); if (result && read_until_position) { size_t remaining_size_to_read = *read_until_position - file_offset_of_buffer_end; @@ -64,7 +67,7 @@ off_t BoundedReadBuffer::seek(off_t off, int whence) auto result = impl->seek(off, whence); swap(*impl); - file_offset_of_buffer_end = result; + file_offset_of_buffer_end = impl->getFileOffsetOfBufferEnd(); return result; } diff --git a/src/IO/BrotliReadBuffer.cpp b/src/IO/BrotliReadBuffer.cpp index c16b7c2b397..56ef2b5446b 100644 --- a/src/IO/BrotliReadBuffer.cpp +++ b/src/IO/BrotliReadBuffer.cpp @@ -60,7 +60,7 @@ bool BrotliReadBuffer::nextImpl() if (brotli->result == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT && (!in_available || in->eof())) { - throw Exception("brotli decode error", ErrorCodes::BROTLI_READ_FAILED); + throw Exception(ErrorCodes::BROTLI_READ_FAILED, "brotli decode error"); } out_capacity = internal_buffer.size(); @@ -83,13 +83,13 @@ bool BrotliReadBuffer::nextImpl() } else { - 
throw Exception("brotli decode error", ErrorCodes::BROTLI_READ_FAILED); + throw Exception(ErrorCodes::BROTLI_READ_FAILED, "brotli decode error"); } } if (brotli->result == BROTLI_DECODER_RESULT_ERROR) { - throw Exception("brotli decode error", ErrorCodes::BROTLI_READ_FAILED); + throw Exception(ErrorCodes::BROTLI_READ_FAILED, "brotli decode error"); } return true; diff --git a/src/IO/BrotliWriteBuffer.cpp b/src/IO/BrotliWriteBuffer.cpp index e764b58ccd4..47426d62a6e 100644 --- a/src/IO/BrotliWriteBuffer.cpp +++ b/src/IO/BrotliWriteBuffer.cpp @@ -78,7 +78,7 @@ void BrotliWriteBuffer::nextImpl() if (result == 0) { - throw Exception("brotli compress failed", ErrorCodes::BROTLI_WRITE_FAILED); + throw Exception(ErrorCodes::BROTLI_WRITE_FAILED, "brotli compress failed"); } } while (in_available > 0); @@ -119,7 +119,7 @@ void BrotliWriteBuffer::finalizeBefore() if (result == 0) { - throw Exception("brotli compress failed", ErrorCodes::BROTLI_WRITE_FAILED); + throw Exception(ErrorCodes::BROTLI_WRITE_FAILED, "brotli compress failed"); } } } diff --git a/src/IO/BufferWithOwnMemory.h b/src/IO/BufferWithOwnMemory.h index 2e451e0032e..39c83e9167c 100644 --- a/src/IO/BufferWithOwnMemory.h +++ b/src/IO/BufferWithOwnMemory.h @@ -109,7 +109,7 @@ private: size_t res = 0; if (common::addOverflow(value, pad_right, res)) - throw Exception("value is too big to apply padding", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "value is too big to apply padding"); return res; } diff --git a/src/IO/CascadeWriteBuffer.cpp b/src/IO/CascadeWriteBuffer.cpp index ca11290c71b..629cbff90af 100644 --- a/src/IO/CascadeWriteBuffer.cpp +++ b/src/IO/CascadeWriteBuffer.cpp @@ -78,11 +78,11 @@ WriteBuffer * CascadeWriteBuffer::setNextBuffer() } } else if (curr_buffer_num >= num_sources) - throw Exception("There are no WriteBuffers to write result", ErrorCodes::CANNOT_WRITE_AFTER_END_OF_BUFFER); + throw Exception(ErrorCodes::CANNOT_WRITE_AFTER_END_OF_BUFFER, "There are no WriteBuffers to write result"); WriteBuffer * res = prepared_sources[curr_buffer_num].get(); if (!res) - throw Exception("Required WriteBuffer is not created", ErrorCodes::CANNOT_CREATE_IO_BUFFER); + throw Exception(ErrorCodes::CANNOT_CREATE_IO_BUFFER, "Required WriteBuffer is not created"); /// Check that returned buffer isn't empty if (!res->hasPendingData()) diff --git a/src/IO/CompressionMethod.cpp b/src/IO/CompressionMethod.cpp index e38cd811187..13e1adbb702 100644 --- a/src/IO/CompressionMethod.cpp +++ b/src/IO/CompressionMethod.cpp @@ -117,9 +117,8 @@ CompressionMethod chooseCompressionMethod(const std::string & path, const std::s if (hint.empty() || hint == "auto" || hint == "none") return CompressionMethod::None; - throw Exception( - "Unknown compression method '" + hint + "'. Only 'auto', 'none', 'gzip', 'deflate', 'br', 'xz', 'zstd', 'lz4', 'bz2', 'snappy' are supported as compression methods", - ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unknown compression method '{}'. 
" + "Only 'auto', 'none', 'gzip', 'deflate', 'br', 'xz', 'zstd', 'lz4', 'bz2', 'snappy' are supported as compression methods", hint); } std::pair getCompressionLevelRange(const CompressionMethod & method) @@ -159,7 +158,7 @@ static std::unique_ptr createCompressedWrapper( return std::make_unique(std::move(nested), buf_size, existing_memory, alignment); #endif - throw Exception("Unsupported compression method", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported compression method"); } std::unique_ptr wrapReadBufferWithCompressionMethod( @@ -195,12 +194,12 @@ std::unique_ptr wrapWriteBufferWithCompressionMethod( #endif #if USE_SNAPPY if (method == CompressionMethod::Snappy) - throw Exception("Unsupported compression method", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported compression method"); #endif if (method == CompressionMethod::None) return nested; - throw Exception("Unsupported compression method", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported compression method"); } } diff --git a/src/IO/ConcatSeekableReadBuffer.cpp b/src/IO/ConcatSeekableReadBuffer.cpp index 0943d1eac45..ec2793898fe 100644 --- a/src/IO/ConcatSeekableReadBuffer.cpp +++ b/src/IO/ConcatSeekableReadBuffer.cpp @@ -104,12 +104,12 @@ off_t ConcatSeekableReadBuffer::seek(off_t off, int whence) else if (whence == SEEK_CUR) new_position = current_position + off; else - throw Exception("ConcatSeekableReadBuffer::seek expects SEEK_SET or SEEK_CUR as whence", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "ConcatSeekableReadBuffer::seek expects SEEK_SET or SEEK_CUR as whence"); if (new_position < 0) - throw Exception("SEEK_SET underflow: off = " + std::to_string(off), ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "SEEK_SET underflow: off = {}", off); if (static_cast(new_position) > total_size) - throw Exception("SEEK_CUR shift out of bounds", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "SEEK_CUR shift out of bounds"); if (static_cast(new_position) == total_size) { diff --git a/src/IO/FileEncryptionCommon.cpp b/src/IO/FileEncryptionCommon.cpp index 5592da8721c..4ac4d289b32 100644 --- a/src/IO/FileEncryptionCommon.cpp +++ b/src/IO/FileEncryptionCommon.cpp @@ -38,7 +38,7 @@ namespace throw Exception( ErrorCodes::BAD_ARGUMENTS, "Encryption algorithm {} is not supported, specify one of the following: aes_128_ctr, aes_192_ctr, aes_256_ctr", - std::to_string(static_cast(algorithm))); + static_cast(algorithm)); } void checkKeySize(const EVP_CIPHER * evp_cipher, size_t key_size) @@ -48,7 +48,9 @@ namespace size_t expected_key_size = static_cast(EVP_CIPHER_key_length(evp_cipher)); if (key_size != expected_key_size) throw Exception( - ErrorCodes::BAD_ARGUMENTS, "Got an encryption key with unexpected size {}, the size should be {}", key_size, expected_key_size); + ErrorCodes::BAD_ARGUMENTS, + "Got an encryption key with unexpected size {}, the size should be {}", + key_size, expected_key_size); } void checkInitVectorSize(const EVP_CIPHER * evp_cipher) @@ -92,7 +94,7 @@ namespace uint8_t * ciphertext = reinterpret_cast(out.position()); int ciphertext_size = 0; if (!EVP_EncryptUpdate(evp_ctx, ciphertext, &ciphertext_size, &in[in_size], static_cast(part_size))) - throw Exception("Failed to encrypt", ErrorCodes::DATA_ENCRYPTION_ERROR); + throw Exception(ErrorCodes::DATA_ENCRYPTION_ERROR, "Failed 
to encrypt"); in_size += part_size; if (ciphertext_size) @@ -115,7 +117,7 @@ namespace uint8_t ciphertext[kBlockSize]; int ciphertext_size = 0; if (!EVP_EncryptUpdate(evp_ctx, ciphertext, &ciphertext_size, padded_data, safe_cast(padded_data_size))) - throw Exception("Failed to encrypt", ErrorCodes::DATA_ENCRYPTION_ERROR); + throw Exception(ErrorCodes::DATA_ENCRYPTION_ERROR, "Failed to encrypt"); if (!ciphertext_size) return 0; @@ -135,7 +137,7 @@ namespace int ciphertext_size = 0; if (!EVP_EncryptFinal_ex(evp_ctx, ciphertext, &ciphertext_size)) - throw Exception("Failed to finalize encrypting", ErrorCodes::DATA_ENCRYPTION_ERROR); + throw Exception(ErrorCodes::DATA_ENCRYPTION_ERROR, "Failed to finalize encrypting"); if (ciphertext_size) out.write(reinterpret_cast(ciphertext), ciphertext_size); return ciphertext_size; @@ -147,7 +149,7 @@ namespace uint8_t * plaintext = reinterpret_cast(out); int plaintext_size = 0; if (!EVP_DecryptUpdate(evp_ctx, plaintext, &plaintext_size, in, safe_cast(size))) - throw Exception("Failed to decrypt", ErrorCodes::DATA_ENCRYPTION_ERROR); + throw Exception(ErrorCodes::DATA_ENCRYPTION_ERROR, "Failed to decrypt"); return plaintext_size; } @@ -160,7 +162,7 @@ namespace uint8_t plaintext[kBlockSize]; int plaintext_size = 0; if (!EVP_DecryptUpdate(evp_ctx, plaintext, &plaintext_size, padded_data, safe_cast(padded_data_size))) - throw Exception("Failed to decrypt", ErrorCodes::DATA_ENCRYPTION_ERROR); + throw Exception(ErrorCodes::DATA_ENCRYPTION_ERROR, "Failed to decrypt"); if (!plaintext_size) return 0; @@ -179,7 +181,7 @@ namespace uint8_t plaintext[kBlockSize]; int plaintext_size = 0; if (!EVP_DecryptFinal_ex(evp_ctx, plaintext, &plaintext_size)) - throw Exception("Failed to finalize decrypting", ErrorCodes::DATA_ENCRYPTION_ERROR); + throw Exception(ErrorCodes::DATA_ENCRYPTION_ERROR, "Failed to finalize decrypting"); if (plaintext_size) memcpy(out, plaintext, plaintext_size); return plaintext_size; @@ -201,7 +203,7 @@ String toString(Algorithm algorithm) throw Exception( ErrorCodes::BAD_ARGUMENTS, "Encryption algorithm {} is not supported, specify one of the following: aes_128_ctr, aes_192_ctr, aes_256_ctr", - std::to_string(static_cast(algorithm))); + static_cast(algorithm)); } void parseFromString(Algorithm & algorithm, const String & str) @@ -282,11 +284,11 @@ void Encryptor::encrypt(const char * data, size_t size, WriteBuffer & out) auto * evp_ctx = evp_ctx_ptr.get(); if (!EVP_EncryptInit_ex(evp_ctx, evp_cipher, nullptr, nullptr, nullptr)) - throw Exception("Failed to initialize encryption context with cipher", ErrorCodes::DATA_ENCRYPTION_ERROR); + throw Exception(ErrorCodes::DATA_ENCRYPTION_ERROR, "Failed to initialize encryption context with cipher"); if (!EVP_EncryptInit_ex(evp_ctx, nullptr, nullptr, reinterpret_cast(key.c_str()), reinterpret_cast(current_iv.c_str()))) - throw Exception("Failed to set key and IV for encryption", ErrorCodes::DATA_ENCRYPTION_ERROR); + throw Exception(ErrorCodes::DATA_ENCRYPTION_ERROR, "Failed to set key and IV for encryption"); size_t in_size = 0; size_t out_size = 0; @@ -311,7 +313,7 @@ void Encryptor::encrypt(const char * data, size_t size, WriteBuffer & out) out_size += encryptFinal(evp_ctx, out); if (out_size != in_size) - throw Exception("Only part of the data was encrypted", ErrorCodes::DATA_ENCRYPTION_ERROR); + throw Exception(ErrorCodes::DATA_ENCRYPTION_ERROR, "Only part of the data was encrypted"); offset += in_size; } @@ -326,11 +328,11 @@ void Encryptor::decrypt(const char * data, size_t size, char * out) auto * 
evp_ctx = evp_ctx_ptr.get(); if (!EVP_DecryptInit_ex(evp_ctx, evp_cipher, nullptr, nullptr, nullptr)) - throw Exception("Failed to initialize decryption context with cipher", ErrorCodes::DATA_ENCRYPTION_ERROR); + throw Exception(ErrorCodes::DATA_ENCRYPTION_ERROR, "Failed to initialize decryption context with cipher"); if (!EVP_DecryptInit_ex(evp_ctx, nullptr, nullptr, reinterpret_cast(key.c_str()), reinterpret_cast(current_iv.c_str()))) - throw Exception("Failed to set key and IV for decryption", ErrorCodes::DATA_ENCRYPTION_ERROR); + throw Exception(ErrorCodes::DATA_ENCRYPTION_ERROR, "Failed to set key and IV for decryption"); size_t in_size = 0; size_t out_size = 0; @@ -355,7 +357,7 @@ void Encryptor::decrypt(const char * data, size_t size, char * out) out_size += decryptFinal(evp_ctx, &out[out_size]); if (out_size != in_size) - throw Exception("Only part of the data was decrypted", ErrorCodes::DATA_ENCRYPTION_ERROR); + throw Exception(ErrorCodes::DATA_ENCRYPTION_ERROR, "Only part of the data was decrypted"); offset += in_size; } diff --git a/src/IO/ForkWriteBuffer.cpp b/src/IO/ForkWriteBuffer.cpp index e4868d5c9a8..8e11b9ff590 100644 --- a/src/IO/ForkWriteBuffer.cpp +++ b/src/IO/ForkWriteBuffer.cpp @@ -14,7 +14,7 @@ ForkWriteBuffer::ForkWriteBuffer(WriteBufferPtrs && sources_) { if (sources.empty()) { - throw Exception("Expected non-zero number of buffers for `ForkWriteBuffer`", ErrorCodes::CANNOT_CREATE_IO_BUFFER); + throw Exception(ErrorCodes::CANNOT_CREATE_IO_BUFFER, "Expected non-zero number of buffers for `ForkWriteBuffer`"); } set(sources.front()->buffer().begin(), sources.front()->buffer().size()); } diff --git a/src/IO/HTTPChunkedReadBuffer.cpp b/src/IO/HTTPChunkedReadBuffer.cpp index a7841b1180f..b9c42088c41 100644 --- a/src/IO/HTTPChunkedReadBuffer.cpp +++ b/src/IO/HTTPChunkedReadBuffer.cpp @@ -19,28 +19,28 @@ namespace ErrorCodes size_t HTTPChunkedReadBuffer::readChunkHeader() { if (in->eof()) - throw Exception("Unexpected end of file while reading chunk header of HTTP chunked data", ErrorCodes::UNEXPECTED_END_OF_FILE); + throw Exception(ErrorCodes::UNEXPECTED_END_OF_FILE, "Unexpected end of file while reading chunk header of HTTP chunked data"); if (!isHexDigit(*in->position())) - throw Exception("Unexpected data instead of HTTP chunk header", ErrorCodes::CORRUPTED_DATA); + throw Exception(ErrorCodes::CORRUPTED_DATA, "Unexpected data instead of HTTP chunk header"); size_t res = 0; do { if (common::mulOverflow(res, 16ul, res) || common::addOverflow(res, unhex(*in->position()), res)) - throw Exception("Chunk size is out of bounds", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Chunk size is out of bounds"); ++in->position(); } while (!in->eof() && isHexDigit(*in->position())); if (res > max_chunk_size) - throw Exception("Chunk size exceeded the limit", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Chunk size exceeded the limit"); /// NOTE: If we want to read any chunk extensions, it should be done here. 
skipToCarriageReturnOrEOF(*in); if (in->eof()) - throw Exception("Unexpected end of file while reading chunk header of HTTP chunked data", ErrorCodes::UNEXPECTED_END_OF_FILE); + throw Exception(ErrorCodes::UNEXPECTED_END_OF_FILE, "Unexpected end of file while reading chunk header of HTTP chunked data"); assertString("\n", *in); return res; diff --git a/src/IO/HTTPCommon.cpp b/src/IO/HTTPCommon.cpp index 08a5effb013..551ce797757 100644 --- a/src/IO/HTTPCommon.cpp +++ b/src/IO/HTTPCommon.cpp @@ -60,7 +60,7 @@ namespace else if (uri.getScheme() == "http") return false; else - throw Exception("Unsupported scheme in URI '" + uri.toString() + "'", ErrorCodes::UNSUPPORTED_URI_SCHEME); + throw Exception(ErrorCodes::UNSUPPORTED_URI_SCHEME, "Unsupported scheme in URI '{}'", uri.toString()); } HTTPSessionPtr makeHTTPSessionImpl(const std::string & host, UInt16 port, bool https, bool keep_alive, bool resolve_host = true) @@ -77,7 +77,7 @@ namespace session = std::move(https_session); #else - throw Exception("ClickHouse was built without HTTPS support", ErrorCodes::FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME); + throw Exception(ErrorCodes::FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME, "ClickHouse was built without HTTPS support"); #endif } else diff --git a/src/IO/HadoopSnappyReadBuffer.cpp b/src/IO/HadoopSnappyReadBuffer.cpp index de04fb39b13..6ba31997b37 100644 --- a/src/IO/HadoopSnappyReadBuffer.cpp +++ b/src/IO/HadoopSnappyReadBuffer.cpp @@ -89,7 +89,9 @@ inline HadoopSnappyDecoder::Status HadoopSnappyDecoder::readCompressedLength(siz { auto status = readLength(avail_in, next_in, &compressed_length); if (unlikely(compressed_length > 0 && static_cast(compressed_length) > sizeof(buffer))) - throw Exception(ErrorCodes::SNAPPY_UNCOMPRESS_FAILED, "Too large snappy compressed block. buffer size: {}, compressed block size: {}", sizeof(buffer), compressed_length); + throw Exception(ErrorCodes::SNAPPY_UNCOMPRESS_FAILED, + "Too large snappy compressed block. 
buffer size: {}, compressed block size: {}", + sizeof(buffer), compressed_length); return status; } return Status::OK; @@ -194,7 +196,7 @@ bool HadoopSnappyReadBuffer::nextImpl() if (decoder->result == Status::NEEDS_MORE_INPUT && (!in_available || in->eof())) { - throw Exception(String("hadoop snappy decode error:") + statusToString(decoder->result), ErrorCodes::SNAPPY_UNCOMPRESS_FAILED); + throw Exception(ErrorCodes::SNAPPY_UNCOMPRESS_FAILED, "hadoop snappy decode error: {}", statusToString(decoder->result)); } out_capacity = internal_buffer.size(); @@ -219,7 +221,7 @@ bool HadoopSnappyReadBuffer::nextImpl() } else if (decoder->result == Status::INVALID_INPUT || decoder->result == Status::BUFFER_TOO_SMALL) { - throw Exception(String("hadoop snappy decode error:") + statusToString(decoder->result), ErrorCodes::SNAPPY_UNCOMPRESS_FAILED); + throw Exception(ErrorCodes::SNAPPY_UNCOMPRESS_FAILED, "hadoop snappy decode error: {}", statusToString(decoder->result)); } return true; } diff --git a/src/IO/HashingWriteBuffer.h b/src/IO/HashingWriteBuffer.h index 988dfc227fe..8edfa45a6be 100644 --- a/src/IO/HashingWriteBuffer.h +++ b/src/IO/HashingWriteBuffer.h @@ -25,7 +25,7 @@ public: uint128 getHash() { if (block_pos) - return CityHash_v1_0_2::CityHash128WithSeed(&BufferWithOwnMemory::memory[0], block_pos, state); + return CityHash_v1_0_2::CityHash128WithSeed(BufferWithOwnMemory::memory.data(), block_pos, state); else return state; } diff --git a/src/IO/LimitReadBuffer.cpp b/src/IO/LimitReadBuffer.cpp index 30914f9b798..6b3c383c753 100644 --- a/src/IO/LimitReadBuffer.cpp +++ b/src/IO/LimitReadBuffer.cpp @@ -22,7 +22,7 @@ bool LimitReadBuffer::nextImpl() if (bytes >= limit) { if (throw_exception) - throw Exception("Limit for LimitReadBuffer exceeded: " + exception_message, ErrorCodes::LIMIT_EXCEEDED); + throw Exception(ErrorCodes::LIMIT_EXCEEDED, "Limit for LimitReadBuffer exceeded: {}", exception_message); else return false; } diff --git a/src/IO/LimitSeekableReadBuffer.cpp b/src/IO/LimitSeekableReadBuffer.cpp index d37e8152f1c..acd7ba6eaa0 100644 --- a/src/IO/LimitSeekableReadBuffer.cpp +++ b/src/IO/LimitSeekableReadBuffer.cpp @@ -81,10 +81,10 @@ off_t LimitSeekableReadBuffer::seek(off_t off, int whence) else if (whence == SEEK_CUR) new_position = current_position + off; else - throw Exception("Seek expects SEEK_SET or SEEK_CUR as whence", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Seek expects SEEK_SET or SEEK_CUR as whence"); if (new_position < 0 || new_position + min_offset > max_offset) - throw Exception("Seek shift out of bounds", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Seek shift out of bounds"); off_t position_change = new_position - current_position; if ((buffer().begin() <= pos + position_change) && (pos + position_change <= buffer().end())) diff --git a/src/IO/Lz4DeflatingWriteBuffer.cpp b/src/IO/Lz4DeflatingWriteBuffer.cpp index c2eb175d5a9..c3a1b8282c3 100644 --- a/src/IO/Lz4DeflatingWriteBuffer.cpp +++ b/src/IO/Lz4DeflatingWriteBuffer.cpp @@ -37,8 +37,7 @@ Lz4DeflatingWriteBuffer::Lz4DeflatingWriteBuffer( throw Exception( ErrorCodes::LZ4_ENCODER_FAILED, "creation of LZ4 compression context failed. 
LZ4F version: {}", - LZ4F_VERSION, - ErrorCodes::LZ4_ENCODER_FAILED); + LZ4F_VERSION); } Lz4DeflatingWriteBuffer::~Lz4DeflatingWriteBuffer() diff --git a/src/IO/MMapReadBufferFromFile.cpp b/src/IO/MMapReadBufferFromFile.cpp index b3354c42fbb..86e05d7ae44 100644 --- a/src/IO/MMapReadBufferFromFile.cpp +++ b/src/IO/MMapReadBufferFromFile.cpp @@ -70,7 +70,7 @@ void MMapReadBufferFromFile::close() finish(); if (0 != ::close(fd)) - throw Exception("Cannot close file", ErrorCodes::CANNOT_CLOSE_FILE); + throw Exception(ErrorCodes::CANNOT_CLOSE_FILE, "Cannot close file"); fd = -1; metric_increment.destroy(); diff --git a/src/IO/MMappedFile.cpp b/src/IO/MMappedFile.cpp index edd2e5ef0ce..9e45140d5f9 100644 --- a/src/IO/MMappedFile.cpp +++ b/src/IO/MMappedFile.cpp @@ -69,7 +69,7 @@ void MMappedFile::close() finish(); if (0 != ::close(fd)) - throw Exception("Cannot close file", ErrorCodes::CANNOT_CLOSE_FILE); + throw Exception(ErrorCodes::CANNOT_CLOSE_FILE, "Cannot close file"); fd = -1; metric_increment.destroy(); diff --git a/src/IO/MMappedFileDescriptor.cpp b/src/IO/MMappedFileDescriptor.cpp index 205cede0993..9cc1aaf656c 100644 --- a/src/IO/MMappedFileDescriptor.cpp +++ b/src/IO/MMappedFileDescriptor.cpp @@ -33,7 +33,7 @@ static size_t getFileSize(int fd) off_t file_size = stat_res.st_size; if (file_size < 0) - throw Exception("MMappedFileDescriptor: fstat returned negative file size", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "MMappedFileDescriptor: fstat returned negative file size"); return file_size; } @@ -77,7 +77,7 @@ void MMappedFileDescriptor::set(int fd_, size_t offset_) size_t file_size = getFileSize(fd_); if (offset > static_cast(file_size)) - throw Exception("MMappedFileDescriptor: requested offset is greater than file size", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "MMappedFileDescriptor: requested offset is greater than file size"); set(fd_, offset_, file_size - offset); } diff --git a/src/IO/MemoryReadWriteBuffer.cpp b/src/IO/MemoryReadWriteBuffer.cpp index 69bcd52a8d2..8958390fe03 100644 --- a/src/IO/MemoryReadWriteBuffer.cpp +++ b/src/IO/MemoryReadWriteBuffer.cpp @@ -118,7 +118,7 @@ void MemoryWriteBuffer::addChunk() if (0 == next_chunk_size) { set(position(), 0); - throw Exception("MemoryWriteBuffer limit is exhausted", ErrorCodes::CURRENT_WRITE_BUFFER_IS_EXHAUSTED); + throw Exception(ErrorCodes::CURRENT_WRITE_BUFFER_IS_EXHAUSTED, "MemoryWriteBuffer limit is exhausted"); } } diff --git a/src/IO/MySQLBinlogEventReadBuffer.cpp b/src/IO/MySQLBinlogEventReadBuffer.cpp index 5b1d23545a2..9f05c5b5e09 100644 --- a/src/IO/MySQLBinlogEventReadBuffer.cpp +++ b/src/IO/MySQLBinlogEventReadBuffer.cpp @@ -13,8 +13,9 @@ MySQLBinlogEventReadBuffer::MySQLBinlogEventReadBuffer(ReadBuffer & in_, size_t : ReadBuffer(nullptr, 0, 0), in(in_), checksum_signature_length(checksum_signature_length_) { if (checksum_signature_length > MAX_CHECKSUM_SIGNATURE_LENGTH) - throw Exception("LOGICAL ERROR: checksum_signature_length must be less than MAX_CHECKSUM_SIGNATURE_LENGTH. It is a bug.", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "LOGICAL ERROR: checksum_signature_length must be less than MAX_CHECKSUM_SIGNATURE_LENGTH. 
" + "It is a bug."); nextIfAtEnd(); } diff --git a/src/IO/MySQLPacketPayloadWriteBuffer.cpp b/src/IO/MySQLPacketPayloadWriteBuffer.cpp index 25ce9a9fcdf..425e1b8d08d 100644 --- a/src/IO/MySQLPacketPayloadWriteBuffer.cpp +++ b/src/IO/MySQLPacketPayloadWriteBuffer.cpp @@ -46,7 +46,7 @@ void MySQLPacketPayloadWriteBuffer::nextImpl() { size_t written = pos - working_buffer.begin(); if (eof) - throw Exception("Cannot write after end of buffer.", ErrorCodes::CANNOT_WRITE_AFTER_END_OF_BUFFER); + throw Exception(ErrorCodes::CANNOT_WRITE_AFTER_END_OF_BUFFER, "Cannot write after end of buffer."); out.position() += written; bytes_written += written; diff --git a/src/IO/OpenedFile.cpp b/src/IO/OpenedFile.cpp index f8c460b0833..b75e087e5c3 100644 --- a/src/IO/OpenedFile.cpp +++ b/src/IO/OpenedFile.cpp @@ -67,7 +67,7 @@ void OpenedFile::close() return; if (0 != ::close(fd)) - throw Exception("Cannot close file", ErrorCodes::CANNOT_CLOSE_FILE); + throw Exception(ErrorCodes::CANNOT_CLOSE_FILE, "Cannot close file"); fd = -1; metric_increment.destroy(); diff --git a/src/IO/ParallelReadBuffer.cpp b/src/IO/ParallelReadBuffer.cpp index d055a42fcb6..1c7c136c315 100644 --- a/src/IO/ParallelReadBuffer.cpp +++ b/src/IO/ParallelReadBuffer.cpp @@ -85,10 +85,10 @@ void ParallelReadBuffer::addReaders() off_t ParallelReadBuffer::seek(off_t offset, int whence) { if (whence != SEEK_SET) - throw Exception("Only SEEK_SET mode is allowed.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE); + throw Exception(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Only SEEK_SET mode is allowed."); if (offset < 0) - throw Exception("Seek position is out of bounds. Offset: " + std::to_string(offset), ErrorCodes::SEEK_POSITION_OUT_OF_BOUND); + throw Exception(ErrorCodes::SEEK_POSITION_OUT_OF_BOUND, "Seek position is out of bounds. Offset: {}", offset); if (!working_buffer.empty() && static_cast(offset) >= current_position - working_buffer.size() && offset < current_position) { diff --git a/src/IO/PeekableReadBuffer.cpp b/src/IO/PeekableReadBuffer.cpp index c47bdce3924..ce9c20e7a53 100644 --- a/src/IO/PeekableReadBuffer.cpp +++ b/src/IO/PeekableReadBuffer.cpp @@ -127,19 +127,51 @@ void PeekableReadBuffer::rollbackToCheckpoint(bool drop) assert(checkpoint); - if (checkpointInOwnMemory() == currentlyReadFromOwnMemory()) + if (recursive_checkpoints_offsets.empty()) { - /// Both checkpoint and position are in the same buffer. - pos = *checkpoint; + if (checkpointInOwnMemory() == currentlyReadFromOwnMemory()) + { + /// Both checkpoint and position are in the same buffer. + pos = *checkpoint; + } + else + { + /// Checkpoint is in own memory and position is not. + assert(checkpointInOwnMemory()); + + char * memory_data = getMemoryData(); + /// Switch to reading from own memory. + BufferBase::set(memory_data, peeked_size, *checkpoint - memory_data); + } } else { - /// Checkpoint is in own memory and position is not. - assert(checkpointInOwnMemory()); + size_t offset_from_checkpoint = recursive_checkpoints_offsets.top(); + if (checkpointInOwnMemory() == currentlyReadFromOwnMemory()) + { + /// Both checkpoint and position are in the same buffer. + pos = *checkpoint + offset_from_checkpoint; + } + else + { + /// Checkpoint is in own memory and position is not. + assert(checkpointInOwnMemory()); - char * memory_data = getMemoryData(); - /// Switch to reading from own memory. 
-        BufferBase::set(memory_data, peeked_size, *checkpoint - memory_data);
+            size_t offset_from_checkpoint_in_own_memory = offsetFromCheckpointInOwnMemory();
+            if (offset_from_checkpoint >= offset_from_checkpoint_in_own_memory)
+            {
+                /// Recursive checkpoint is in sub buffer with current position.
+                /// Just move position to the recursive checkpoint
+                pos = buffer().begin() + (offset_from_checkpoint - offset_from_checkpoint_in_own_memory);
+            }
+            else
+            {
+                /// Recursive checkpoint is in own memory and position is not.
+                /// Switch to reading from own memory.
+                char * memory_data = getMemoryData();
+                BufferBase::set(memory_data, peeked_size, *checkpoint - memory_data + offset_from_checkpoint);
+            }
+        }
     }
 
     if (drop)
@@ -205,29 +237,29 @@ void PeekableReadBuffer::checkStateCorrect() const
         if (checkpointInOwnMemory())
         {
             if (!peeked_size)
-                throw DB::Exception("Checkpoint in empty own buffer", ErrorCodes::LOGICAL_ERROR);
+                throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Checkpoint in empty own buffer");
             if (currentlyReadFromOwnMemory() && pos < *checkpoint)
-                throw DB::Exception("Current position in own buffer before checkpoint in own buffer", ErrorCodes::LOGICAL_ERROR);
+                throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Current position in own buffer before checkpoint in own buffer");
             if (!currentlyReadFromOwnMemory() && pos < sub_buf->position())
-                throw DB::Exception("Current position in subbuffer less than sub_buf->position()", ErrorCodes::LOGICAL_ERROR);
+                throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Current position in subbuffer less than sub_buf->position()");
         }
         else
         {
             if (peeked_size)
-                throw DB::Exception("Own buffer is not empty", ErrorCodes::LOGICAL_ERROR);
+                throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Own buffer is not empty");
             if (currentlyReadFromOwnMemory())
-                throw DB::Exception("Current position in own buffer before checkpoint in subbuffer", ErrorCodes::LOGICAL_ERROR);
+                throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Current position in own buffer before checkpoint in subbuffer");
             if (pos < *checkpoint)
-                throw DB::Exception("Current position in subbuffer before checkpoint in subbuffer", ErrorCodes::LOGICAL_ERROR);
+                throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Current position in subbuffer before checkpoint in subbuffer");
         }
     }
     else
     {
         if (!currentlyReadFromOwnMemory() && peeked_size)
-            throw DB::Exception("Own buffer is not empty", ErrorCodes::LOGICAL_ERROR);
+            throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Own buffer is not empty");
     }
     if (currentlyReadFromOwnMemory() && !peeked_size)
-        throw DB::Exception("Pos in empty own buffer", ErrorCodes::LOGICAL_ERROR);
+        throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Pos in empty own buffer");
 }
 
 void PeekableReadBuffer::resizeOwnMemoryIfNecessary(size_t bytes_to_append)
@@ -297,7 +329,7 @@ void PeekableReadBuffer::resizeOwnMemoryIfNecessary(size_t bytes_to_append)
 void PeekableReadBuffer::makeContinuousMemoryFromCheckpointToPos()
 {
     if (!checkpoint)
-        throw DB::Exception("There is no checkpoint", ErrorCodes::LOGICAL_ERROR);
+        throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "There is no checkpoint");
     checkStateCorrect();
 
     if (!checkpointInOwnMemory() || currentlyReadFromOwnMemory())
@@ -323,4 +355,24 @@ bool PeekableReadBuffer::hasUnreadData() const
     return peeked_size && pos != getMemoryData() + peeked_size;
 }
 
+size_t PeekableReadBuffer::offsetFromCheckpointInOwnMemory() const
+{
+    return peeked_size - (*checkpoint - getMemoryData());
+}
+
+size_t PeekableReadBuffer::offsetFromCheckpoint() const
+{
+    if (!checkpoint)
+        throw 
DB::Exception(ErrorCodes::LOGICAL_ERROR, "There is no checkpoint"); + + if (checkpointInOwnMemory() == currentlyReadFromOwnMemory()) + { + /// Checkpoint and pos are in the same buffer. + return pos - *checkpoint; + } + + /// Checkpoint is in own memory, position is in sub buffer. + return offset() + offsetFromCheckpointInOwnMemory(); +} + } diff --git a/src/IO/PeekableReadBuffer.h b/src/IO/PeekableReadBuffer.h index f50b0d69dc5..fc827fbad75 100644 --- a/src/IO/PeekableReadBuffer.h +++ b/src/IO/PeekableReadBuffer.h @@ -1,15 +1,11 @@ #pragma once #include #include +#include namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - /// Also allows to set checkpoint at some position in stream and come back to this position later. /// When next() is called, saves data between checkpoint and current position to own memory and loads next data to sub-buffer /// Sub-buffer should not be accessed directly during the lifetime of peekable buffer (unless @@ -29,10 +25,14 @@ public: /// Sets checkpoint at current position ALWAYS_INLINE inline void setCheckpoint() { -#ifndef NDEBUG if (checkpoint) - throw DB::Exception("Does not support recursive checkpoints.", ErrorCodes::LOGICAL_ERROR); -#endif + { + /// Recursive checkpoints. We just remember offset from the + /// first checkpoint to the current position. + recursive_checkpoints_offsets.push(offsetFromCheckpoint()); + return; + } + checkpoint_in_own_memory = currentlyReadFromOwnMemory(); if (!checkpoint_in_own_memory) { @@ -46,6 +46,13 @@ public: ALWAYS_INLINE inline void dropCheckpoint() { assert(checkpoint); + + if (!recursive_checkpoints_offsets.empty()) + { + recursive_checkpoints_offsets.pop(); + return; + } + if (!currentlyReadFromOwnMemory()) { /// Don't need to store unread data anymore @@ -73,6 +80,8 @@ public: void setSubBuffer(ReadBuffer & sub_buf_); + const ReadBuffer & getSubBuffer() const { return *sub_buf; } + private: bool nextImpl() override; @@ -93,6 +102,9 @@ private: char * getMemoryData() { return use_stack_memory ? stack_memory : memory.data(); } const char * getMemoryData() const { return use_stack_memory ? stack_memory : memory.data(); } + size_t offsetFromCheckpointInOwnMemory() const; + size_t offsetFromCheckpoint() const; + ReadBuffer * sub_buf; size_t peeked_size = 0; @@ -105,6 +117,8 @@ private: /// larger buffer only if reserved memory is not enough. char stack_memory[PADDING_FOR_SIMD]; bool use_stack_memory = true; + + std::stack recursive_checkpoints_offsets; }; diff --git a/src/IO/ReadBuffer.h b/src/IO/ReadBuffer.h index 9c2beb5ad18..5b9cda390c8 100644 --- a/src/IO/ReadBuffer.h +++ b/src/IO/ReadBuffer.h @@ -188,7 +188,8 @@ public: { auto read_bytes = read(to, n); if (n != read_bytes) - throw Exception("Cannot read all data. Bytes read: " + std::to_string(read_bytes) + ". Bytes expected: " + std::to_string(n) + ".", ErrorCodes::CANNOT_READ_ALL_DATA); + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, + "Cannot read all data. Bytes read: {}. Bytes expected: {}.", read_bytes, std::to_string(n)); } /** A method that can be more efficiently implemented in derived classes, in the case of reading large enough blocks. @@ -217,7 +218,7 @@ public: /// Such as ReadBufferFromRemoteFSGather and AsynchronousReadIndirectBufferFromRemoteFS. 
     virtual IAsynchronousReader::Result readInto(char * /*data*/, size_t /*size*/, size_t /*offset*/, size_t /*ignore*/)
     {
-        throw Exception("readInto not implemented", ErrorCodes::NOT_IMPLEMENTED);
+        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "readInto not implemented");
     }
 
 protected:
@@ -235,7 +236,7 @@ private:
     [[noreturn]] static void throwReadAfterEOF()
     {
-        throw Exception("Attempt to read after eof", ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF);
+        throw Exception(ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF, "Attempt to read after eof");
     }
 };
diff --git a/src/IO/ReadBufferFromEncryptedFile.cpp b/src/IO/ReadBufferFromEncryptedFile.cpp
index 22f994bb44b..f9cf1597153 100644
--- a/src/IO/ReadBufferFromEncryptedFile.cpp
+++ b/src/IO/ReadBufferFromEncryptedFile.cpp
@@ -30,17 +30,17 @@ off_t ReadBufferFromEncryptedFile::seek(off_t off, int whence)
     if (whence == SEEK_SET)
     {
         if (off < 0)
-            throw Exception("SEEK_SET underflow: off = " + std::to_string(off), ErrorCodes::ARGUMENT_OUT_OF_BOUND);
+            throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "SEEK_SET underflow: off = {}", off);
         new_pos = off;
     }
     else if (whence == SEEK_CUR)
     {
         if (off < 0 && -off > getPosition())
-            throw Exception("SEEK_CUR shift out of bounds", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
+            throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "SEEK_CUR shift out of bounds");
         new_pos = getPosition() + off;
     }
     else
-        throw Exception("ReadBufferFromFileEncrypted::seek expects SEEK_SET or SEEK_CUR as whence", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
+        throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "ReadBufferFromFileEncrypted::seek expects SEEK_SET or SEEK_CUR as whence");
 
     if ((offset - static_cast(working_buffer.size()) <= new_pos) && (new_pos <= offset) && !need_seek)
     {
diff --git a/src/IO/ReadBufferFromFile.cpp b/src/IO/ReadBufferFromFile.cpp
index 5ef75ad01ec..5e95271e142 100644
--- a/src/IO/ReadBufferFromFile.cpp
+++ b/src/IO/ReadBufferFromFile.cpp
@@ -83,7 +83,7 @@ void ReadBufferFromFile::close()
         return;
 
     if (0 != ::close(fd))
-        throw Exception("Cannot close file", ErrorCodes::CANNOT_CLOSE_FILE);
+        throw Exception(ErrorCodes::CANNOT_CLOSE_FILE, "Cannot close file");
 
     fd = -1;
     metric_increment.destroy();
diff --git a/src/IO/ReadBufferFromFileDescriptor.cpp b/src/IO/ReadBufferFromFileDescriptor.cpp
index b0e3a1ac7cd..f2f54f1c86d 100644
--- a/src/IO/ReadBufferFromFileDescriptor.cpp
+++ b/src/IO/ReadBufferFromFileDescriptor.cpp
@@ -147,7 +147,7 @@ off_t ReadBufferFromFileDescriptor::seek(off_t offset, int whence)
     }
     else
     {
-        throw Exception("ReadBufferFromFileDescriptor::seek expects SEEK_SET or SEEK_CUR as whence", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
+        throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "ReadBufferFromFileDescriptor::seek expects SEEK_SET or SEEK_CUR as whence");
     }
 
     /// Position is unchanged. 
diff --git a/src/IO/ReadBufferFromIStream.cpp b/src/IO/ReadBufferFromIStream.cpp index 5e7f7545b56..e0c966fb700 100644 --- a/src/IO/ReadBufferFromIStream.cpp +++ b/src/IO/ReadBufferFromIStream.cpp @@ -21,9 +21,9 @@ bool ReadBufferFromIStream::nextImpl() return false; if (istr.fail()) - throw Exception("Cannot read from istream at offset " + std::to_string(count()), ErrorCodes::CANNOT_READ_FROM_ISTREAM); + throw Exception(ErrorCodes::CANNOT_READ_FROM_ISTREAM, "Cannot read from istream at offset {}", count()); - throw Exception("Unexpected state of istream at offset " + std::to_string(count()), ErrorCodes::CANNOT_READ_FROM_ISTREAM); + throw Exception(ErrorCodes::CANNOT_READ_FROM_ISTREAM, "Unexpected state of istream at offset {}", count()); } else working_buffer.resize(gcount); diff --git a/src/IO/ReadBufferFromMemory.cpp b/src/IO/ReadBufferFromMemory.cpp index 3ed5603ce2d..ea0054f24d4 100644 --- a/src/IO/ReadBufferFromMemory.cpp +++ b/src/IO/ReadBufferFromMemory.cpp @@ -19,11 +19,8 @@ off_t ReadBufferFromMemory::seek(off_t offset, int whence) return static_cast(pos - internal_buffer.begin()); } else - throw Exception( - "Seek position is out of bounds. " - "Offset: " - + std::to_string(offset) + ", Max: " + std::to_string(static_cast(internal_buffer.end() - internal_buffer.begin())), - ErrorCodes::SEEK_POSITION_OUT_OF_BOUND); + throw Exception(ErrorCodes::SEEK_POSITION_OUT_OF_BOUND, "Seek position is out of bounds. Offset: {}, Max: {}", + offset, std::to_string(static_cast(internal_buffer.end() - internal_buffer.begin()))); } else if (whence == SEEK_CUR) { @@ -35,14 +32,11 @@ off_t ReadBufferFromMemory::seek(off_t offset, int whence) return static_cast(pos - internal_buffer.begin()); } else - throw Exception( - "Seek position is out of bounds. " - "Offset: " - + std::to_string(offset) + ", Max: " + std::to_string(static_cast(internal_buffer.end() - internal_buffer.begin())), - ErrorCodes::SEEK_POSITION_OUT_OF_BOUND); + throw Exception(ErrorCodes::SEEK_POSITION_OUT_OF_BOUND, "Seek position is out of bounds. 
Offset: {}, Max: {}", + offset, std::to_string(static_cast(internal_buffer.end() - internal_buffer.begin()))); } else - throw Exception("Only SEEK_SET and SEEK_CUR seek modes allowed.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE); + throw Exception(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Only SEEK_SET and SEEK_CUR seek modes allowed."); } off_t ReadBufferFromMemory::getPosition() diff --git a/src/IO/ReadBufferFromPocoSocket.cpp b/src/IO/ReadBufferFromPocoSocket.cpp index 7ba23dd1588..046646ed003 100644 --- a/src/IO/ReadBufferFromPocoSocket.cpp +++ b/src/IO/ReadBufferFromPocoSocket.cpp @@ -62,21 +62,21 @@ bool ReadBufferFromPocoSocket::nextImpl() } catch (const Poco::Net::NetException & e) { - throw NetException(e.displayText() + ", while reading from socket (" + peer_address.toString() + ")", ErrorCodes::NETWORK_ERROR); + throw NetException(ErrorCodes::NETWORK_ERROR, "{}, while reading from socket ({})", e.displayText(), peer_address.toString()); } catch (const Poco::TimeoutException &) { - throw NetException(fmt::format("Timeout exceeded while reading from socket ({}, {} ms)", + throw NetException(ErrorCodes::SOCKET_TIMEOUT, "Timeout exceeded while reading from socket ({}, {} ms)", peer_address.toString(), - socket.impl()->getReceiveTimeout().totalMilliseconds()), ErrorCodes::SOCKET_TIMEOUT); + socket.impl()->getReceiveTimeout().totalMilliseconds()); } catch (const Poco::IOException & e) { - throw NetException(e.displayText() + ", while reading from socket (" + peer_address.toString() + ")", ErrorCodes::NETWORK_ERROR); + throw NetException(ErrorCodes::NETWORK_ERROR, "{}, while reading from socket ({})", e.displayText(), peer_address.toString()); } if (bytes_read < 0) - throw NetException("Cannot read from socket (" + peer_address.toString() + ")", ErrorCodes::CANNOT_READ_FROM_SOCKET); + throw NetException(ErrorCodes::CANNOT_READ_FROM_SOCKET, "Cannot read from socket ({})", peer_address.toString()); if (bytes_read) working_buffer.resize(bytes_read); diff --git a/src/IO/ReadBufferFromS3.cpp b/src/IO/ReadBufferFromS3.cpp index 69d2a244097..6cc98464af2 100644 --- a/src/IO/ReadBufferFromS3.cpp +++ b/src/IO/ReadBufferFromS3.cpp @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -26,6 +27,8 @@ namespace ProfileEvents extern const Event ReadBufferSeekCancelConnection; extern const Event S3GetObject; extern const Event DiskS3GetObject; + extern const Event RemoteReadThrottlerBytes; + extern const Event RemoteReadThrottlerSleepMicroseconds; } namespace DB @@ -186,7 +189,7 @@ bool ReadBufferFromS3::nextImpl() ProfileEvents::increment(ProfileEvents::ReadBufferFromS3Bytes, working_buffer.size()); offset += working_buffer.size(); if (read_settings.remote_throttler) - read_settings.remote_throttler->add(working_buffer.size()); + read_settings.remote_throttler->add(working_buffer.size(), ProfileEvents::RemoteReadThrottlerBytes, ProfileEvents::RemoteReadThrottlerSleepMicroseconds); return true; } @@ -199,15 +202,16 @@ off_t ReadBufferFromS3::seek(off_t offset_, int whence) if (impl && restricted_seek) throw Exception( - ErrorCodes::CANNOT_SEEK_THROUGH_FILE, - "Seek is allowed only before first read attempt from the buffer (current offset: {}, new offset: {}, reading until position: {}, available: {})", - offset, offset_, read_until_position, available()); + ErrorCodes::CANNOT_SEEK_THROUGH_FILE, + "Seek is allowed only before first read attempt from the buffer (current offset: " + "{}, new offset: {}, reading until position: {}, available: {})", + offset, offset_, read_until_position, available()); 
if (whence != SEEK_SET) - throw Exception("Only SEEK_SET mode is allowed.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE); + throw Exception(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Only SEEK_SET mode is allowed."); if (offset_ < 0) - throw Exception("Seek position is out of bounds. Offset: " + std::to_string(offset_), ErrorCodes::SEEK_POSITION_OUT_OF_BOUND); + throw Exception(ErrorCodes::SEEK_POSITION_OUT_OF_BOUND, "Seek position is out of bounds. Offset: {}", offset_); if (!restricted_seek) { @@ -250,7 +254,7 @@ size_t ReadBufferFromS3::getFileSize() if (file_size) return *file_size; - auto object_size = S3::getObjectSize(*client_ptr, bucket, key, version_id, /* for_disk_s3= */ read_settings.for_object_storage); + auto object_size = S3::getObjectSize(*client_ptr, bucket, key, version_id, request_settings, /* for_disk_s3= */ read_settings.for_object_storage); file_size = object_size; return *file_size; diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index fd382697f3f..86a2b9c650e 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -95,14 +95,14 @@ void parseUUIDWithoutSeparator(const UInt8 * src36, std::reverse_iterator) - throw Exception("Cannot parse escape sequence", ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE); + throw Exception(ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE, "Cannot parse escape sequence"); else return ReturnType(false); } @@ -381,7 +381,7 @@ static ReturnType parseJSONEscapeSequence(Vector & s, ReadBuffer & buf) auto error = [](const char * message [[maybe_unused]], int code [[maybe_unused]]) { if constexpr (throw_exception) - throw Exception(message, code); + throw Exception::createDeprecated(message, code); return ReturnType(false); }; @@ -491,8 +491,8 @@ static ReturnType parseJSONEscapeSequence(Vector & s, ReadBuffer & buf) } -template -void readEscapedStringInto(Vector & s, ReadBuffer & buf) +template +void readEscapedStringIntoImpl(Vector & s, ReadBuffer & buf) { while (!buf.eof()) { @@ -508,10 +508,32 @@ void readEscapedStringInto(Vector & s, ReadBuffer & buf) return; if (*buf.position() == '\\') - parseComplexEscapeSequence(s, buf); + { + if constexpr (parse_complex_escape_sequence) + { + parseComplexEscapeSequence(s, buf); + } + else + { + s.push_back(*buf.position()); + ++buf.position(); + if (!buf.eof()) + { + s.push_back(*buf.position()); + ++buf.position(); + } + } + } } } +template +void readEscapedStringInto(Vector & s, ReadBuffer & buf) +{ + readEscapedStringIntoImpl(s, buf); +} + + void readEscapedString(String & s, ReadBuffer & buf) { s.clear(); @@ -581,8 +603,7 @@ static ReturnType readAnyQuotedStringInto(Vector & s, ReadBuffer & buf) } if constexpr (throw_exception) - throw ParsingException("Cannot parse quoted string: expected closing quote", - ErrorCodes::CANNOT_PARSE_QUOTED_STRING); + throw ParsingException(ErrorCodes::CANNOT_PARSE_QUOTED_STRING, "Cannot parse quoted string: expected closing quote"); else return ReturnType(false); } @@ -665,7 +686,7 @@ concept WithResize = requires (T value) { value.size() } -> std::integral<>; }; -template +template void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV & settings) { /// Empty string @@ -682,6 +703,9 @@ void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV & if ((settings.allow_single_quotes && maybe_quote == '\'') || (settings.allow_double_quotes && maybe_quote == '"')) { + if constexpr (include_quotes) + s.push_back(maybe_quote); + ++buf.position(); /// The quoted case. We are looking for the next quotation mark. 
@@ -698,8 +722,12 @@ void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV & if (!buf.hasPendingData()) continue; + if constexpr (include_quotes) + s.push_back(maybe_quote); + /// Now there is a quotation mark under the cursor. Is there any following? ++buf.position(); + if (buf.eof()) return; @@ -829,33 +857,23 @@ void readCSVString(String & s, ReadBuffer & buf, const FormatSettings::CSV & set void readCSVField(String & s, ReadBuffer & buf, const FormatSettings::CSV & settings) { s.clear(); - bool add_quote = false; - char quote = '\''; - - if (!buf.eof() && (*buf.position() == '\'' || *buf.position() == '"')) - { - quote = *buf.position(); - s.push_back(quote); - add_quote = true; - } - - readCSVStringInto(s, buf, settings); - - if (add_quote) - s.push_back(quote); + readCSVStringInto(s, buf, settings); } void readCSVWithTwoPossibleDelimitersImpl(String & s, PeekableReadBuffer & buf, const String & first_delimiter, const String & second_delimiter) { /// Check that delimiters are not empty. if (first_delimiter.empty() || second_delimiter.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot read CSV field with two possible delimiters, one of delimiters '{}' and '{}' is empty", first_delimiter, second_delimiter); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Cannot read CSV field with two possible delimiters, one " + "of delimiters '{}' and '{}' is empty", first_delimiter, second_delimiter); /// Read all data until first_delimiter or second_delimiter while (true) { if (buf.eof()) - throw Exception(ErrorCodes::INCORRECT_DATA, R"(Unexpected EOF while reading CSV string, expected on of delimiters "{}" or "{}")", first_delimiter, second_delimiter); + throw Exception(ErrorCodes::INCORRECT_DATA, R"(Unexpected EOF while reading CSV string, expected on " + "of delimiters "{}" or "{}")", first_delimiter, second_delimiter); char * next_pos = buf.position(); while (next_pos != buf.buffer().end() && *next_pos != first_delimiter[0] && *next_pos != second_delimiter[0]) @@ -920,10 +938,10 @@ ReturnType readJSONStringInto(Vector & s, ReadBuffer & buf) { static constexpr bool throw_exception = std::is_same_v; - auto error = [](const char * message [[maybe_unused]], int code [[maybe_unused]]) + auto error = [](FormatStringHelper<> message [[maybe_unused]], int code [[maybe_unused]]) { if constexpr (throw_exception) - throw ParsingException(message, code); + throw ParsingException(code, std::move(message)); return ReturnType(false); }; @@ -971,10 +989,10 @@ ReturnType readJSONObjectPossiblyInvalid(Vector & s, ReadBuffer & buf) { static constexpr bool throw_exception = std::is_same_v; - auto error = [](const char * message [[maybe_unused]], int code [[maybe_unused]]) + auto error = [](FormatStringHelper<> message [[maybe_unused]], int code [[maybe_unused]]) { if constexpr (throw_exception) - throw ParsingException(message, code); + throw ParsingException(code, std::move(message)); return ReturnType(false); }; @@ -1037,7 +1055,7 @@ ReturnType readDateTextFallback(LocalDate & date, ReadBuffer & buf) auto error = [] { if constexpr (throw_exception) - throw Exception("Cannot parse date: value is too short", ErrorCodes::CANNOT_PARSE_DATE); + throw Exception(ErrorCodes::CANNOT_PARSE_DATE, "Cannot parse date: value is too short"); return ReturnType(false); }; @@ -1142,7 +1160,7 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D s_pos[size] = 0; if constexpr (throw_exception) - throw ParsingException(std::string("Cannot parse DateTime ") + s, 
ErrorCodes::CANNOT_PARSE_DATETIME); + throw ParsingException(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse DateTime {}", s); else return false; } @@ -1165,7 +1183,7 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D s_pos[size] = 0; if constexpr (throw_exception) - throw ParsingException(std::string("Cannot parse time component of DateTime ") + s, ErrorCodes::CANNOT_PARSE_DATETIME); + throw ParsingException(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse time component of DateTime {}", s); else return false; } @@ -1192,7 +1210,7 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D else { if constexpr (throw_exception) - throw ParsingException("Cannot parse datetime", ErrorCodes::CANNOT_PARSE_DATETIME); + throw ParsingException(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse datetime"); else return false; } @@ -1208,7 +1226,7 @@ template bool readDateTimeTextFallback(time_t &, ReadBuffer &, const DateL void skipJSONField(ReadBuffer & buf, StringRef name_of_field) { if (buf.eof()) - throw Exception("Unexpected EOF for key '" + name_of_field.toString() + "'", ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected EOF for key '{}'", name_of_field.toString()); else if (*buf.position() == '"') /// skip double-quoted string { NullOutput sink; @@ -1221,7 +1239,7 @@ void skipJSONField(ReadBuffer & buf, StringRef name_of_field) double v; if (!tryReadFloatText(v, buf)) - throw Exception("Expected a number field for key '" + name_of_field.toString() + "'", ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Expected a number field for key '{}'", name_of_field.toString()); } else if (*buf.position() == 'n') /// skip null { @@ -1262,7 +1280,7 @@ void skipJSONField(ReadBuffer & buf, StringRef name_of_field) break; } else - throw Exception("Unexpected symbol for key '" + name_of_field.toString() + "'", ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected symbol for key '{}'", name_of_field.toString()); } } else if (*buf.position() == '{') /// skip whole object @@ -1279,12 +1297,12 @@ void skipJSONField(ReadBuffer & buf, StringRef name_of_field) readJSONStringInto(sink, buf); } else - throw Exception("Unexpected symbol for key '" + name_of_field.toString() + "'", ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected symbol for key '{}'", name_of_field.toString()); // ':' skipWhitespaceIfAny(buf); if (buf.eof() || !(*buf.position() == ':')) - throw Exception("Unexpected symbol for key '" + name_of_field.toString() + "'", ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected symbol for key '{}'", name_of_field.toString()); ++buf.position(); skipWhitespaceIfAny(buf); @@ -1300,12 +1318,13 @@ void skipJSONField(ReadBuffer & buf, StringRef name_of_field) } if (buf.eof()) - throw Exception("Unexpected EOF for key '" + name_of_field.toString() + "'", ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected EOF for key '{}'", name_of_field.toString()); ++buf.position(); } else { - throw Exception("Unexpected symbol '" + std::string(*buf.position(), 1) + "' for key '" + name_of_field.toString() + "'", ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected symbol '{}' for key '{}'", + std::string(*buf.position(), 1), name_of_field.toString()); } } @@ -1337,7 +1356,7 @@ Exception readException(ReadBuffer & buf, const String & 
additional_message, boo if (!stack_trace.empty()) out << " Stack trace:\n\n" << stack_trace; - return Exception(out.str(), code, remote_exception); + return Exception::createDeprecated(out.str(), code, remote_exception); } void readAndThrowException(ReadBuffer & buf, const String & additional_message) @@ -1679,4 +1698,10 @@ void readJSONField(String & s, ReadBuffer & buf) readParsedValueInto(s, buf, parse_func); } +void readTSVField(String & s, ReadBuffer & buf) +{ + s.clear(); + readEscapedStringIntoImpl(s, buf); +} + } diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index f69e3d5c7b3..fd547220069 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -287,7 +287,7 @@ inline void readBoolTextWord(bool & x, ReadBuffer & buf, bool support_upper_case [[fallthrough]]; } default: - throw ParsingException("Unexpected Bool value", ErrorCodes::CANNOT_PARSE_BOOL); + throw ParsingException(ErrorCodes::CANNOT_PARSE_BOOL, "Unexpected Bool value"); } } @@ -331,9 +331,8 @@ ReturnType readIntTextImpl(T & x, ReadBuffer & buf) if (has_sign) { if constexpr (throw_exception) - throw ParsingException( - "Cannot parse number with multiple sign (+/-) characters", - ErrorCodes::CANNOT_PARSE_NUMBER); + throw ParsingException(ErrorCodes::CANNOT_PARSE_NUMBER, + "Cannot parse number with multiple sign (+/-) characters"); else return ReturnType(false); } @@ -349,9 +348,8 @@ ReturnType readIntTextImpl(T & x, ReadBuffer & buf) if (has_sign) { if constexpr (throw_exception) - throw ParsingException( - "Cannot parse number with multiple sign (+/-) characters", - ErrorCodes::CANNOT_PARSE_NUMBER); + throw ParsingException(ErrorCodes::CANNOT_PARSE_NUMBER, + "Cannot parse number with multiple sign (+/-) characters"); else return ReturnType(false); } @@ -361,7 +359,7 @@ ReturnType readIntTextImpl(T & x, ReadBuffer & buf) else { if constexpr (throw_exception) - throw ParsingException("Unsigned type must not contain '-' symbol", ErrorCodes::CANNOT_PARSE_NUMBER); + throw ParsingException(ErrorCodes::CANNOT_PARSE_NUMBER, "Unsigned type must not contain '-' symbol"); else return ReturnType(false); } @@ -423,8 +421,8 @@ end: if (has_sign && !has_number) { if constexpr (throw_exception) - throw ParsingException( - "Cannot parse number with a sign character but without any numeric character", ErrorCodes::CANNOT_PARSE_NUMBER); + throw ParsingException(ErrorCodes::CANNOT_PARSE_NUMBER, + "Cannot parse number with a sign character but without any numeric character"); else return ReturnType(false); } @@ -604,7 +602,7 @@ void readBackQuotedStringInto(Vector & s, ReadBuffer & buf); template void readStringUntilEOFInto(Vector & s, ReadBuffer & buf); -template +template void readCSVStringInto(Vector & s, ReadBuffer & buf, const FormatSettings::CSV & settings); /// ReturnType is either bool or void. If bool, the function will return false instead of throwing an exception. 
@@ -808,7 +806,7 @@ inline ReturnType readUUIDTextImpl(UUID & uuid, ReadBuffer & buf) if constexpr (throw_exception) { - throw ParsingException(std::string("Cannot parse uuid ") + s, ErrorCodes::CANNOT_PARSE_UUID); + throw ParsingException(ErrorCodes::CANNOT_PARSE_UUID, "Cannot parse uuid {}", s); } else { @@ -829,7 +827,7 @@ inline ReturnType readUUIDTextImpl(UUID & uuid, ReadBuffer & buf) if constexpr (throw_exception) { - throw ParsingException(std::string("Cannot parse uuid ") + s, ErrorCodes::CANNOT_PARSE_UUID); + throw ParsingException(ErrorCodes::CANNOT_PARSE_UUID, "Cannot parse uuid {}", s); } else { @@ -855,7 +853,7 @@ inline ReturnType readIPv4TextImpl(IPv4 & ip, ReadBuffer & buf) return ReturnType(true); if constexpr (std::is_same_v) - throw ParsingException(std::string("Cannot parse IPv4 ").append(buf.position(), buf.available()), ErrorCodes::CANNOT_PARSE_IPV4); + throw ParsingException(ErrorCodes::CANNOT_PARSE_IPV4, "Cannot parse IPv4 {}", std::string_view(buf.position(), buf.available())); else return ReturnType(false); } @@ -877,7 +875,7 @@ inline ReturnType readIPv6TextImpl(IPv6 & ip, ReadBuffer & buf) return ReturnType(true); if constexpr (std::is_same_v) - throw ParsingException(std::string("Cannot parse IPv6 ").append(buf.position(), buf.available()), ErrorCodes::CANNOT_PARSE_IPV6); + throw ParsingException(ErrorCodes::CANNOT_PARSE_IPV6, "Cannot parse IPv6 {}", std::string_view(buf.position(), buf.available())); else return ReturnType(false); } @@ -1061,7 +1059,7 @@ inline void readDateTimeText(LocalDateTime & datetime, ReadBuffer & buf) if (10 != size) { s[size] = 0; - throw ParsingException(std::string("Cannot parse DateTime ") + s, ErrorCodes::CANNOT_PARSE_DATETIME); + throw ParsingException(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse DateTime {}", s); } datetime.year((s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0')); @@ -1077,7 +1075,7 @@ inline void readDateTimeText(LocalDateTime & datetime, ReadBuffer & buf) if (8 != size) { s[size] = 0; - throw ParsingException(std::string("Cannot parse time component of DateTime ") + s, ErrorCodes::CANNOT_PARSE_DATETIME); + throw ParsingException(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot parse time component of DateTime {}", s); } datetime.hour((s[0] - '0') * 10 + (s[1] - '0')); @@ -1300,7 +1298,7 @@ void readQuoted(std::vector & x, ReadBuffer & buf) if (*buf.position() == ',') ++buf.position(); else - throw ParsingException("Cannot read array from text", ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT); + throw ParsingException(ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT, "Cannot read array from text"); } first = false; @@ -1323,7 +1321,7 @@ void readDoubleQuoted(std::vector & x, ReadBuffer & buf) if (*buf.position() == ',') ++buf.position(); else - throw ParsingException("Cannot read array from text", ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT); + throw ParsingException(ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT, "Cannot read array from text"); } first = false; @@ -1575,4 +1573,5 @@ void readQuotedField(String & s, ReadBuffer & buf); void readJSONField(String & s, ReadBuffer & buf); +void readTSVField(String & s, ReadBuffer & buf); } diff --git a/src/IO/ReadSettings.h b/src/IO/ReadSettings.h index 1fa4aa637f5..4a0344bf11b 100644 --- a/src/IO/ReadSettings.h +++ b/src/IO/ReadSettings.h @@ -31,6 +31,13 @@ enum class LocalFSReadMethod */ mmap, + /** + * Use the io_uring Linux subsystem for asynchronous reads. + * Can use direct IO after specified size. + * Can do prefetch with double buffering. 
+ */ + io_uring, + /** * Checks if data is in page cache with 'preadv2' on modern Linux kernels. * If data is in page cache, read from the same thread. @@ -90,7 +97,7 @@ struct ReadSettings /// they will do it. But this behaviour can be changed with this setting. bool enable_filesystem_cache_on_lower_level = true; - size_t max_query_cache_size = (128UL * 1024 * 1024 * 1024); + size_t filesystem_cache_max_download_size = (128UL * 1024 * 1024 * 1024); bool skip_download_if_exceeds_query_cache = true; size_t remote_read_min_bytes_for_seek = DBMS_DEFAULT_BUFFER_SIZE; diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h index 1437b03593f..5bc72c5ff62 100644 --- a/src/IO/ReadWriteBufferFromHTTP.h +++ b/src/IO/ReadWriteBufferFromHTTP.h @@ -627,11 +627,11 @@ namespace detail off_t seek(off_t offset_, int whence) override { if (whence != SEEK_SET) - throw Exception("Only SEEK_SET mode is allowed.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE); + throw Exception(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Only SEEK_SET mode is allowed."); if (offset_ < 0) - throw Exception( - "Seek position is out of bounds. Offset: " + std::to_string(offset_), ErrorCodes::SEEK_POSITION_OUT_OF_BOUND); + throw Exception(ErrorCodes::SEEK_POSITION_OUT_OF_BOUND, "Seek position is out of bounds. Offset: {}", + offset_); off_t current_offset = getOffset(); if (!working_buffer.empty() && size_t(offset_) >= current_offset - working_buffer.size() && offset_ < current_offset) diff --git a/src/IO/S3/PocoHTTPClient.cpp b/src/IO/S3/PocoHTTPClient.cpp index 33553d483eb..b0ab1c52409 100644 --- a/src/IO/S3/PocoHTTPClient.cpp +++ b/src/IO/S3/PocoHTTPClient.cpp @@ -56,6 +56,16 @@ namespace ProfileEvents extern const Event DiskS3WriteRequestsErrors; extern const Event DiskS3WriteRequestsThrottling; extern const Event DiskS3WriteRequestsRedirects; + + extern const Event S3GetRequestThrottlerCount; + extern const Event S3GetRequestThrottlerSleepMicroseconds; + extern const Event S3PutRequestThrottlerCount; + extern const Event S3PutRequestThrottlerSleepMicroseconds; + + extern const Event DiskS3GetRequestThrottlerCount; + extern const Event DiskS3GetRequestThrottlerSleepMicroseconds; + extern const Event DiskS3PutRequestThrottlerCount; + extern const Event DiskS3PutRequestThrottlerSleepMicroseconds; } namespace CurrentMetrics @@ -212,7 +222,7 @@ PocoHTTPClient::S3MetricKind PocoHTTPClient::getMetricKind(const Aws::Http::Http case Aws::Http::HttpMethod::HTTP_PATCH: return S3MetricKind::Write; } - throw Exception("Unsupported request method", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported request method"); } void PocoHTTPClient::addMetric(const Aws::Http::HttpRequest & request, S3MetricType type, ProfileEvents::Count amount) const @@ -257,13 +267,27 @@ void PocoHTTPClient::makeRequestInternal( case Aws::Http::HttpMethod::HTTP_GET: case Aws::Http::HttpMethod::HTTP_HEAD: if (get_request_throttler) - get_request_throttler->add(1); + { + UInt64 sleep_us = get_request_throttler->add(1, ProfileEvents::S3GetRequestThrottlerCount, ProfileEvents::S3GetRequestThrottlerSleepMicroseconds); + if (for_disk_s3) + { + ProfileEvents::increment(ProfileEvents::DiskS3GetRequestThrottlerCount); + ProfileEvents::increment(ProfileEvents::DiskS3GetRequestThrottlerSleepMicroseconds, sleep_us); + } + } break; case Aws::Http::HttpMethod::HTTP_PUT: case Aws::Http::HttpMethod::HTTP_POST: case Aws::Http::HttpMethod::HTTP_PATCH: if (put_request_throttler) - put_request_throttler->add(1); + { + UInt64 sleep_us = 
put_request_throttler->add(1, ProfileEvents::S3PutRequestThrottlerCount, ProfileEvents::S3PutRequestThrottlerSleepMicroseconds); + if (for_disk_s3) + { + ProfileEvents::increment(ProfileEvents::DiskS3PutRequestThrottlerCount); + ProfileEvents::increment(ProfileEvents::DiskS3PutRequestThrottlerSleepMicroseconds, sleep_us); + } + } break; case Aws::Http::HttpMethod::HTTP_DELETE: break; // Not throttled @@ -477,8 +501,7 @@ void PocoHTTPClient::makeRequestInternal( return; } - throw Exception(String("Too many redirects while trying to access ") + request.GetUri().GetURIString(), - ErrorCodes::TOO_MANY_REDIRECTS); + throw Exception(ErrorCodes::TOO_MANY_REDIRECTS, "Too many redirects while trying to access {}", request.GetUri().GetURIString()); } catch (...) { diff --git a/src/IO/S3/copyDataToS3.cpp b/src/IO/S3/copyS3File.cpp similarity index 77% rename from src/IO/S3/copyDataToS3.cpp rename to src/IO/S3/copyS3File.cpp index f7018c51359..c4f9718e906 100644 --- a/src/IO/S3/copyDataToS3.cpp +++ b/src/IO/S3/copyS3File.cpp @@ -1,10 +1,11 @@ -#include +#include #if USE_AWS_S3 #include #include #include +#include #include #include @@ -22,8 +23,17 @@ namespace ProfileEvents { extern const Event S3CreateMultipartUpload; extern const Event S3CompleteMultipartUpload; - extern const Event S3UploadPart; extern const Event S3PutObject; + extern const Event S3CopyObject; + extern const Event S3UploadPart; + extern const Event S3UploadPartCopy; + + extern const Event DiskS3CreateMultipartUpload; + extern const Event DiskS3CompleteMultipartUpload; + extern const Event DiskS3PutObject; + extern const Event DiskS3CopyObject; + extern const Event DiskS3UploadPart; + extern const Event DiskS3UploadPartCopy; } @@ -50,15 +60,16 @@ namespace const S3Settings::RequestSettings & request_settings_, const std::optional> & object_metadata_, ThreadPoolCallbackRunner schedule_, + bool for_disk_s3_, const Poco::Logger * log_) : client_ptr(client_ptr_) , dest_bucket(dest_bucket_) , dest_key(dest_key_) - , settings(request_settings_.getUploadSettings()) - , check_objects_after_upload(request_settings_.check_objects_after_upload) - , max_unexpected_write_error_retries(request_settings_.max_unexpected_write_error_retries) + , request_settings(request_settings_) + , upload_settings(request_settings.getUploadSettings()) , object_metadata(object_metadata_) , schedule(schedule_) + , for_disk_s3(for_disk_s3_) , log(log_) { } @@ -69,11 +80,11 @@ namespace std::shared_ptr client_ptr; const String & dest_bucket; const String & dest_key; - const S3Settings::RequestSettings::PartUploadSettings & settings; - bool check_objects_after_upload; - size_t max_unexpected_write_error_retries; + const S3Settings::RequestSettings & request_settings; + const S3Settings::RequestSettings::PartUploadSettings & upload_settings; const std::optional> & object_metadata; ThreadPoolCallbackRunner schedule; + bool for_disk_s3; const Poco::Logger * log; struct UploadPartTask @@ -107,10 +118,13 @@ namespace if (object_metadata.has_value()) request.SetMetadata(object_metadata.value()); - if (!settings.storage_class_name.empty()) - request.SetStorageClass(Aws::S3::Model::StorageClassMapper::GetStorageClassForName(settings.storage_class_name)); + const auto & storage_class_name = upload_settings.storage_class_name; + if (!storage_class_name.empty()) + request.SetStorageClass(Aws::S3::Model::StorageClassMapper::GetStorageClassForName(storage_class_name)); ProfileEvents::increment(ProfileEvents::S3CreateMultipartUpload); + if (for_disk_s3) + 
ProfileEvents::increment(ProfileEvents::DiskS3CreateMultipartUpload); auto outcome = client_ptr->CreateMultipartUpload(request); @@ -131,7 +145,7 @@ namespace LOG_TRACE(log, "Completing multipart upload. Bucket: {}, Key: {}, Upload_id: {}, Parts: {}", dest_bucket, dest_key, multipart_upload_id, part_tags.size()); if (part_tags.empty()) - throw Exception("Failed to complete multipart upload. No parts have uploaded", ErrorCodes::S3_ERROR); + throw Exception(ErrorCodes::S3_ERROR, "Failed to complete multipart upload. No parts have uploaded"); Aws::S3::Model::CompleteMultipartUploadRequest request; request.SetBucket(dest_bucket); @@ -147,10 +161,12 @@ namespace request.SetMultipartUpload(multipart_upload); - size_t max_retry = std::max(max_unexpected_write_error_retries, 1UL); - for (size_t i = 0; i < max_retry; ++i) + size_t max_retries = std::max(request_settings.max_unexpected_write_error_retries, 1UL); + for (size_t retries = 1;; ++retries) { ProfileEvents::increment(ProfileEvents::S3CompleteMultipartUpload); + if (for_disk_s3) + ProfileEvents::increment(ProfileEvents::DiskS3CompleteMultipartUpload); auto outcome = client_ptr->CompleteMultipartUpload(request); @@ -159,20 +175,19 @@ namespace LOG_TRACE(log, "Multipart upload has completed. Bucket: {}, Key: {}, Upload_id: {}, Parts: {}", dest_bucket, dest_key, multipart_upload_id, part_tags.size()); break; } - else if (outcome.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY) + + if ((outcome.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY) && (retries < max_retries)) { /// For unknown reason, at least MinIO can respond with NO_SUCH_KEY for put requests /// BTW, NO_SUCH_UPLOAD is expected error and we shouldn't retry it LOG_INFO(log, "Multipart upload failed with NO_SUCH_KEY error for Bucket: {}, Key: {}, Upload_id: {}, Parts: {}, will retry", dest_bucket, dest_key, multipart_upload_id, part_tags.size()); - /// will retry - } - else - { - throw S3Exception( - outcome.GetError().GetErrorType(), - "Message: {}, Key: {}, Bucket: {}, Tags: {}", - outcome.GetError().GetMessage(), dest_key, dest_bucket, fmt::join(part_tags.begin(), part_tags.end(), " ")); + continue; /// will retry } + + throw S3Exception( + outcome.GetError().GetErrorType(), + "Message: {}, Key: {}, Bucket: {}, Tags: {}", + outcome.GetError().GetMessage(), dest_key, dest_bucket, fmt::join(part_tags.begin(), part_tags.end(), " ")); } } @@ -190,7 +205,7 @@ namespace void checkObjectAfterUpload() { LOG_TRACE(log, "Checking object {} exists after upload", dest_key); - S3::checkObjectExists(*client_ptr, dest_bucket, dest_key, {}, {}, "Immediately after upload"); + S3::checkObjectExists(*client_ptr, dest_bucket, dest_key, {}, request_settings, {}, "Immediately after upload"); LOG_TRACE(log, "Object {} exists after upload", dest_key); } @@ -224,47 +239,49 @@ namespace if (!total_size) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chosen multipart upload for an empty file. 
This must not happen"); - if (!settings.max_part_number) + auto max_part_number = upload_settings.max_part_number; + auto min_upload_part_size = upload_settings.min_upload_part_size; + auto max_upload_part_size = upload_settings.max_upload_part_size; + + if (!max_part_number) throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "max_part_number must not be 0"); - else if (!settings.min_upload_part_size) + else if (!min_upload_part_size) throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "min_upload_part_size must not be 0"); - else if (settings.max_upload_part_size < settings.min_upload_part_size) + else if (max_upload_part_size < min_upload_part_size) throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "max_upload_part_size must not be less than min_upload_part_size"); - size_t part_size = settings.min_upload_part_size; + size_t part_size = min_upload_part_size; size_t num_parts = (total_size + part_size - 1) / part_size; - if (num_parts > settings.max_part_number) + if (num_parts > max_part_number) { - part_size = (total_size + settings.max_part_number - 1) / settings.max_part_number; + part_size = (total_size + max_part_number - 1) / max_part_number; num_parts = (total_size + part_size - 1) / part_size; } - if (part_size > settings.max_upload_part_size) + if (part_size > max_upload_part_size) { - part_size = settings.max_upload_part_size; + part_size = max_upload_part_size; num_parts = (total_size + part_size - 1) / part_size; } - if (num_parts < 1 || num_parts > settings.max_part_number || part_size < settings.min_upload_part_size - || part_size > settings.max_upload_part_size) + if (num_parts < 1 || num_parts > max_part_number || part_size < min_upload_part_size || part_size > max_upload_part_size) { String msg; if (num_parts < 1) msg = "Number of parts is zero"; - else if (num_parts > settings.max_part_number) - msg = fmt::format("Number of parts exceeds {}", num_parts, settings.max_part_number); - else if (part_size < settings.min_upload_part_size) - msg = fmt::format("Size of a part is less than {}", part_size, settings.min_upload_part_size); + else if (num_parts > max_part_number) + msg = fmt::format("Number of parts exceeds {}", num_parts, max_part_number); + else if (part_size < min_upload_part_size) + msg = fmt::format("Size of a part is less than {}", part_size, min_upload_part_size); else - msg = fmt::format("Size of a part exceeds {}", part_size, settings.max_upload_part_size); + msg = fmt::format("Size of a part exceeds {}", part_size, max_upload_part_size); throw Exception( ErrorCodes::INVALID_CONFIG_PARAMETER, "{} while writing {} bytes to S3. Check max_part_number = {}, " - "min_upload_part_size = {}, max_upload_part_size = {}, max_single_part_upload_size = {}", - msg, total_size, settings.max_part_number, settings.min_upload_part_size, - settings.max_upload_part_size, settings.max_single_part_upload_size); + "min_upload_part_size = {}, max_upload_part_size = {}", + msg, total_size, max_part_number, min_upload_part_size, max_upload_part_size); } /// We've calculated the size of a normal part (the final part can be smaller). @@ -379,11 +396,11 @@ namespace } }; - /// Helper class to help implementing copyDataToS3(). - class CopyDataToS3Helper : public UploadHelper + /// Helper class to help implementing copyDataToS3File(). 
+ class CopyDataToFileHelper : public UploadHelper { public: - CopyDataToS3Helper( + CopyDataToFileHelper( const std::function()> & create_read_buffer_, size_t offset_, size_t size_, @@ -392,8 +409,9 @@ namespace const String & dest_key_, const S3Settings::RequestSettings & request_settings_, const std::optional> & object_metadata_, - ThreadPoolCallbackRunner schedule_) - : UploadHelper(client_ptr_, dest_bucket_, dest_key_, request_settings_, object_metadata_, schedule_, &Poco::Logger::get("copyDataToS3")) + ThreadPoolCallbackRunner schedule_, + bool for_disk_s3_) + : UploadHelper(client_ptr_, dest_bucket_, dest_key_, request_settings_, object_metadata_, schedule_, for_disk_s3_, &Poco::Logger::get("copyDataToS3File")) , create_read_buffer(create_read_buffer_) , offset(offset_) , size(size_) @@ -402,12 +420,12 @@ namespace void performCopy() { - if (size <= settings.max_single_part_upload_size) + if (size <= upload_settings.max_single_part_upload_size) performSinglepartUpload(); else performMultipartUpload(); - if (check_objects_after_upload) + if (request_settings.check_objects_after_upload) checkObjectAfterUpload(); } @@ -435,8 +453,9 @@ namespace if (object_metadata.has_value()) request.SetMetadata(object_metadata.value()); - if (!settings.storage_class_name.empty()) - request.SetStorageClass(Aws::S3::Model::StorageClassMapper::GetStorageClassForName(settings.storage_class_name)); + const auto & storage_class_name = upload_settings.storage_class_name; + if (!storage_class_name.empty()) + request.SetStorageClass(Aws::S3::Model::StorageClassMapper::GetStorageClassForName(storage_class_name)); /// If we don't do it, AWS SDK can mistakenly set it to application/xml, see https://github.com/aws/aws-sdk-cpp/issues/1840 request.SetContentType("binary/octet-stream"); @@ -444,10 +463,13 @@ namespace void processPutRequest(const Aws::S3::Model::PutObjectRequest & request) { - size_t max_retry = std::max(max_unexpected_write_error_retries, 1UL); - for (size_t i = 0; i < max_retry; ++i) + size_t max_retries = std::max(request_settings.max_unexpected_write_error_retries, 1UL); + for (size_t retries = 1;; ++retries) { ProfileEvents::increment(ProfileEvents::S3PutObject); + if (for_disk_s3) + ProfileEvents::increment(ProfileEvents::DiskS3PutObject); + auto outcome = client_ptr->PutObject(request); if (outcome.IsSuccess()) @@ -460,7 +482,8 @@ namespace request.GetContentLength()); break; } - else if (outcome.GetError().GetExceptionName() == "EntityTooLarge" || outcome.GetError().GetExceptionName() == "InvalidRequest") + + if (outcome.GetError().GetExceptionName() == "EntityTooLarge" || outcome.GetError().GetExceptionName() == "InvalidRequest") { // Can't come here with MinIO, MinIO allows single part upload for large objects. 
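
`performCopy()` above dispatches on a size threshold: payloads up to `max_single_part_upload_size` go through one PutObject-style request, larger ones through a multipart upload, optionally followed by an existence check; the retry loop additionally falls back to multipart when a single-part attempt is rejected with `EntityTooLarge`. A toy sketch of the threshold dispatch only (class and method names here are placeholders, not ClickHouse API):

```cpp
#include <cstddef>
#include <iostream>

struct FakeUploader
{
    size_t max_single_part_upload_size = 32 << 20; // 32 MiB, arbitrary for the demo
    bool check_objects_after_upload = true;

    void performCopy(size_t size)
    {
        if (size <= max_single_part_upload_size)
            singlePartUpload(size);
        else
            multipartUpload(size);

        if (check_objects_after_upload)
            std::cout << "  ...then confirm the object exists after upload\n";
    }

    void singlePartUpload(size_t size) { std::cout << "single PutObject of " << size << " bytes\n"; }
    void multipartUpload(size_t size) { std::cout << "multipart upload of " << size << " bytes\n"; }
};

int main()
{
    FakeUploader uploader;
    uploader.performCopy(1 << 20);   // small payload: single part
    uploader.performCopy(256 << 20); // large payload: multipart
}
```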
LOG_INFO( @@ -473,7 +496,8 @@ namespace performMultipartUpload(); break; } - else if (outcome.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY) + + if ((outcome.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY) && (retries < max_retries)) { /// For unknown reason, at least MinIO can respond with NO_SUCH_KEY for put requests LOG_INFO( @@ -482,18 +506,16 @@ namespace dest_bucket, dest_key, request.GetContentLength()); - /// will retry - } - else - { - throw S3Exception( - outcome.GetError().GetErrorType(), - "Message: {}, Key: {}, Bucket: {}, Object size: {}", - outcome.GetError().GetMessage(), - dest_key, - dest_bucket, - request.GetContentLength()); + continue; /// will retry } + + throw S3Exception( + outcome.GetError().GetErrorType(), + "Message: {}, Key: {}, Bucket: {}, Object size: {}", + outcome.GetError().GetMessage(), + dest_key, + dest_bucket, + request.GetContentLength()); } } @@ -523,6 +545,8 @@ namespace auto & req = typeid_cast(request); ProfileEvents::increment(ProfileEvents::S3UploadPart); + if (for_disk_s3) + ProfileEvents::increment(ProfileEvents::DiskS3UploadPart); auto outcome = client_ptr->UploadPart(req); if (!outcome.IsSuccess()) @@ -535,11 +559,11 @@ namespace } }; - /// Helper class to help implementing copyFileS3ToS3(). - class CopyFileS3ToS3Helper : public UploadHelper + /// Helper class to help implementing copyS3File(). + class CopyFileHelper : public UploadHelper { public: - CopyFileS3ToS3Helper( + CopyFileHelper( const std::shared_ptr & client_ptr_, const String & src_bucket_, const String & src_key_, @@ -549,8 +573,9 @@ namespace const String & dest_key_, const S3Settings::RequestSettings & request_settings_, const std::optional> & object_metadata_, - ThreadPoolCallbackRunner schedule_) - : UploadHelper(client_ptr_, dest_bucket_, dest_key_, request_settings_, object_metadata_, schedule_, &Poco::Logger::get("copyFileS3ToS3")) + ThreadPoolCallbackRunner schedule_, + bool for_disk_s3_) + : UploadHelper(client_ptr_, dest_bucket_, dest_key_, request_settings_, object_metadata_, schedule_, for_disk_s3_, &Poco::Logger::get("copyS3File")) , src_bucket(src_bucket_) , src_key(src_key_) , offset(src_offset_) @@ -560,12 +585,12 @@ namespace void performCopy() { - if (size <= settings.max_single_operation_copy_size) + if (size <= upload_settings.max_single_operation_copy_size) performSingleOperationCopy(); else performMultipartUploadCopy(); - if (check_objects_after_upload) + if (request_settings.check_objects_after_upload) checkObjectAfterUpload(); } @@ -594,8 +619,9 @@ namespace request.SetMetadataDirective(Aws::S3::Model::MetadataDirective::REPLACE); } - if (!settings.storage_class_name.empty()) - request.SetStorageClass(Aws::S3::Model::StorageClassMapper::GetStorageClassForName(settings.storage_class_name)); + const auto & storage_class_name = upload_settings.storage_class_name; + if (!storage_class_name.empty()) + request.SetStorageClass(Aws::S3::Model::StorageClassMapper::GetStorageClassForName(storage_class_name)); /// If we don't do it, AWS SDK can mistakenly set it to application/xml, see https://github.com/aws/aws-sdk-cpp/issues/1840 request.SetContentType("binary/octet-stream"); @@ -603,9 +629,13 @@ namespace void processCopyRequest(const Aws::S3::Model::CopyObjectRequest & request) { - size_t max_retry = std::max(max_unexpected_write_error_retries, 1UL); - for (size_t i = 0; i < max_retry; ++i) + size_t max_retries = std::max(request_settings.max_unexpected_write_error_retries, 1UL); + for (size_t retries = 1;; ++retries) { + 
ProfileEvents::increment(ProfileEvents::S3CopyObject); + if (for_disk_s3) + ProfileEvents::increment(ProfileEvents::DiskS3CopyObject); + auto outcome = client_ptr->CopyObject(request); if (outcome.IsSuccess()) { @@ -617,7 +647,8 @@ namespace size); break; } - else if (outcome.GetError().GetExceptionName() == "EntityTooLarge" || outcome.GetError().GetExceptionName() == "InvalidRequest") + + if (outcome.GetError().GetExceptionName() == "EntityTooLarge" || outcome.GetError().GetExceptionName() == "InvalidRequest") { // Can't come here with MinIO, MinIO allows single part upload for large objects. LOG_INFO( @@ -630,7 +661,8 @@ namespace performMultipartUploadCopy(); break; } - else if (outcome.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY) + + if ((outcome.GetError().GetErrorType() == Aws::S3::S3Errors::NO_SUCH_KEY) && (retries < max_retries)) { /// TODO: Is it true for copy requests? /// For unknown reason, at least MinIO can respond with NO_SUCH_KEY for put requests @@ -640,18 +672,16 @@ namespace dest_bucket, dest_key, size); - /// will retry - } - else - { - throw S3Exception( - outcome.GetError().GetErrorType(), - "Message: {}, Key: {}, Bucket: {}, Object size: {}", - outcome.GetError().GetMessage(), - dest_key, - dest_bucket, - size); + continue; /// will retry } + + throw S3Exception( + outcome.GetError().GetErrorType(), + "Message: {}, Key: {}, Bucket: {}, Object size: {}", + outcome.GetError().GetMessage(), + dest_key, + dest_bucket, + size); } } @@ -676,11 +706,15 @@ namespace { auto & req = typeid_cast(request); + ProfileEvents::increment(ProfileEvents::S3UploadPartCopy); + if (for_disk_s3) + ProfileEvents::increment(ProfileEvents::DiskS3UploadPartCopy); + auto outcome = client_ptr->UploadPartCopy(req); if (!outcome.IsSuccess()) { abortMultipartUpload(); - throw Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR); + throw Exception::createDeprecated(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR); } return outcome.GetResult().GetCopyPartResult().GetETag(); @@ -689,7 +723,7 @@ namespace } -void copyDataToS3( +void copyDataToS3File( const std::function()> & create_read_buffer, size_t offset, size_t size, @@ -698,14 +732,15 @@ void copyDataToS3( const String & dest_key, const S3Settings::RequestSettings & settings, const std::optional> & object_metadata, - ThreadPoolCallbackRunner schedule) + ThreadPoolCallbackRunner schedule, + bool for_disk_s3) { - CopyDataToS3Helper helper{create_read_buffer, offset, size, dest_s3_client, dest_bucket, dest_key, settings, object_metadata, schedule}; + CopyDataToFileHelper helper{create_read_buffer, offset, size, dest_s3_client, dest_bucket, dest_key, settings, object_metadata, schedule, for_disk_s3}; helper.performCopy(); } -void copyFileS3ToS3( +void copyS3File( const std::shared_ptr & s3_client, const String & src_bucket, const String & src_key, @@ -715,9 +750,10 @@ void copyFileS3ToS3( const String & dest_key, const S3Settings::RequestSettings & settings, const std::optional> & object_metadata, - ThreadPoolCallbackRunner schedule) + ThreadPoolCallbackRunner schedule, + bool for_disk_s3) { - CopyFileS3ToS3Helper helper{s3_client, src_bucket, src_key, src_offset, src_size, dest_bucket, dest_key, settings, object_metadata, schedule}; + CopyFileHelper helper{s3_client, src_bucket, src_key, src_offset, src_size, dest_bucket, dest_key, settings, object_metadata, schedule, for_disk_s3}; helper.performCopy(); } diff --git a/src/IO/S3/copyDataToS3.h b/src/IO/S3/copyS3File.h similarity index 82% rename from 
src/IO/S3/copyDataToS3.h rename to src/IO/S3/copyS3File.h index 1ad308fa1d4..808860650b8 100644 --- a/src/IO/S3/copyDataToS3.h +++ b/src/IO/S3/copyS3File.h @@ -16,27 +16,11 @@ namespace DB { class SeekableReadBuffer; -/// Copies data from any seekable source to S3. -/// The same functionality can be done by using the function copyData() and the class WriteBufferFromS3 -/// however copyDataToS3() is faster and spends less memory. -/// The callback `create_read_buffer` can be called from multiple threads in parallel, so that should be thread-safe. -/// The parameters `offset` and `size` specify a part in the source to copy. -void copyDataToS3( - const std::function()> & create_read_buffer, - size_t offset, - size_t size, - const std::shared_ptr & dest_s3_client, - const String & dest_bucket, - const String & dest_key, - const S3Settings::RequestSettings & settings, - const std::optional> & object_metadata = std::nullopt, - ThreadPoolCallbackRunner schedule_ = {}); - /// Copies a file from S3 to S3. /// The same functionality can be done by using the function copyData() and the classes ReadBufferFromS3 and WriteBufferFromS3 -/// however copyFileS3ToS3() is faster and spends less network traffic and memory. +/// however copyS3File() is faster and spends less network traffic and memory. /// The parameters `src_offset` and `src_size` specify a part in the source to copy. -void copyFileS3ToS3( +void copyS3File( const std::shared_ptr & s3_client, const String & src_bucket, const String & src_key, @@ -46,7 +30,25 @@ void copyFileS3ToS3( const String & dest_key, const S3Settings::RequestSettings & settings, const std::optional> & object_metadata = std::nullopt, - ThreadPoolCallbackRunner schedule_ = {}); + ThreadPoolCallbackRunner schedule_ = {}, + bool for_disk_s3 = false); + +/// Copies data from any seekable source to S3. +/// The same functionality can be done by using the function copyData() and the class WriteBufferFromS3 +/// however copyDataToS3File() is faster and spends less memory. +/// The callback `create_read_buffer` can be called from multiple threads in parallel, so that should be thread-safe. +/// The parameters `offset` and `size` specify a part in the source to copy. 
+void copyDataToS3File( + const std::function()> & create_read_buffer, + size_t offset, + size_t size, + const std::shared_ptr & dest_s3_client, + const String & dest_bucket, + const String & dest_key, + const S3Settings::RequestSettings & settings, + const std::optional> & object_metadata = std::nullopt, + ThreadPoolCallbackRunner schedule_ = {}, + bool for_disk_s3 = false); } diff --git a/src/IO/S3/getObjectInfo.cpp b/src/IO/S3/getObjectInfo.cpp new file mode 100644 index 00000000000..04871682296 --- /dev/null +++ b/src/IO/S3/getObjectInfo.cpp @@ -0,0 +1,218 @@ +#include + +#if USE_AWS_S3 +#include +#include +#include +#include + + +namespace ErrorCodes +{ + extern const int S3_ERROR; +} + + +namespace ProfileEvents +{ + extern const Event S3GetObject; + extern const Event S3GetObjectAttributes; + extern const Event S3HeadObject; + extern const Event DiskS3GetObject; + extern const Event DiskS3GetObjectAttributes; + extern const Event DiskS3HeadObject; +} + + +namespace DB::S3 +{ + +namespace +{ + Aws::S3::Model::HeadObjectOutcome headObject( + const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool for_disk_s3) + { + ProfileEvents::increment(ProfileEvents::S3HeadObject); + if (for_disk_s3) + ProfileEvents::increment(ProfileEvents::DiskS3HeadObject); + + Aws::S3::Model::HeadObjectRequest req; + req.SetBucket(bucket); + req.SetKey(key); + + if (!version_id.empty()) + req.SetVersionId(version_id); + + return client.HeadObject(req); + } + + Aws::S3::Model::GetObjectAttributesOutcome getObjectAttributes(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool for_disk_s3) + { + ProfileEvents::increment(ProfileEvents::S3GetObjectAttributes); + if (for_disk_s3) + ProfileEvents::increment(ProfileEvents::DiskS3GetObjectAttributes); + + Aws::S3::Model::GetObjectAttributesRequest req; + req.SetBucket(bucket); + req.SetKey(key); + + if (!version_id.empty()) + req.SetVersionId(version_id); + + req.SetObjectAttributes({Aws::S3::Model::ObjectAttributes::ObjectSize}); + + return client.GetObjectAttributes(req); + } + + Aws::S3::Model::GetObjectOutcome getObjectDummy(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool for_disk_s3) + { + ProfileEvents::increment(ProfileEvents::S3GetObject); + if (for_disk_s3) + ProfileEvents::increment(ProfileEvents::DiskS3GetObject); + + Aws::S3::Model::GetObjectRequest req; + req.SetBucket(bucket); + req.SetKey(key); + + if (!version_id.empty()) + req.SetVersionId(version_id); + + /// Only the first byte will be read. + /// We don't need that first byte but the range should be set otherwise the entire object will be read. + req.SetRange("bytes=0-0"); + + return client.GetObject(req); + } + + + /// Performs a request to get the size and last modification time of an object. + /// The function performs either HeadObject or GetObjectAttributes request depending on the endpoint. 
+ std::pair, Aws::S3::S3Error> tryGetObjectInfo( + const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, + const S3Settings::RequestSettings & request_settings, bool with_metadata, bool for_disk_s3) + { + if (request_settings.allow_head_object_request) + { + auto outcome = headObject(client, bucket, key, version_id, for_disk_s3); + if (!outcome.IsSuccess()) + return {std::nullopt, outcome.GetError()}; + + const auto & result = outcome.GetResult(); + ObjectInfo object_info; + object_info.size = static_cast(result.GetContentLength()); + object_info.last_modification_time = result.GetLastModified().Millis() / 1000; + + if (with_metadata) + object_info.metadata = result.GetMetadata(); + + return {object_info, {}}; + } + else + { + ObjectInfo object_info; + + { + auto outcome = getObjectAttributes(client, bucket, key, version_id, for_disk_s3); + if (!outcome.IsSuccess()) + return {std::nullopt, outcome.GetError()}; + + const auto & result = outcome.GetResult(); + object_info.size = static_cast(result.GetObjectSize()); + object_info.last_modification_time = result.GetLastModified().Millis() / 1000; + } + + if (with_metadata) + { + auto outcome = getObjectDummy(client, bucket, key, version_id, for_disk_s3); + if (!outcome.IsSuccess()) + return {std::nullopt, outcome.GetError()}; + + const auto & result = outcome.GetResult(); + object_info.metadata = result.GetMetadata(); + } + + return {object_info, {}}; + } + } +} + + +bool isNotFoundError(Aws::S3::S3Errors error) +{ + return error == Aws::S3::S3Errors::RESOURCE_NOT_FOUND || error == Aws::S3::S3Errors::NO_SUCH_KEY; +} + +ObjectInfo getObjectInfo( + const Aws::S3::S3Client & client, + const String & bucket, + const String & key, + const String & version_id, + const S3Settings::RequestSettings & request_settings, + bool with_metadata, + bool for_disk_s3, + bool throw_on_error) +{ + auto [object_info, error] = tryGetObjectInfo(client, bucket, key, version_id, request_settings, with_metadata, for_disk_s3); + if (object_info) + { + return *object_info; + } + else if (throw_on_error) + { + throw DB::Exception(ErrorCodes::S3_ERROR, + "Failed to get object attributes: {}. 
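
The new getObjectInfo.cpp funnels every public entry point (`getObjectInfo`, `getObjectSize`, `objectExists`, `checkObjectExists`) through a single `tryGetObjectInfo()` that returns "either an ObjectInfo or an error". A standalone sketch of that pair-of-optional-and-error shape and how the callers branch on it (the types below are simplified stand-ins, not the AWS SDK ones):

```cpp
#include <cstddef>
#include <ctime>
#include <iostream>
#include <optional>
#include <stdexcept>
#include <string>
#include <utility>

struct ObjectInfo
{
    size_t size = 0;
    std::time_t last_modification_time = 0;
};

struct S3Error
{
    bool not_found = false;
    std::string message;
};

// Either a populated ObjectInfo, or an error describing why it is missing.
std::pair<std::optional<ObjectInfo>, S3Error> tryGetObjectInfo(const std::string & key)
{
    if (key == "missing")
        return {std::nullopt, {true, "NoSuchKey"}};
    return {ObjectInfo{42, 0}, {}};
}

// objectExists() maps "not found" to false and everything else to an exception,
// like the real helper does with isNotFoundError().
bool objectExists(const std::string & key)
{
    auto [info, error] = tryGetObjectInfo(key);
    if (info)
        return true;
    if (error.not_found)
        return false;
    throw std::runtime_error("failed to check existence: " + error.message);
}

int main()
{
    std::cout << std::boolalpha << objectExists("present") << ' ' << objectExists("missing") << '\n';
}
```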
HTTP response code: {}", + error.GetMessage(), static_cast(error.GetResponseCode())); + } + return {}; +} + +size_t getObjectSize( + const Aws::S3::S3Client & client, + const String & bucket, + const String & key, + const String & version_id, + const S3Settings::RequestSettings & request_settings, + bool for_disk_s3, + bool throw_on_error) +{ + return getObjectInfo(client, bucket, key, version_id, request_settings, {}, for_disk_s3, throw_on_error).size; +} + +bool objectExists( + const Aws::S3::S3Client & client, + const String & bucket, + const String & key, + const String & version_id, + const S3Settings::RequestSettings & request_settings, + bool for_disk_s3) +{ + auto [object_info, error] = tryGetObjectInfo(client, bucket, key, version_id, request_settings, {}, for_disk_s3); + if (object_info) + return true; + + if (isNotFoundError(error.GetErrorType())) + return false; + + throw S3Exception(error.GetErrorType(), + "Failed to check existence of key {} in bucket {}: {}", + key, bucket, error.GetMessage()); +} + +void checkObjectExists( + const Aws::S3::S3Client & client, + const String & bucket, + const String & key, + const String & version_id, + const S3Settings::RequestSettings & request_settings, + bool for_disk_s3, + std::string_view description) +{ + auto [object_info, error] = tryGetObjectInfo(client, bucket, key, version_id, request_settings, {}, for_disk_s3); + if (object_info) + return; + throw S3Exception(error.GetErrorType(), "{}Object {} in bucket {} suddenly disappeared: {}", + (description.empty() ? "" : (String(description) + ": ")), key, bucket, error.GetMessage()); +} +} + +#endif diff --git a/src/IO/S3/getObjectInfo.h b/src/IO/S3/getObjectInfo.h new file mode 100644 index 00000000000..363e0601f32 --- /dev/null +++ b/src/IO/S3/getObjectInfo.h @@ -0,0 +1,63 @@ +#pragma once + +#include "config.h" + +#if USE_AWS_S3 +#include +#include +#include + + +namespace DB::S3 +{ + +struct ObjectInfo +{ + size_t size = 0; + time_t last_modification_time = 0; + + std::map metadata; /// Set only if getObjectInfo() is called with `with_metadata = true`. +}; + +ObjectInfo getObjectInfo( + const Aws::S3::S3Client & client, + const String & bucket, + const String & key, + const String & version_id = {}, + const S3Settings::RequestSettings & request_settings = {}, + bool with_metadata = false, + bool for_disk_s3 = false, + bool throw_on_error = true); + +size_t getObjectSize( + const Aws::S3::S3Client & client, + const String & bucket, + const String & key, + const String & version_id = {}, + const S3Settings::RequestSettings & request_settings = {}, + bool for_disk_s3 = false, + bool throw_on_error = true); + +bool objectExists( + const Aws::S3::S3Client & client, + const String & bucket, + const String & key, + const String & version_id = {}, + const S3Settings::RequestSettings & request_settings = {}, + bool for_disk_s3 = false); + +/// Throws an exception if a specified object doesn't exist. `description` is used as a part of the error message. 
+void checkObjectExists( + const Aws::S3::S3Client & client, + const String & bucket, + const String & key, + const String & version_id = {}, + const S3Settings::RequestSettings & request_settings = {}, + bool for_disk_s3 = false, + std::string_view description = {}); + +bool isNotFoundError(Aws::S3::S3Errors error); + +} + +#endif diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index cd4ab87469b..e590cbdcf31 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -705,92 +705,6 @@ public: } }; -/// Extracts the endpoint from a constructed S3 client. -String getEndpoint(const Aws::S3::S3Client & client) -{ - const auto * endpoint_provider = dynamic_cast(const_cast(client).accessEndpointProvider().get()); - if (!endpoint_provider) - return {}; - String endpoint; - endpoint_provider->GetBuiltInParameters().GetParameter("Endpoint").GetString(endpoint); - return endpoint; -} - -/// Performs a request to get the size and last modification time of an object. -/// The function performs either HeadObject or GetObjectAttributes request depending on the endpoint. -std::pair, Aws::S3::S3Error> tryGetObjectInfo( - const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool for_disk_s3) -{ - auto endpoint = getEndpoint(client); - bool use_get_object_attributes_request = (endpoint.find(".amazonaws.com") != String::npos); - - if (use_get_object_attributes_request) - { - /// It's better not to use `HeadObject` requests for AWS S3 because they don't work well with the global region. - /// Details: `HeadObject` request never returns a response body (even if there is an error) however - /// if the request was sent without specifying a region in the endpoint (i.e. for example "https://test.s3.amazonaws.com/mydata.csv" - /// instead of "https://test.s3-us-west-2.amazonaws.com/mydata.csv") then that response body is one of the main ways - /// to determine the correct region and try to repeat the request again with the correct region. - /// For any other request type (`GetObject`, `ListObjects`, etc.) AWS SDK does that because they have response bodies, - /// but for `HeadObject` there is no response body so this way doesn't work. That's why we use `GetObjectAttributes` request instead. - /// See https://github.com/aws/aws-sdk-cpp/issues/1558 and also the function S3ErrorMarshaller::ExtractRegion() for more information. - - ProfileEvents::increment(ProfileEvents::S3GetObjectAttributes); - if (for_disk_s3) - ProfileEvents::increment(ProfileEvents::DiskS3GetObjectAttributes); - - Aws::S3::Model::GetObjectAttributesRequest req; - req.SetBucket(bucket); - req.SetKey(key); - - if (!version_id.empty()) - req.SetVersionId(version_id); - - req.SetObjectAttributes({Aws::S3::Model::ObjectAttributes::ObjectSize}); - - auto outcome = client.GetObjectAttributes(req); - if (outcome.IsSuccess()) - { - const auto & result = outcome.GetResult(); - DB::S3::ObjectInfo object_info; - object_info.size = static_cast(result.GetObjectSize()); - object_info.last_modification_time = result.GetLastModified().Millis() / 1000; - return {object_info, {}}; - } - - return {std::nullopt, outcome.GetError()}; - } - else - { - /// By default we use `HeadObject` requests. - /// We cannot just use `GetObjectAttributes` requests always because some S3 providers (e.g. Minio) - /// don't support `GetObjectAttributes` requests. 
- - ProfileEvents::increment(ProfileEvents::S3HeadObject); - if (for_disk_s3) - ProfileEvents::increment(ProfileEvents::DiskS3HeadObject); - - Aws::S3::Model::HeadObjectRequest req; - req.SetBucket(bucket); - req.SetKey(key); - - if (!version_id.empty()) - req.SetVersionId(version_id); - - auto outcome = client.HeadObject(req); - if (outcome.IsSuccess()) - { - const auto & result = outcome.GetResult(); - DB::S3::ObjectInfo object_info; - object_info.size = static_cast(result.GetContentLength()); - object_info.last_modification_time = result.GetLastModified().Millis() / 1000; - return {object_info, {}}; - } - - return {std::nullopt, outcome.GetError()}; - } -} - } @@ -951,7 +865,9 @@ namespace S3 boost::to_upper(name); if (name != S3 && name != COS && name != OBS && name != OSS) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Object storage system name is unrecognized in virtual hosted style S3 URI: {}", quoteString(name)); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Object storage system name is unrecognized in virtual hosted style S3 URI: {}", + quoteString(name)); if (name == S3) storage_name = name; @@ -977,91 +893,9 @@ namespace S3 /// S3 specification requires at least 3 and at most 63 characters in bucket name. /// https://docs.aws.amazon.com/awscloudtrail/latest/userguide/cloudtrail-s3-bucket-naming-requirements.html if (bucket.length() < 3 || bucket.length() > 63) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bucket name length is out of bounds in virtual hosted style S3 URI: {}{}", + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bucket name length is out of bounds in virtual hosted style S3 URI: {}{}", quoteString(bucket), !uri.empty() ? " (" + uri.toString() + ")" : ""); } - - bool isNotFoundError(Aws::S3::S3Errors error) - { - return error == Aws::S3::S3Errors::RESOURCE_NOT_FOUND || error == Aws::S3::S3Errors::NO_SUCH_KEY; - } - - ObjectInfo getObjectInfo(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool for_disk_s3, bool throw_on_error) - { - auto [object_info, error] = tryGetObjectInfo(client, bucket, key, version_id, for_disk_s3); - if (object_info) - { - return *object_info; - } - else if (throw_on_error) - { - throw DB::Exception(ErrorCodes::S3_ERROR, - "Failed to get object attributes: {}. 
HTTP response code: {}", - error.GetMessage(), static_cast(error.GetResponseCode())); - } - return {}; - } - - size_t getObjectSize(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool for_disk_s3, bool throw_on_error) - { - return getObjectInfo(client, bucket, key, version_id, for_disk_s3, throw_on_error).size; - } - - bool objectExists(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool for_disk_s3) - { - auto [object_info, error] = tryGetObjectInfo(client, bucket, key, version_id, for_disk_s3); - if (object_info) - return true; - - if (isNotFoundError(error.GetErrorType())) - return false; - - throw S3Exception(error.GetErrorType(), - "Failed to check existence of key {} in bucket {}: {}", - key, bucket, error.GetMessage()); - } - - void checkObjectExists(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool for_disk_s3, std::string_view description) - { - auto [object_info, error] = tryGetObjectInfo(client, bucket, key, version_id, for_disk_s3); - if (object_info) - return; - throw S3Exception(error.GetErrorType(), "{}Object {} in bucket {} suddenly disappeared: {}", - (description.empty() ? "" : (String(description) + ": ")), key, bucket, error.GetMessage()); - } - - std::map getObjectMetadata(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool for_disk_s3, bool throw_on_error) - { - ProfileEvents::increment(ProfileEvents::S3GetObjectMetadata); - if (for_disk_s3) - ProfileEvents::increment(ProfileEvents::DiskS3GetObjectMetadata); - - /// We must not use the `HeadObject` request, see the comment about `HeadObjectRequest` in S3Common.h. - - Aws::S3::Model::GetObjectRequest req; - req.SetBucket(bucket); - req.SetKey(key); - - /// Only the first byte will be read. - /// We don't need that first byte but the range should be set otherwise the entire object will be read. - req.SetRange("bytes=0-0"); - - if (!version_id.empty()) - req.SetVersionId(version_id); - - auto outcome = client.GetObject(req); - - if (outcome.IsSuccess()) - return outcome.GetResult().GetMetadata(); - - if (!throw_on_error) - return {}; - - const auto & error = outcome.GetError(); - throw S3Exception(error.GetErrorType(), - "Failed to get metadata of key {} in bucket {}: {}", - key, bucket, error.GetMessage()); - } } } @@ -1104,7 +938,7 @@ AuthSettings AuthSettings::loadFromConfig(const std::string & config_elem, const auto header_str = config.getString(config_elem + "." + subkey); auto delimiter = header_str.find(':'); if (delimiter == std::string::npos) - throw Exception("Malformed s3 header value", ErrorCodes::INVALID_CONFIG_PARAMETER); + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Malformed s3 header value"); headers.emplace_back(header_str.substr(0, delimiter), header_str.substr(delimiter + 1, String::npos)); } } diff --git a/src/IO/S3Common.h b/src/IO/S3Common.h index 69ae1cbb4f4..19e660a338d 100644 --- a/src/IO/S3Common.h +++ b/src/IO/S3Common.h @@ -121,29 +121,6 @@ struct URI static void validateBucket(const String & bucket, const Poco::URI & uri); }; -/// WARNING: Don't use `HeadObjectRequest`! Use the functions below instead. -/// For explanation see the comment about `HeadObject` request in the function tryGetObjectInfo(). 
- -struct ObjectInfo -{ - size_t size = 0; - time_t last_modification_time = 0; -}; - -ObjectInfo getObjectInfo(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id = "", bool for_disk_s3 = false, bool throw_on_error = true); - -size_t getObjectSize(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id = "", bool for_disk_s3 = false, bool throw_on_error = true); - -bool objectExists(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id = "", bool for_disk_s3 = false); - -/// Throws an exception if a specified object doesn't exist. `description` is used as a part of the error message. -void checkObjectExists(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id = "", bool for_disk_s3 = false, std::string_view description = {}); - -bool isNotFoundError(Aws::S3::S3Errors error); - -/// Returns the object's metadata. -std::map getObjectMetadata(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id = "", bool for_disk_s3 = false, bool throw_on_error = true); - } #endif diff --git a/src/IO/SnappyReadBuffer.cpp b/src/IO/SnappyReadBuffer.cpp index dbdf32a6d07..c554b9991d9 100644 --- a/src/IO/SnappyReadBuffer.cpp +++ b/src/IO/SnappyReadBuffer.cpp @@ -37,7 +37,7 @@ bool SnappyReadBuffer::nextImpl() bool success = snappy::Uncompress(compress_buffer.data(), wb.count(), &uncompress_buffer); if (!success) { - throw Exception("snappy uncomress failed: ", ErrorCodes::SNAPPY_UNCOMPRESS_FAILED); + throw Exception(ErrorCodes::SNAPPY_UNCOMPRESS_FAILED, "snappy uncomress failed: "); } BufferBase::set(const_cast(uncompress_buffer.data()), uncompress_buffer.size(), 0); return true; @@ -55,14 +55,13 @@ off_t SnappyReadBuffer::seek(off_t off, int whence) else if (whence == SEEK_CUR) new_pos = count() + off; else - throw Exception("Only SEEK_SET and SEEK_CUR seek modes allowed.", ErrorCodes::SEEK_POSITION_OUT_OF_BOUND); + throw Exception(ErrorCodes::SEEK_POSITION_OUT_OF_BOUND, "Only SEEK_SET and SEEK_CUR seek modes allowed."); working_buffer = internal_buffer; if (new_pos < 0 || new_pos > off_t(working_buffer.size())) - throw Exception( - String("Cannot seek through buffer") + " because seek position (" + toString(new_pos) + ") is out of bounds [0, " - + toString(working_buffer.size()) + "]", - ErrorCodes::SEEK_POSITION_OUT_OF_BOUND); + throw Exception(ErrorCodes::SEEK_POSITION_OUT_OF_BOUND, + "Cannot seek through buffer because seek position ({}) is out of bounds [0, {}]", + new_pos, working_buffer.size()); position() = working_buffer.begin() + new_pos; return new_pos; } diff --git a/src/IO/SnappyWriteBuffer.cpp b/src/IO/SnappyWriteBuffer.cpp index ada9afebcf5..ca40d0656d1 100644 --- a/src/IO/SnappyWriteBuffer.cpp +++ b/src/IO/SnappyWriteBuffer.cpp @@ -64,7 +64,7 @@ void SnappyWriteBuffer::finishImpl() bool success = snappy::Compress(uncompress_buffer.data(), uncompress_buffer.size(), &compress_buffer); if (!success) { - throw Exception("snappy compress failed: ", ErrorCodes::SNAPPY_COMPRESS_FAILED); + throw Exception(ErrorCodes::SNAPPY_COMPRESS_FAILED, "snappy compress failed: "); } char * in_data = compress_buffer.data(); diff --git a/src/IO/SwapHelper.h b/src/IO/SwapHelper.h new file mode 100644 index 00000000000..cedbf5f78fe --- /dev/null +++ b/src/IO/SwapHelper.h @@ -0,0 +1,16 @@ +#pragma once +#include + +namespace DB +{ + class SwapHelper + { + public: + 
SwapHelper(BufferBase & b1_, BufferBase & b2_) : b1(b1_), b2(b2_) { b1.swap(b2); } + ~SwapHelper() { b1.swap(b2); } + + private: + BufferBase & b1; + BufferBase & b2; + }; +} diff --git a/src/IO/VarInt.h b/src/IO/VarInt.h index 816aa8fd057..0869051034a 100644 --- a/src/IO/VarInt.h +++ b/src/IO/VarInt.h @@ -119,7 +119,7 @@ inline void readVarUInt(T & x, ReadBuffer & istr) [[noreturn]] inline void throwReadAfterEOF() { - throw Exception("Attempt to read after eof", ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF); + throw Exception(ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF, "Attempt to read after eof"); } template diff --git a/src/IO/WithFileName.cpp b/src/IO/WithFileName.cpp index f470d9f7f97..6ecb3671ca0 100644 --- a/src/IO/WithFileName.cpp +++ b/src/IO/WithFileName.cpp @@ -1,6 +1,7 @@ #include #include #include +#include namespace DB { @@ -19,6 +20,8 @@ String getFileNameFromReadBuffer(const ReadBuffer & in) return getFileName(compressed->getWrappedReadBuffer()); else if (const auto * parallel = dynamic_cast(&in)) return getFileName(parallel->getReadBufferFactory()); + else if (const auto * peekable = dynamic_cast(&in)) + return getFileNameFromReadBuffer(peekable->getSubBuffer()); else return getFileName(in); } diff --git a/src/IO/WriteBuffer.h b/src/IO/WriteBuffer.h index 1fc21e1ac17..436d07515a3 100644 --- a/src/IO/WriteBuffer.h +++ b/src/IO/WriteBuffer.h @@ -152,7 +152,7 @@ private: /** Write the data in the buffer (from the beginning of the buffer to the current position). * Throw an exception if something is wrong. */ - virtual void nextImpl() { throw Exception("Cannot write after end of buffer.", ErrorCodes::CANNOT_WRITE_AFTER_END_OF_BUFFER); } + virtual void nextImpl() { throw Exception(ErrorCodes::CANNOT_WRITE_AFTER_END_OF_BUFFER, "Cannot write after end of buffer."); } }; diff --git a/src/IO/WriteBufferFromFile.cpp b/src/IO/WriteBufferFromFile.cpp index b30b4b973f9..91a4608c46e 100644 --- a/src/IO/WriteBufferFromFile.cpp +++ b/src/IO/WriteBufferFromFile.cpp @@ -93,7 +93,7 @@ void WriteBufferFromFile::close() next(); if (0 != ::close(fd)) - throw Exception("Cannot close file", ErrorCodes::CANNOT_CLOSE_FILE); + throw Exception(ErrorCodes::CANNOT_CLOSE_FILE, "Cannot close file"); fd = -1; metric_increment.destroy(); diff --git a/src/IO/WriteBufferFromOStream.cpp b/src/IO/WriteBufferFromOStream.cpp index 1d950fded28..2d0d5976f85 100644 --- a/src/IO/WriteBufferFromOStream.cpp +++ b/src/IO/WriteBufferFromOStream.cpp @@ -18,8 +18,7 @@ void WriteBufferFromOStream::nextImpl() ostr->flush(); if (!ostr->good()) - throw Exception("Cannot write to ostream at offset " + std::to_string(count()), - ErrorCodes::CANNOT_WRITE_TO_OSTREAM); + throw Exception(ErrorCodes::CANNOT_WRITE_TO_OSTREAM, "Cannot write to ostream at offset {}", count()); } WriteBufferFromOStream::WriteBufferFromOStream( diff --git a/src/IO/WriteBufferFromPocoSocket.cpp b/src/IO/WriteBufferFromPocoSocket.cpp index 95d532e9bd4..e01245849ae 100644 --- a/src/IO/WriteBufferFromPocoSocket.cpp +++ b/src/IO/WriteBufferFromPocoSocket.cpp @@ -64,21 +64,21 @@ void WriteBufferFromPocoSocket::nextImpl() } catch (const Poco::Net::NetException & e) { - throw NetException(e.displayText() + ", while writing to socket (" + peer_address.toString() + ")", ErrorCodes::NETWORK_ERROR); + throw NetException(ErrorCodes::NETWORK_ERROR, "{}, while writing to socket ({})", e.displayText(), peer_address.toString()); } catch (const Poco::TimeoutException &) { - throw NetException(fmt::format("Timeout exceeded while writing to socket ({}, {} ms)", + throw 
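
The new SwapHelper is a tiny RAII guard: it swaps two buffers on construction and swaps them back when the scope ends, even if an exception is thrown in between. A usage sketch with a trivial stand-in for `BufferBase`:

```cpp
#include <iostream>
#include <string>
#include <utility>

struct Buffer
{
    std::string name;
    void swap(Buffer & other) { std::swap(name, other.name); }
};

// Same shape as SwapHelper above: swap on entry, swap back on scope exit.
class SwapGuard
{
public:
    SwapGuard(Buffer & b1_, Buffer & b2_) : b1(b1_), b2(b2_) { b1.swap(b2); }
    ~SwapGuard() { b1.swap(b2); }

private:
    Buffer & b1;
    Buffer & b2;
};

int main()
{
    Buffer own{"own memory"};
    Buffer nested{"nested buffer"};
    {
        SwapGuard guard(own, nested); // temporarily operate on the nested buffer's state
        std::cout << "inside scope: own=" << own.name << '\n';
    }
    std::cout << "after scope:  own=" << own.name << '\n'; // restored automatically
}
```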
NetException(ErrorCodes::SOCKET_TIMEOUT, "Timeout exceeded while writing to socket ({}, {} ms)", peer_address.toString(), - socket.impl()->getSendTimeout().totalMilliseconds()), ErrorCodes::SOCKET_TIMEOUT); + socket.impl()->getSendTimeout().totalMilliseconds()); } catch (const Poco::IOException & e) { - throw NetException(e.displayText() + ", while writing to socket (" + peer_address.toString() + ")", ErrorCodes::NETWORK_ERROR); + throw NetException(ErrorCodes::NETWORK_ERROR, "{}, while writing to socket ({})", e.displayText(), peer_address.toString()); } if (res < 0) - throw NetException("Cannot write to socket (" + peer_address.toString() + ")", ErrorCodes::CANNOT_WRITE_TO_SOCKET); + throw NetException(ErrorCodes::CANNOT_WRITE_TO_SOCKET, "Cannot write to socket ({})", peer_address.toString()); bytes_written += res; } diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index 7d279c07e03..c71a0e6a252 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include @@ -39,6 +39,9 @@ namespace ProfileEvents extern const Event DiskS3CompleteMultipartUpload; extern const Event DiskS3UploadPart; extern const Event DiskS3PutObject; + + extern const Event RemoteWriteThrottlerBytes; + extern const Event RemoteWriteThrottlerSleepMicroseconds; } namespace DB @@ -73,7 +76,7 @@ WriteBufferFromS3::WriteBufferFromS3( std::shared_ptr client_ptr_, const String & bucket_, const String & key_, - const S3Settings::RequestSettings & request_settings, + const S3Settings::RequestSettings & request_settings_, std::optional> object_metadata_, size_t buffer_size_, ThreadPoolCallbackRunner schedule_, @@ -81,12 +84,11 @@ WriteBufferFromS3::WriteBufferFromS3( : BufferWithOwnMemory(buffer_size_, nullptr, 0) , bucket(bucket_) , key(key_) - , settings(request_settings.getUploadSettings()) - , check_objects_after_upload(request_settings.check_objects_after_upload) - , max_unexpected_write_error_retries(request_settings.max_unexpected_write_error_retries) + , request_settings(request_settings_) + , upload_settings(request_settings.getUploadSettings()) , client_ptr(std::move(client_ptr_)) , object_metadata(std::move(object_metadata_)) - , upload_part_size(settings.min_upload_part_size) + , upload_part_size(upload_settings.min_upload_part_size) , schedule(std::move(schedule_)) , write_settings(write_settings_) { @@ -108,10 +110,10 @@ void WriteBufferFromS3::nextImpl() ProfileEvents::increment(ProfileEvents::WriteBufferFromS3Bytes, offset()); last_part_size += offset(); if (write_settings.remote_throttler) - write_settings.remote_throttler->add(offset()); + write_settings.remote_throttler->add(offset(), ProfileEvents::RemoteWriteThrottlerBytes, ProfileEvents::RemoteWriteThrottlerSleepMicroseconds); /// Data size exceeds singlepart upload threshold, need to use multipart upload. 
- if (multipart_upload_id.empty() && last_part_size > settings.max_single_part_upload_size) + if (multipart_upload_id.empty() && last_part_size > upload_settings.max_single_part_upload_size) createMultipartUpload(); chassert(upload_part_size > 0); @@ -179,10 +181,10 @@ void WriteBufferFromS3::finalizeImpl() if (!multipart_upload_id.empty()) completeMultipartUpload(); - if (check_objects_after_upload) + if (request_settings.check_objects_after_upload) { LOG_TRACE(log, "Checking object {} exists after upload", key); - S3::checkObjectExists(*client_ptr, bucket, key, {}, /* for_disk_s3= */ write_settings.for_object_storage, "Immediately after upload"); + S3::checkObjectExists(*client_ptr, bucket, key, {}, request_settings, /* for_disk_s3= */ write_settings.for_object_storage, "Immediately after upload"); LOG_TRACE(log, "Object {} exists after upload", key); } } @@ -300,15 +302,22 @@ void WriteBufferFromS3::fillUploadRequest(Aws::S3::Model::UploadPartRequest & re { /// Increase part number. ++part_number; - if (!multipart_upload_id.empty() && (part_number > settings.max_part_number)) + + auto max_part_number = upload_settings.max_part_number; + + if (!multipart_upload_id.empty() && (part_number > max_part_number)) { throw Exception( - ErrorCodes::INVALID_CONFIG_PARAMETER, - "Part number exceeded {} while writing {} bytes to S3. Check min_upload_part_size = {}, max_upload_part_size = {}, " - "upload_part_size_multiply_factor = {}, upload_part_size_multiply_parts_count_threshold = {}, max_single_part_upload_size = {}", - settings.max_part_number, count(), settings.min_upload_part_size, settings.max_upload_part_size, - settings.upload_part_size_multiply_factor, settings.upload_part_size_multiply_parts_count_threshold, - settings.max_single_part_upload_size); + ErrorCodes::INVALID_CONFIG_PARAMETER, + "Part number exceeded {} while writing {} bytes to S3. " + "Check min_upload_part_size = {}, max_upload_part_size = {}, " + "upload_part_size_multiply_factor = {}, upload_part_size_multiply_parts_count_threshold = {}, " + "max_single_part_upload_size = {}", + max_part_number, count(), + upload_settings.min_upload_part_size, upload_settings.max_upload_part_size, + upload_settings.upload_part_size_multiply_factor, + upload_settings.upload_part_size_multiply_parts_count_threshold, + upload_settings.max_single_part_upload_size); } /// Setup request. @@ -323,10 +332,13 @@ void WriteBufferFromS3::fillUploadRequest(Aws::S3::Model::UploadPartRequest & re req.SetContentType("binary/octet-stream"); /// Maybe increase `upload_part_size` (we need to increase it sometimes to keep `part_number` less or equal than `max_part_number`). - if (!multipart_upload_id.empty() && (part_number % settings.upload_part_size_multiply_parts_count_threshold == 0)) + auto threshold = upload_settings.upload_part_size_multiply_parts_count_threshold; + if (!multipart_upload_id.empty() && (part_number % threshold == 0)) { - upload_part_size *= settings.upload_part_size_multiply_factor; - upload_part_size = std::min(upload_part_size, settings.max_upload_part_size); + auto max_upload_part_size = upload_settings.max_upload_part_size; + auto upload_part_size_multiply_factor = upload_settings.upload_part_size_multiply_factor; + upload_part_size *= upload_part_size_multiply_factor; + upload_part_size = std::min(upload_part_size, max_upload_part_size); } } @@ -355,7 +367,7 @@ void WriteBufferFromS3::completeMultipartUpload() LOG_TRACE(log, "Completing multipart upload. 
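
The `fillUploadRequest` hunk above keeps the part-size growth rule: every `upload_part_size_multiply_parts_count_threshold` parts, the current part size is multiplied by `upload_part_size_multiply_factor` and capped at `max_upload_part_size`, so very large uploads stay under the part-number limit without forcing small uploads to use big parts from the start. A sketch with illustrative (not necessarily default) values:

```cpp
#include <algorithm>
#include <cstddef>
#include <iostream>

int main()
{
    size_t part_size = 16ULL << 20;          // starting part size: 16 MiB
    const size_t max_part_size = 5ULL << 30; // cap: 5 GiB
    const size_t factor = 2;                 // upload_part_size_multiply_factor
    const size_t threshold = 500;            // upload_part_size_multiply_parts_count_threshold

    for (size_t part_number = 1; part_number <= 2000; ++part_number)
    {
        if (part_number % threshold == 0)
        {
            // Grow the part size for subsequent parts, never past the cap.
            part_size = std::min(part_size * factor, max_part_size);
            std::cout << "after part " << part_number
                      << " the part size becomes " << (part_size >> 20) << " MiB\n";
        }
    }
}
```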
Bucket: {}, Key: {}, Upload_id: {}, Parts: {}", bucket, key, multipart_upload_id, tags.size()); if (tags.empty()) - throw Exception("Failed to complete multipart upload. No parts have uploaded", ErrorCodes::S3_ERROR); + throw Exception(ErrorCodes::S3_ERROR, "Failed to complete multipart upload. No parts have uploaded"); Aws::S3::Model::CompleteMultipartUploadRequest req; req.SetBucket(bucket); @@ -371,7 +383,7 @@ void WriteBufferFromS3::completeMultipartUpload() req.SetMultipartUpload(multipart_upload); - size_t max_retry = std::max(max_unexpected_write_error_retries, 1UL); + size_t max_retry = std::max(request_settings.max_unexpected_write_error_retries, 1UL); for (size_t i = 0; i < max_retry; ++i) { ProfileEvents::increment(ProfileEvents::S3CompleteMultipartUpload); @@ -470,8 +482,8 @@ void WriteBufferFromS3::fillPutRequest(Aws::S3::Model::PutObjectRequest & req) req.SetBody(temporary_buffer); if (object_metadata.has_value()) req.SetMetadata(object_metadata.value()); - if (!settings.storage_class_name.empty()) - req.SetStorageClass(Aws::S3::Model::StorageClassMapper::GetStorageClassForName(settings.storage_class_name)); + if (!upload_settings.storage_class_name.empty()) + req.SetStorageClass(Aws::S3::Model::StorageClassMapper::GetStorageClassForName(upload_settings.storage_class_name)); /// If we don't do it, AWS SDK can mistakenly set it to application/xml, see https://github.com/aws/aws-sdk-cpp/issues/1840 req.SetContentType("binary/octet-stream"); @@ -479,7 +491,7 @@ void WriteBufferFromS3::fillPutRequest(Aws::S3::Model::PutObjectRequest & req) void WriteBufferFromS3::processPutRequest(const PutObjectTask & task) { - size_t max_retry = std::max(max_unexpected_write_error_retries, 1UL); + size_t max_retry = std::max(request_settings.max_unexpected_write_error_retries, 1UL); for (size_t i = 0; i < max_retry; ++i) { ProfileEvents::increment(ProfileEvents::S3PutObject); diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index 41ed009bcf9..8c81c49d32c 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -50,7 +50,7 @@ public: std::shared_ptr client_ptr_, const String & bucket_, const String & key_, - const S3Settings::RequestSettings & request_settings, + const S3Settings::RequestSettings & request_settings_, std::optional> object_metadata_ = std::nullopt, size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE, ThreadPoolCallbackRunner schedule_ = {}, @@ -88,9 +88,8 @@ private: const String bucket; const String key; - const S3Settings::RequestSettings::PartUploadSettings settings; - const bool check_objects_after_upload = false; - const size_t max_unexpected_write_error_retries = 4; + const S3Settings::RequestSettings request_settings; + const S3Settings::RequestSettings::PartUploadSettings & upload_settings; const std::shared_ptr client_ptr; const std::optional> object_metadata; diff --git a/src/IO/WriteBufferFromVector.h b/src/IO/WriteBufferFromVector.h index 525e11518bd..4b2a3581625 100644 --- a/src/IO/WriteBufferFromVector.h +++ b/src/IO/WriteBufferFromVector.h @@ -81,7 +81,7 @@ private: void nextImpl() override { if (finalized) - throw Exception("WriteBufferFromVector is finalized", ErrorCodes::CANNOT_WRITE_AFTER_END_OF_BUFFER); + throw Exception(ErrorCodes::CANNOT_WRITE_AFTER_END_OF_BUFFER, "WriteBufferFromVector is finalized"); size_t old_size = vector.size(); /// pos may not be equal to vector.data() + old_size, because WriteBuffer::next() can be used to flush data diff --git a/src/IO/WriteHelpers.h b/src/IO/WriteHelpers.h index 
a16381f7fe4..ebc2a9e34c1 100644 --- a/src/IO/WriteHelpers.h +++ b/src/IO/WriteHelpers.h @@ -161,7 +161,7 @@ inline size_t writeFloatTextFastPath(T x, char * buffer) } if (result <= 0) - throw Exception("Cannot print floating point number", ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER); + throw Exception(ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER, "Cannot print floating point number"); return result; } diff --git a/src/IO/WriteSettings.h b/src/IO/WriteSettings.h index 764d6c8992b..08f11e0bfd1 100644 --- a/src/IO/WriteSettings.h +++ b/src/IO/WriteSettings.h @@ -21,6 +21,8 @@ struct WriteSettings /// Monitoring bool for_object_storage = false; // to choose which profile events should be incremented + + bool operator==(const WriteSettings & other) const = default; }; } diff --git a/src/IO/ZlibDeflatingWriteBuffer.cpp b/src/IO/ZlibDeflatingWriteBuffer.cpp index 43014096e2a..82442ea9699 100644 --- a/src/IO/ZlibDeflatingWriteBuffer.cpp +++ b/src/IO/ZlibDeflatingWriteBuffer.cpp @@ -40,7 +40,7 @@ ZlibDeflatingWriteBuffer::ZlibDeflatingWriteBuffer( #pragma GCC diagnostic pop if (rc != Z_OK) - throw Exception(std::string("deflateInit2 failed: ") + zError(rc) + "; zlib version: " + ZLIB_VERSION, ErrorCodes::ZLIB_DEFLATE_FAILED); + throw Exception(ErrorCodes::ZLIB_DEFLATE_FAILED, "deflateInit2 failed: {}; zlib version: {}", zError(rc), ZLIB_VERSION); } void ZlibDeflatingWriteBuffer::nextImpl() @@ -63,7 +63,7 @@ void ZlibDeflatingWriteBuffer::nextImpl() out->position() = out->buffer().end() - zstr.avail_out; if (rc != Z_OK) - throw Exception(std::string("deflate failed: ") + zError(rc), ErrorCodes::ZLIB_DEFLATE_FAILED); + throw Exception(ErrorCodes::ZLIB_DEFLATE_FAILED, "deflate failed: {}", zError(rc)); } while (zstr.avail_in > 0 || zstr.avail_out == 0); } @@ -102,7 +102,7 @@ void ZlibDeflatingWriteBuffer::finalizeBefore() out->position() = out->buffer().end() - zstr.avail_out; if (rc != Z_OK) - throw Exception(std::string("deflate failed: ") + zError(rc), ErrorCodes::ZLIB_DEFLATE_FAILED); + throw Exception(ErrorCodes::ZLIB_DEFLATE_FAILED, "deflate failed: {}", zError(rc)); } while (zstr.avail_out == 0); @@ -121,7 +121,7 @@ void ZlibDeflatingWriteBuffer::finalizeBefore() } if (rc != Z_OK) - throw Exception(std::string("deflate finalizeImpl() failed: ") + zError(rc), ErrorCodes::ZLIB_DEFLATE_FAILED); + throw Exception(ErrorCodes::ZLIB_DEFLATE_FAILED, "deflate finalizeImpl() failed: {}", zError(rc)); } } @@ -131,7 +131,7 @@ void ZlibDeflatingWriteBuffer::finalizeAfter() { int rc = deflateEnd(&zstr); if (rc != Z_OK) - throw Exception(std::string("deflateEnd failed: ") + zError(rc), ErrorCodes::ZLIB_DEFLATE_FAILED); + throw Exception(ErrorCodes::ZLIB_DEFLATE_FAILED, "deflateEnd failed: {}", zError(rc)); } catch (...) 
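
Many hunks in this patch replace "concatenate a message, pass the error code last" exception constructors with "error code first, format string, arguments". A minimal sketch of that style using `std::format` (C++20); the real ClickHouse `Exception` uses the fmt library with compile-time checked patterns, which this stand-in does not reproduce:

```cpp
#include <format>
#include <iostream>
#include <stdexcept>
#include <string>
#include <utility>

// Carries an integer error code alongside a preformatted message.
struct CodedError : std::runtime_error
{
    CodedError(int code_, std::string message) : std::runtime_error(std::move(message)), code(code_) {}
    int code;
};

int main()
{
    int rc = -3;
    const char * zlib_version = "1.2.13";
    try
    {
        // Instead of: throw CodedError(..., std::string("deflate failed: ") + ... + zlib_version);
        throw CodedError(1001, std::format("deflate failed: rc={}; zlib version: {}", rc, zlib_version));
    }
    catch (const CodedError & e)
    {
        std::cout << "code=" << e.code << " message=" << e.what() << '\n';
    }
}
```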
{ diff --git a/src/IO/ZstdDeflatingAppendableWriteBuffer.cpp b/src/IO/ZstdDeflatingAppendableWriteBuffer.cpp index 79fb4ccead5..f8c4d0e2bac 100644 --- a/src/IO/ZstdDeflatingAppendableWriteBuffer.cpp +++ b/src/IO/ZstdDeflatingAppendableWriteBuffer.cpp @@ -26,7 +26,9 @@ ZstdDeflatingAppendableWriteBuffer::ZstdDeflatingAppendableWriteBuffer( throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED, "zstd stream encoder init failed: zstd version: {}", ZSTD_VERSION_STRING); size_t ret = ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, compression_level); if (ZSTD_isError(ret)) - throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED, "zstd stream encoder option setting failed: error code: {}; zstd version: {}", ret, ZSTD_VERSION_STRING); + throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED, + "zstd stream encoder option setting failed: error code: {}; zstd version: {}", + ret, ZSTD_VERSION_STRING); input = {nullptr, 0, 0}; output = {nullptr, 0, 0}; @@ -61,7 +63,9 @@ void ZstdDeflatingAppendableWriteBuffer::nextImpl() size_t compression_result = ZSTD_compressStream2(cctx, &output, &input, ZSTD_e_flush); if (ZSTD_isError(compression_result)) throw Exception( - ErrorCodes::ZSTD_ENCODER_FAILED, "Zstd stream encoding failed: error code: {}; zstd version: {}", ZSTD_getErrorName(compression_result), ZSTD_VERSION_STRING); + ErrorCodes::ZSTD_ENCODER_FAILED, + "Zstd stream encoding failed: error code: {}; zstd version: {}", + ZSTD_getErrorName(compression_result), ZSTD_VERSION_STRING); first_write = false; out->position() = out->buffer().begin() + output.pos; @@ -133,11 +137,22 @@ void ZstdDeflatingAppendableWriteBuffer::finalizeBefore() while (remaining != 0) { if (ZSTD_isError(remaining)) - throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED, "Zstd stream encoder end failed: error: '{}' zstd version: {}", ZSTD_getErrorName(remaining), ZSTD_VERSION_STRING); + throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED, + "Zstd stream encoder end failed: error: '{}' zstd version: {}", + ZSTD_getErrorName(remaining), ZSTD_VERSION_STRING); remaining = ZSTD_compressStream2(cctx, &output, &input, ZSTD_e_end); + + out->position() = out->buffer().begin() + output.pos; + + if (!out->hasPendingData()) + { + out->next(); + output.dst = reinterpret_cast(out->buffer().begin()); + output.size = out->buffer().size(); + output.pos = out->offset(); + } } - out->position() = out->buffer().begin() + output.pos; } void ZstdDeflatingAppendableWriteBuffer::finalizeAfter() @@ -152,7 +167,8 @@ void ZstdDeflatingAppendableWriteBuffer::finalizeZstd() size_t err = ZSTD_freeCCtx(cctx); /// This is just in case, since it is impossible to get an error by using this wrapper. if (unlikely(err)) - throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED, "ZSTD_freeCCtx failed: error: '{}'; zstd version: {}", ZSTD_getErrorName(err), ZSTD_VERSION_STRING); + throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED, "ZSTD_freeCCtx failed: error: '{}'; zstd version: {}", + ZSTD_getErrorName(err), ZSTD_VERSION_STRING); } catch (...) 
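
The ZstdDeflatingAppendableWriteBuffer fix above moves the output-buffer flush inside the `ZSTD_e_end` loop: when the frame epilogue does not fit into the remaining space, the produced bytes must be handed to the wrapped buffer and the output window reset before calling `ZSTD_compressStream2` again, otherwise `remaining` never reaches zero. A self-contained sketch of that pattern against the public zstd streaming API (deliberately tiny output chunk to force several iterations; link with `-lzstd`):

```cpp
#include <cstdio>
#include <string>
#include <vector>
#include <zstd.h>

int main()
{
    std::string input(1 << 20, 'x');  // 1 MiB of compressible data
    std::vector<char> out_chunk(64);  // deliberately tiny output buffer
    std::vector<char> compressed;     // stands in for the wrapped WriteBuffer

    ZSTD_CCtx * cctx = ZSTD_createCCtx();
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, 3);

    ZSTD_inBuffer in{input.data(), input.size(), 0};
    size_t remaining;
    do
    {
        ZSTD_outBuffer out{out_chunk.data(), out_chunk.size(), 0};
        remaining = ZSTD_compressStream2(cctx, &out, &in, ZSTD_e_end);
        if (ZSTD_isError(remaining))
        {
            std::fprintf(stderr, "zstd error: %s\n", ZSTD_getErrorName(remaining));
            ZSTD_freeCCtx(cctx);
            return 1;
        }
        // "Flush" inside the loop: pass produced bytes on, like out->next() in the fix.
        compressed.insert(compressed.end(), out_chunk.data(), out_chunk.data() + out.pos);
    }
    while (remaining != 0); // the frame is finished only when this reaches 0

    ZSTD_freeCCtx(cctx);
    std::printf("compressed %zu bytes into %zu bytes\n", input.size(), compressed.size());
}
```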
{ diff --git a/src/IO/ZstdDeflatingWriteBuffer.cpp b/src/IO/ZstdDeflatingWriteBuffer.cpp index c7f9b0d718b..a12b59b80f5 100644 --- a/src/IO/ZstdDeflatingWriteBuffer.cpp +++ b/src/IO/ZstdDeflatingWriteBuffer.cpp @@ -17,10 +17,14 @@ ZstdDeflatingWriteBuffer::ZstdDeflatingWriteBuffer( throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED, "zstd stream encoder init failed: zstd version: {}", ZSTD_VERSION_STRING); size_t ret = ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, compression_level); if (ZSTD_isError(ret)) - throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED, "zstd stream encoder option setting failed: error code: {}; zstd version: {}", ret, ZSTD_VERSION_STRING); + throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED, + "zstd stream encoder option setting failed: error code: {}; zstd version: {}", + ret, ZSTD_VERSION_STRING); ret = ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1); if (ZSTD_isError(ret)) - throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED, "zstd stream encoder option setting failed: error code: {}; zstd version: {}", ret, ZSTD_VERSION_STRING); + throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED, + "zstd stream encoder option setting failed: error code: {}; zstd version: {}", + ret, ZSTD_VERSION_STRING); input = {nullptr, 0, 0}; output = {nullptr, 0, 0}; @@ -58,7 +62,9 @@ void ZstdDeflatingWriteBuffer::nextImpl() size_t compression_result = ZSTD_compressStream2(cctx, &output, &input, mode); if (ZSTD_isError(compression_result)) throw Exception( - ErrorCodes::ZSTD_ENCODER_FAILED, "Zstd stream encoding failed: error: '{}'; zstd version: {}", ZSTD_getErrorName(compression_result), ZSTD_VERSION_STRING); + ErrorCodes::ZSTD_ENCODER_FAILED, + "Zstd stream encoding failed: error: '{}'; zstd version: {}", + ZSTD_getErrorName(compression_result), ZSTD_VERSION_STRING); out->position() = out->buffer().begin() + output.pos; @@ -103,7 +109,8 @@ void ZstdDeflatingWriteBuffer::finalizeAfter() size_t err = ZSTD_freeCCtx(cctx); /// This is just in case, since it is impossible to get an error by using this wrapper. if (unlikely(err)) - throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED, "ZSTD_freeCCtx failed: error: '{}'; zstd version: {}", ZSTD_getErrorName(err), ZSTD_VERSION_STRING); + throw Exception(ErrorCodes::ZSTD_ENCODER_FAILED, "ZSTD_freeCCtx failed: error: '{}'; zstd version: {}", + ZSTD_getErrorName(err), ZSTD_VERSION_STRING); } catch (...) 
{ diff --git a/src/IO/ZstdInflatingReadBuffer.cpp b/src/IO/ZstdInflatingReadBuffer.cpp index 0d026cdab9a..6956bdb6710 100644 --- a/src/IO/ZstdInflatingReadBuffer.cpp +++ b/src/IO/ZstdInflatingReadBuffer.cpp @@ -58,7 +58,9 @@ bool ZstdInflatingReadBuffer::nextImpl() size_t ret = ZSTD_decompressStream(dctx, &output, &input); if (ZSTD_isError(ret)) throw Exception( - ErrorCodes::ZSTD_DECODER_FAILED, "Zstd stream encoding failed: error '{}'; zstd version: {}", ZSTD_getErrorName(ret), ZSTD_VERSION_STRING); + ErrorCodes::ZSTD_DECODER_FAILED, + "Zstd stream encoding failed: error '{}'; zstd version: {}", + ZSTD_getErrorName(ret), ZSTD_VERSION_STRING); /// Check that something has changed after decompress (input or output position) assert(in->eof() || output.pos > 0 || in->position() < in->buffer().begin() + input.pos); diff --git a/src/IO/copyData.cpp b/src/IO/copyData.cpp index 8a044b50de9..b189c318d67 100644 --- a/src/IO/copyData.cpp +++ b/src/IO/copyData.cpp @@ -34,7 +34,7 @@ void copyDataImpl(ReadBuffer & from, WriteBuffer & to, bool check_bytes, size_t } if (check_bytes && bytes > 0) - throw Exception("Attempt to read after EOF.", ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF); + throw Exception(ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF, "Attempt to read after EOF."); } void copyDataImpl(ReadBuffer & from, WriteBuffer & to, bool check_bytes, size_t bytes, std::function cancellation_hook, ThrottlerPtr throttler) @@ -56,7 +56,7 @@ void copyDataImpl(ReadBuffer & from, WriteBuffer & to, bool check_bytes, size_t } if (check_bytes && bytes > 0) - throw Exception("Attempt to read after EOF.", ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF); + throw Exception(ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF, "Attempt to read after EOF."); } } diff --git a/src/IO/examples/limit_read_buffer2.cpp b/src/IO/examples/limit_read_buffer2.cpp index d70ad832020..ac7c43d764c 100644 --- a/src/IO/examples/limit_read_buffer2.cpp +++ b/src/IO/examples/limit_read_buffer2.cpp @@ -20,9 +20,6 @@ try { using namespace DB; - std::stringstream s; // STYLE_CHECK_ALLOW_STD_STRING_STREAM - s.exceptions(std::ios::failbit); - { std::string src = "1"; @@ -39,21 +36,12 @@ try } if (limit_in.count() != 1) - { - s << "Failed!, incorrect count(): " << limit_in.count(); - throw Exception(s.str(), ErrorCodes::LOGICAL_ERROR); - } + throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed!, incorrect count(): {}", limit_in.count()); if (in.count() != limit_in.count()) - { - s << "Failed!, incorrect underlying buffer's count(): " << in.count(); - throw Exception(s.str(), ErrorCodes::LOGICAL_ERROR); - } + throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed!, incorrect underlying buffer's count(): {}", in.count()); if (src != dst) - { - s << "Failed!, incorrect destination value, read: " << dst << ", expected: " << src; - throw Exception(s.str(), ErrorCodes::LOGICAL_ERROR); - } + throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed!, incorrect destination value, read: {}, expected: {}", dst, src); } { std::string src = "abc"; @@ -73,38 +61,23 @@ try if (in.count() != 2) - { - s << "Failed!, Incorrect underlying buffer's count: " << in.count() << ", expected: " << 2; - throw Exception(s.str(), ErrorCodes::LOGICAL_ERROR); - } + throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed!, Incorrect underlying buffer's count: {}, expected: {}", in.count(), 2); if (limit_in.count() != 1) - { - s << "Failed!, Incorrect count: " << limit_in.count() << ", expected: " << 1; - throw Exception(s.str(), ErrorCodes::LOGICAL_ERROR); - } + throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed!, 
Incorrect count: {}, expected: {}", limit_in.count(), 1); } if (dst != "b") - { - s << "Failed!, Incorrect destination value: " << dst << ", expected 'b'"; - throw Exception(dst, ErrorCodes::LOGICAL_ERROR); - } + throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed!, Incorrect destination value: {}, expected 'b'", dst); char y; readChar(y, in); if (y != 'c') - { - s << "Failed!, Read incorrect value from underlying buffer: " << y << ", expected 'c'"; - throw Exception(s.str(), ErrorCodes::LOGICAL_ERROR); - } + throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed!, Read incorrect value from underlying buffer: {}, expected 'c'", y); while (!in.eof()) in.ignore(); if (in.count() != 3) - { - s << "Failed!, Incorrect final count from underlying buffer: " << in.count() << ", expected: 3"; - throw Exception(s.str(), ErrorCodes::LOGICAL_ERROR); - } + throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed!, Incorrect final count from underlying buffer: {}, expected: 3", in.count()); } { @@ -118,17 +91,11 @@ try readChar(x, limit_in); if (limit_in.count() != 1) - { - s << "Failed!, Incorrect count: " << limit_in.count() << ", expected: " << 1; - throw Exception(s.str(), ErrorCodes::LOGICAL_ERROR); - } + throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed!, Incorrect count: {}, expected: {}", limit_in.count(), 1); } if (in.count() != 1) - { - s << "Failed!, Incorrect final count from underlying buffer: " << in.count() << ", expected: 1"; - throw Exception(s.str(), ErrorCodes::LOGICAL_ERROR); - } + throw Exception(ErrorCodes::LOGICAL_ERROR, "Failed!, Incorrect final count from underlying buffer: {}, expected: 1", in.count()); } return 0; diff --git a/src/IO/examples/lzma_buffers.cpp b/src/IO/examples/lzma_buffers.cpp index ff3d518bfab..126a192737b 100644 --- a/src/IO/examples/lzma_buffers.cpp +++ b/src/IO/examples/lzma_buffers.cpp @@ -48,7 +48,7 @@ try lzma_buf.ignore(); if (x != i) - throw DB::Exception("Failed!, read: " + std::to_string(x) + ", expected: " + std::to_string(i), 0); + throw DB::Exception(0, "Failed!, read: {}, expected: {}", x, i); } stopwatch.stop(); std::cout << "Reading done. Elapsed: " << stopwatch.elapsedSeconds() << " s." diff --git a/src/IO/examples/zlib_buffers.cpp b/src/IO/examples/zlib_buffers.cpp index 2068a3e6668..a36b7a7a41d 100644 --- a/src/IO/examples/zlib_buffers.cpp +++ b/src/IO/examples/zlib_buffers.cpp @@ -50,7 +50,7 @@ try inflating_buf.ignore(); if (x != i) - throw DB::Exception("Failed!, read: " + std::to_string(x) + ", expected: " + std::to_string(i), 0); + throw DB::Exception(0, "Failed!, read: {}, expected: {}", x, i); } stopwatch.stop(); std::cout << "Reading done. Elapsed: " << stopwatch.elapsedSeconds() << " s." diff --git a/src/IO/examples/zstd_buffers.cpp b/src/IO/examples/zstd_buffers.cpp index 533229f4878..26c8899605a 100644 --- a/src/IO/examples/zstd_buffers.cpp +++ b/src/IO/examples/zstd_buffers.cpp @@ -50,7 +50,7 @@ try zstd_buf.ignore(); if (x != i) - throw DB::Exception("Failed!, read: " + std::to_string(x) + ", expected: " + std::to_string(i), 0); + throw DB::Exception(0, "Failed!, read: {}, expected: {}", x, i); } stopwatch.stop(); std::cout << "Reading done. Elapsed: " << stopwatch.elapsedSeconds() << " s." 
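Aside from the re-wrapped exception calls, the `ZstdDeflatingAppendableWriteBuffer::finalizeBefore` hunk above changes behaviour: the `out->position()` update moves inside the `ZSTD_e_end` loop, and the output buffer is re-armed whenever it fills up, so an end-of-frame epilogue larger than one internal buffer is no longer lost. The sketch below shows the same drain-until-zero pattern against plain libzstd; it is an illustration only (the function name, chunk size and error handling are assumptions, not ClickHouse code).

// Standalone sketch: finish a zstd frame by looping on ZSTD_compressStream2(..., ZSTD_e_end)
// and draining the output buffer between calls -- the behaviour finalizeBefore() adopts above.
#include <zstd.h>
#include <stdexcept>
#include <string>
#include <vector>

std::vector<char> zstdCompressWhole(const std::string & data, int level)
{
    ZSTD_CCtx * cctx = ZSTD_createCCtx();
    if (!cctx)
        throw std::runtime_error("ZSTD_createCCtx failed");
    ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level);

    std::vector<char> result;
    std::vector<char> chunk(64);   /// deliberately tiny so the epilogue spans several iterations

    ZSTD_inBuffer input{data.data(), data.size(), 0};
    size_t remaining = 1;
    while (remaining != 0)         /// ZSTD_compressStream2 returns 0 once the frame is fully written
    {
        ZSTD_outBuffer output{chunk.data(), chunk.size(), 0};
        remaining = ZSTD_compressStream2(cctx, &output, &input, ZSTD_e_end);
        if (ZSTD_isError(remaining))
        {
            ZSTD_freeCCtx(cctx);
            throw std::runtime_error(ZSTD_getErrorName(remaining));
        }
        /// Hand the produced bytes on before asking zstd for more output; without this
        /// step a large epilogue would not fit into a single fixed-size buffer.
        result.insert(result.end(), chunk.data(), chunk.data() + output.pos);
    }

    ZSTD_freeCCtx(cctx);
    return result;
}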
diff --git a/src/IO/parseDateTimeBestEffort.cpp b/src/IO/parseDateTimeBestEffort.cpp
index e0cba169e81..e32f50f7450 100644
--- a/src/IO/parseDateTimeBestEffort.cpp
+++ b/src/IO/parseDateTimeBestEffort.cpp
@@ -90,10 +90,12 @@ ReturnType parseDateTimeBestEffortImpl(
     const DateLUTImpl & utc_time_zone,
     DateTimeSubsecondPart * fractional)
 {
-    auto on_error = [](const std::string & message [[maybe_unused]], int code [[maybe_unused]])
+    auto on_error = [&]<typename... FmtArgs>(int error_code [[maybe_unused]],
+                                             FormatStringHelper<FmtArgs...> fmt_string [[maybe_unused]],
+                                             FmtArgs && ...fmt_args [[maybe_unused]])
     {
         if constexpr (std::is_same_v<ReturnType, void>)
-            throw ParsingException(message, code);
+            throw ParsingException(error_code, std::move(fmt_string), std::forward<FmtArgs>(fmt_args)...);
         else
             return false;
     };
@@ -213,7 +215,7 @@ ReturnType parseDateTimeBestEffortImpl(
                     has_time = true;
                 }
                 else
-                    return on_error("Cannot read DateTime: ambiguous 6 digits, it can be YYYYMM or hhmmss", ErrorCodes::CANNOT_PARSE_DATETIME);
+                    return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: ambiguous 6 digits, it can be YYYYMM or hhmmss");
             }
             else if (num_digits == 4 && !year)
             {
@@ -251,7 +253,7 @@ ReturnType parseDateTimeBestEffortImpl(
                     else if (delimiter_after_year == ' ')
                         continue;
                     else
-                        return on_error("Cannot read DateTime: unexpected number of decimal digits after year: " + toString(num_digits), ErrorCodes::CANNOT_PARSE_DATETIME);
+                        return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: unexpected number of decimal digits after year: {}", num_digits);
 
                     /// Only the same delimiter.
                     if (!day_of_month && checkChar(delimiter_after_year, in))
@@ -265,7 +267,7 @@ ReturnType parseDateTimeBestEffortImpl(
                         else if (delimiter_after_year == ' ')
                             continue;
                         else
-                            return on_error("Cannot read DateTime: unexpected number of decimal digits after year and month: " + toString(num_digits), ErrorCodes::CANNOT_PARSE_DATETIME);
+                            return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: unexpected number of decimal digits after year and month: {}", num_digits);
                     }
                 }
             }
@@ -288,12 +290,12 @@ ReturnType parseDateTimeBestEffortImpl(
             else if (num_digits == 1) //-V547
                 readDecimalNumber<1>(hour_or_day_of_month_or_month, digits);
             else
-                return on_error("Cannot read DateTime: logical error, unexpected branch in code", ErrorCodes::LOGICAL_ERROR);
+                return on_error(ErrorCodes::LOGICAL_ERROR, "Cannot read DateTime: logical error, unexpected branch in code");
 
             if (checkChar(':', in))
             {
                 if (has_time)
-                    return on_error("Cannot read DateTime: time component is duplicated", ErrorCodes::CANNOT_PARSE_DATETIME);
+                    return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: time component is duplicated");
 
                 hour = hour_or_day_of_month_or_month;
                 has_time = true;
@@ -305,7 +307,7 @@ ReturnType parseDateTimeBestEffortImpl(
                 else if (num_digits == 1)
                     readDecimalNumber<1>(minute, digits);
                 else
-                    return on_error("Cannot read DateTime: unexpected number of decimal digits after hour: " + toString(num_digits), ErrorCodes::CANNOT_PARSE_DATETIME);
+                    return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: unexpected number of decimal digits after hour: {}", num_digits);
 
                 if (checkChar(':', in))
                 {
@@ -316,16 +318,16 @@ ReturnType parseDateTimeBestEffortImpl(
                     else if (num_digits == 1)
                         readDecimalNumber<1>(second, digits);
                     else
-                        return on_error("Cannot read DateTime: unexpected number of decimal digits after hour and minute: " + toString(num_digits), ErrorCodes::CANNOT_PARSE_DATETIME);
+                        return on_error(ErrorCodes::CANNOT_PARSE_DATETIME,
"Cannot read DateTime: unexpected number of decimal digits after hour and minute: {}", num_digits); } } else if (checkChar('/', in) || checkChar('.', in) || checkChar('-', in)) { if (day_of_month) - return on_error("Cannot read DateTime: day of month is duplicated", ErrorCodes::CANNOT_PARSE_DATETIME); + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: day of month is duplicated"); if (month) - return on_error("Cannot read DateTime: month is duplicated", ErrorCodes::CANNOT_PARSE_DATETIME); + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: month is duplicated"); if constexpr (is_us_style) { @@ -336,7 +338,7 @@ ReturnType parseDateTimeBestEffortImpl( else if (num_digits == 1) readDecimalNumber<1>(day_of_month, digits); else - return on_error("Cannot read DateTime: unexpected number of decimal digits after month: " + toString(num_digits), ErrorCodes::CANNOT_PARSE_DATETIME); + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: unexpected number of decimal digits after month: {}", num_digits); } else { @@ -356,13 +358,13 @@ ReturnType parseDateTimeBestEffortImpl( size_t num_alpha = readAlpha(alpha, sizeof(alpha), in); if (num_alpha < 3) - return on_error("Cannot read DateTime: unexpected number of alphabetical characters after day of month: " + toString(num_alpha), ErrorCodes::CANNOT_PARSE_DATETIME); + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: unexpected number of alphabetical characters after day of month: {}", num_alpha); if (!read_alpha_month(alpha)) - return on_error("Cannot read DateTime: alphabetical characters after day of month don't look like month: " + std::string(alpha, 3), ErrorCodes::CANNOT_PARSE_DATETIME); + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: alphabetical characters after day of month don't look like month: {}", std::string(alpha, 3)); } else - return on_error("Cannot read DateTime: unexpected number of decimal digits after day of month: " + toString(num_digits), ErrorCodes::CANNOT_PARSE_DATETIME); + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: unexpected number of decimal digits after day of month: {}", num_digits); } if (month > 12) @@ -371,7 +373,7 @@ ReturnType parseDateTimeBestEffortImpl( if (checkChar('/', in) || checkChar('.', in) || checkChar('-', in)) { if (year) - return on_error("Cannot read DateTime: year component is duplicated", ErrorCodes::CANNOT_PARSE_DATETIME); + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: year component is duplicated"); num_digits = readDigits(digits, sizeof(digits), in); @@ -387,7 +389,7 @@ ReturnType parseDateTimeBestEffortImpl( year += 2000; } else - return on_error("Cannot read DateTime: unexpected number of decimal digits after day of month and month: " + toString(num_digits), ErrorCodes::CANNOT_PARSE_DATETIME); + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: unexpected number of decimal digits after day of month and month: {}", num_digits); } } else @@ -399,7 +401,7 @@ ReturnType parseDateTimeBestEffortImpl( } } else if (num_digits != 0) - return on_error("Cannot read DateTime: unexpected number of decimal digits: " + toString(num_digits), ErrorCodes::CANNOT_PARSE_DATETIME); + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: unexpected number of decimal digits: {}", num_digits); } if (num_digits == 0) @@ -421,7 +423,7 @@ ReturnType parseDateTimeBestEffortImpl( else if (c == '.') /// We 
don't support comma (ISO 8601:2004) for fractional part of second to not mess up with CSV separator. { if (!has_time) - return on_error("Cannot read DateTime: unexpected point symbol", ErrorCodes::CANNOT_PARSE_DATETIME); + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: unexpected point symbol"); ++in.position(); num_digits = readDigits(digits, sizeof(digits), in); @@ -476,7 +478,7 @@ ReturnType parseDateTimeBestEffortImpl( readDecimalNumber<1>(time_zone_offset_hour, digits); } else - return on_error("Cannot read DateTime: unexpected number of decimal digits for time zone offset: " + toString(num_digits), ErrorCodes::CANNOT_PARSE_DATETIME); + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: unexpected number of decimal digits for time zone offset: {}", num_digits); if (num_digits < 3 && checkChar(':', in)) { @@ -491,7 +493,7 @@ ReturnType parseDateTimeBestEffortImpl( readDecimalNumber<1>(time_zone_offset_minute, digits); } else - return on_error("Cannot read DateTime: unexpected number of decimal digits for time zone offset in minutes: " + toString(num_digits), ErrorCodes::CANNOT_PARSE_DATETIME); + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: unexpected number of decimal digits for time zone offset in minutes: {}", num_digits); } } } @@ -507,7 +509,7 @@ ReturnType parseDateTimeBestEffortImpl( } else if (num_alpha == 1) { - return on_error("Cannot read DateTime: unexpected alphabetical character", ErrorCodes::CANNOT_PARSE_DATETIME); + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: unexpected alphabetical character"); } else if (num_alpha == 2) { @@ -522,10 +524,10 @@ ReturnType parseDateTimeBestEffortImpl( is_pm = true; } else - return on_error("Cannot read DateTime: unexpected word", ErrorCodes::CANNOT_PARSE_DATETIME); + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: unexpected word"); } else - return on_error("Cannot read DateTime: unexpected word", ErrorCodes::CANNOT_PARSE_DATETIME); + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: unexpected word"); } else if (num_alpha == 3) { @@ -548,7 +550,7 @@ ReturnType parseDateTimeBestEffortImpl( else if (0 == strncasecmp(alpha, "Sun", 3)) has_day_of_week = true; else - return on_error("Cannot read DateTime: unexpected word", ErrorCodes::CANNOT_PARSE_DATETIME); + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: unexpected word"); while (!in.eof() && isAlphaASCII(*in.position())) ++in.position(); @@ -558,18 +560,18 @@ ReturnType parseDateTimeBestEffortImpl( checkChar(',', in); } else - return on_error("Cannot read DateTime: logical error, unexpected branch in code", ErrorCodes::LOGICAL_ERROR); + return on_error(ErrorCodes::LOGICAL_ERROR, "Cannot read DateTime: logical error, unexpected branch in code"); } } } //// Date like '2022/03/04, ' should parse fail? 
if (has_comma_between_date_and_time && (!has_time || !year || !month || !day_of_month)) - return on_error("Cannot read DateTime: unexpected word after Date", ErrorCodes::CANNOT_PARSE_DATETIME); + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: unexpected word after Date"); /// If neither Date nor Time is parsed successfully, it should fail if (!year && !month && !day_of_month && !has_time) - return on_error("Cannot read DateTime: neither Date nor Time was parsed successfully", ErrorCodes::CANNOT_PARSE_DATETIME); + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: neither Date nor Time was parsed successfully"); if (!year) year = 2000; @@ -592,7 +594,8 @@ ReturnType parseDateTimeBestEffortImpl( }; if (!check_date(is_leap_year, month, day_of_month)) - return on_error("Cannot read DateTime: unexpected date: " + std::to_string(year) + "-" + std::to_string(month) + "-" + std::to_string(day_of_month), ErrorCodes::CANNOT_PARSE_DATETIME); + return on_error(ErrorCodes::CANNOT_PARSE_DATETIME, "Cannot read DateTime: unexpected date: {}-{}-{}", + year, static_cast(month), static_cast(day_of_month)); if (is_am && hour == 12) hour = 0; diff --git a/src/IO/readDecimalText.h b/src/IO/readDecimalText.h index 9d7f8137136..9fd9c439b87 100644 --- a/src/IO/readDecimalText.h +++ b/src/IO/readDecimalText.h @@ -93,8 +93,8 @@ inline bool readDigits(ReadBuffer & buf, T & x, uint32_t & digits, int32_t & exp else { if constexpr (_throw_on_error) - throw Exception("Too many digits (" + std::to_string(digits + places) + " > " + std::to_string(max_digits) - + ") in decimal value", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Too many digits ({} > {}) in decimal value", + std::to_string(digits + places), std::to_string(max_digits)); return false; } @@ -121,7 +121,7 @@ inline bool readDigits(ReadBuffer & buf, T & x, uint32_t & digits, int32_t & exp if (!tryReadIntText(addition_exp, buf)) { if constexpr (_throw_on_error) - throw ParsingException("Cannot parse exponent while reading decimal", ErrorCodes::CANNOT_PARSE_NUMBER); + throw ParsingException(ErrorCodes::CANNOT_PARSE_NUMBER, "Cannot parse exponent while reading decimal"); else return false; } @@ -134,7 +134,7 @@ inline bool readDigits(ReadBuffer & buf, T & x, uint32_t & digits, int32_t & exp if (digits_only) { if constexpr (_throw_on_error) - throw ParsingException("Unexpected symbol while reading decimal", ErrorCodes::CANNOT_PARSE_NUMBER); + throw ParsingException(ErrorCodes::CANNOT_PARSE_NUMBER, "Unexpected symbol while reading decimal"); return false; } stop = true; @@ -163,13 +163,13 @@ inline ReturnType readDecimalText(ReadBuffer & buf, T & x, uint32_t precision, u { if constexpr (throw_exception) { - static constexpr const char * pattern = "Decimal value is too big: {} digits were read: {}e{}." + static constexpr auto pattern = "Decimal value is too big: {} digits were read: {}e{}." 
" Expected to read decimal with scale {} and precision {}"; if constexpr (is_big_int_v) - throw Exception(fmt::format(pattern, digits, x.value, exponent, scale, precision), ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, pattern, digits, x.value, exponent, scale, precision); else - throw Exception(fmt::format(pattern, digits, x, exponent, scale, precision), ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, pattern, digits, x, exponent, scale, precision); } else return ReturnType(false); diff --git a/src/IO/readFloatText.cpp b/src/IO/readFloatText.cpp index 12ac0e80c88..d1143f7c62c 100644 --- a/src/IO/readFloatText.cpp +++ b/src/IO/readFloatText.cpp @@ -37,13 +37,13 @@ bool parseNaN(ReadBuffer & buf) void assertInfinity(ReadBuffer & buf) { if (!parseInfinity(buf)) - throw Exception("Cannot parse infinity.", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + throw Exception(ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED, "Cannot parse infinity."); } void assertNaN(ReadBuffer & buf) { if (!parseNaN(buf)) - throw Exception("Cannot parse NaN.", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + throw Exception(ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED, "Cannot parse NaN."); } diff --git a/src/IO/readFloatText.h b/src/IO/readFloatText.h index 369ea4ab87c..c4cd46463a3 100644 --- a/src/IO/readFloatText.h +++ b/src/IO/readFloatText.h @@ -160,7 +160,7 @@ ReturnType readFloatTextPreciseImpl(T & x, ReadBuffer & buf) if (unlikely(res.ec != std::errc())) { if constexpr (throw_exception) - throw ParsingException("Cannot read floating point value", ErrorCodes::CANNOT_PARSE_NUMBER); + throw ParsingException(ErrorCodes::CANNOT_PARSE_NUMBER, "Cannot read floating point value"); else return ReturnType(false); } @@ -243,7 +243,7 @@ ReturnType readFloatTextPreciseImpl(T & x, ReadBuffer & buf) if (unlikely(res.ec != std::errc())) { if constexpr (throw_exception) - throw ParsingException("Cannot read floating point value", ErrorCodes::CANNOT_PARSE_NUMBER); + throw ParsingException(ErrorCodes::CANNOT_PARSE_NUMBER, "Cannot read floating point value"); else return ReturnType(false); } @@ -331,7 +331,7 @@ ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in) if (in.eof()) { if constexpr (throw_exception) - throw ParsingException("Cannot read floating point value", ErrorCodes::CANNOT_PARSE_NUMBER); + throw ParsingException(ErrorCodes::CANNOT_PARSE_NUMBER, "Cannot read floating point value"); else return false; } @@ -389,7 +389,7 @@ ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in) if (in.eof()) { if constexpr (throw_exception) - throw ParsingException("Cannot read floating point value: nothing after exponent", ErrorCodes::CANNOT_PARSE_NUMBER); + throw ParsingException(ErrorCodes::CANNOT_PARSE_NUMBER, "Cannot read floating point value: nothing after exponent"); else return false; } @@ -427,7 +427,7 @@ ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in) if (in.eof()) { if constexpr (throw_exception) - throw ParsingException("Cannot read floating point value: no digits read", ErrorCodes::CANNOT_PARSE_NUMBER); + throw ParsingException(ErrorCodes::CANNOT_PARSE_NUMBER, "Cannot read floating point value: no digits read"); else return false; } @@ -438,14 +438,14 @@ ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in) if (in.eof()) { if constexpr (throw_exception) - throw ParsingException("Cannot read floating point value: nothing after plus sign", ErrorCodes::CANNOT_PARSE_NUMBER); + throw 
ParsingException(ErrorCodes::CANNOT_PARSE_NUMBER, "Cannot read floating point value: nothing after plus sign"); else return false; } else if (negative) { if constexpr (throw_exception) - throw ParsingException("Cannot read floating point value: plus after minus sign", ErrorCodes::CANNOT_PARSE_NUMBER); + throw ParsingException(ErrorCodes::CANNOT_PARSE_NUMBER, "Cannot read floating point value: plus after minus sign"); else return false; } diff --git a/src/IO/tests/gtest_peekable_read_buffer.cpp b/src/IO/tests/gtest_peekable_read_buffer.cpp index 37c8a66668f..fc2b78c0eaf 100644 --- a/src/IO/tests/gtest_peekable_read_buffer.cpp +++ b/src/IO/tests/gtest_peekable_read_buffer.cpp @@ -89,3 +89,63 @@ catch (const DB::Exception & e) throw; } +TEST(PeekableReadBuffer, RecursiveCheckpointsWorkCorrectly) +try +{ + + std::string s1 = "0123456789"; + std::string s2 = "qwertyuiop"; + + DB::ConcatReadBuffer concat; + concat.appendBuffer(std::make_unique(s1)); + concat.appendBuffer(std::make_unique(s2)); + DB::PeekableReadBuffer peekable(concat, 0); + + ASSERT_TRUE(!peekable.eof()); + assertAvailable(peekable, "0123456789"); + readAndAssert(peekable, "01234"); + peekable.setCheckpoint(); + + readAndAssert(peekable, "56"); + + peekable.setCheckpoint(); + readAndAssert(peekable, "78"); + assertAvailable(peekable, "9"); + peekable.rollbackToCheckpoint(); + assertAvailable(peekable, "789"); + + readAndAssert(peekable, "789"); + peekable.setCheckpoint(); + readAndAssert(peekable, "qwert"); + peekable.rollbackToCheckpoint(); + assertAvailable(peekable, "qwertyuiop"); + peekable.dropCheckpoint(); + + readAndAssert(peekable, "qwerty"); + peekable.setCheckpoint(); + readAndAssert(peekable, "ui"); + peekable.rollbackToCheckpoint(); + assertAvailable(peekable, "uiop"); + peekable.dropCheckpoint(); + + peekable.rollbackToCheckpoint(); + assertAvailable(peekable, "789"); + peekable.dropCheckpoint(); + + readAndAssert(peekable, "789"); + readAndAssert(peekable, "qwerty"); + peekable.rollbackToCheckpoint(); + assertAvailable(peekable, "56789"); + peekable.dropCheckpoint(); + + readAndAssert(peekable, "56789q"); + assertAvailable(peekable, "wertyuiop"); + ASSERT_TRUE(!peekable.hasUnreadData()); + readAndAssert(peekable, "wertyuiop"); + ASSERT_TRUE(peekable.eof()); +} +catch (const DB::Exception & e) +{ + std::cerr << e.what() << ", " << e.displayText() << std::endl; + throw; +} diff --git a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp index 1aa5cd03530..0bde147fbb6 100644 --- a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp @@ -106,7 +106,9 @@ BlockIO InterpreterCreateUserQuery::execute() if (!query.attach && !query.alter && !query.auth_data && !implicit_no_password_allowed) throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Authentication type NO_PASSWORD must be explicitly specified, check the setting allow_implicit_no_password in the server configuration"); + "Authentication type NO_PASSWORD must " + "be explicitly specified, check the setting allow_implicit_no_password " + "in the server configuration"); if (!query.attach && query.temporary_password_for_checks) { diff --git a/src/Interpreters/Access/InterpreterDropAccessEntityQuery.cpp b/src/Interpreters/Access/InterpreterDropAccessEntityQuery.cpp index 3437e7fe0f4..d4c37064065 100644 --- a/src/Interpreters/Access/InterpreterDropAccessEntityQuery.cpp +++ b/src/Interpreters/Access/InterpreterDropAccessEntityQuery.cpp @@ -81,8 +81,7 @@ 
AccessRightsElements InterpreterDropAccessEntityQuery::getRequiredAccess() const case AccessEntityType::MAX: break; } - throw Exception( - toString(query.type) + ": type is not supported by DROP query", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{}: type is not supported by DROP query", toString(query.type)); } } diff --git a/src/Interpreters/Access/InterpreterGrantQuery.cpp b/src/Interpreters/Access/InterpreterGrantQuery.cpp index ae59e0462fa..7f2093a22a7 100644 --- a/src/Interpreters/Access/InterpreterGrantQuery.cpp +++ b/src/Interpreters/Access/InterpreterGrantQuery.cpp @@ -353,9 +353,9 @@ BlockIO InterpreterGrantQuery::execute() query.access_rights_elements.eraseNonGrantable(); if (!query.access_rights_elements.sameOptions()) - throw Exception("Elements of an ASTGrantQuery are expected to have the same options", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Elements of an ASTGrantQuery are expected to have the same options"); if (!query.access_rights_elements.empty() && query.access_rights_elements[0].is_partial_revoke && !query.is_revoke) - throw Exception("A partial revoke should be revoked, not granted", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "A partial revoke should be revoked, not granted"); auto & access_control = getContext()->getAccessControl(); auto current_user_access = getContext()->getAccess(); diff --git a/src/Interpreters/Access/InterpreterSetRoleQuery.cpp b/src/Interpreters/Access/InterpreterSetRoleQuery.cpp index 6acb9ab5e19..69740f90d4c 100644 --- a/src/Interpreters/Access/InterpreterSetRoleQuery.cpp +++ b/src/Interpreters/Access/InterpreterSetRoleQuery.cpp @@ -49,7 +49,7 @@ void InterpreterSetRoleQuery::setRole(const ASTSetRoleQuery & query) for (const auto & id : roles_from_query.getMatchingIDs()) { if (!user->granted_roles.isGranted(id)) - throw Exception("Role should be granted to set current", ErrorCodes::SET_NON_GRANTED_ROLE); + throw Exception(ErrorCodes::SET_NON_GRANTED_ROLE, "Role should be granted to set current"); new_current_roles.emplace_back(id); } } @@ -84,7 +84,7 @@ void InterpreterSetRoleQuery::updateUserSetDefaultRoles(User & user, const Roles for (const auto & id : roles_from_query.getMatchingIDs()) { if (!user.granted_roles.isGranted(id)) - throw Exception("Role should be granted to set default", ErrorCodes::SET_NON_GRANTED_ROLE); + throw Exception(ErrorCodes::SET_NON_GRANTED_ROLE, "Role should be granted to set default"); } } user.default_roles = roles_from_query; diff --git a/src/Interpreters/Access/InterpreterShowAccessEntitiesQuery.cpp b/src/Interpreters/Access/InterpreterShowAccessEntitiesQuery.cpp index acb7f521493..e7b9237b680 100644 --- a/src/Interpreters/Access/InterpreterShowAccessEntitiesQuery.cpp +++ b/src/Interpreters/Access/InterpreterShowAccessEntitiesQuery.cpp @@ -115,7 +115,7 @@ String InterpreterShowAccessEntitiesQuery::getRewrittenQuery() const } if (origin.empty()) - throw Exception(toString(query.type) + ": type is not supported by SHOW query", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{}: type is not supported by SHOW query", toString(query.type)); if (order.empty() && expr != "*") order = expr; diff --git a/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp b/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp index 880ad434bba..7b9a8f98c8f 100644 --- a/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp +++ 
b/src/Interpreters/Access/InterpreterShowCreateAccessEntityQuery.cpp @@ -227,7 +227,7 @@ namespace return getCreateQueryImpl(*quota, access_control, attach_mode); if (const SettingsProfile * profile = typeid_cast(&entity)) return getCreateQueryImpl(*profile, access_control, attach_mode); - throw Exception(entity.formatTypeWithName() + ": type is not supported by SHOW CREATE query", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{}: type is not supported by SHOW CREATE query", entity.formatTypeWithName()); } } @@ -409,6 +409,6 @@ AccessRightsElements InterpreterShowCreateAccessEntityQuery::getRequiredAccess() case AccessEntityType::MAX: break; } - throw Exception(toString(show_query.type) + ": type is not supported by SHOW CREATE query", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "{}: type is not supported by SHOW CREATE query", toString(show_query.type)); } } diff --git a/src/Interpreters/Access/InterpreterShowGrantsQuery.cpp b/src/Interpreters/Access/InterpreterShowGrantsQuery.cpp index 617239a61cc..2ff6d44e041 100644 --- a/src/Interpreters/Access/InterpreterShowGrantsQuery.cpp +++ b/src/Interpreters/Access/InterpreterShowGrantsQuery.cpp @@ -94,7 +94,7 @@ namespace return getGrantQueriesImpl(*user, access_control, attach_mode); if (const Role * role = typeid_cast(&entity)) return getGrantQueriesImpl(*role, access_control, attach_mode); - throw Exception(entity.formatTypeWithName() + " is expected to be user or role", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "{} is expected to be user or role", entity.formatTypeWithName()); } } diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 2b55fe23d4d..5f1398fed39 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -172,7 +172,7 @@ const ActionsDAG::Node & ActionsDAG::addArrayJoin(const Node & child, std::strin { const auto & array_type = getArrayJoinDataType(child.result_type); if (!array_type) - throw Exception("ARRAY JOIN requires array argument", ErrorCodes::TYPE_MISMATCH); + throw Exception(ErrorCodes::TYPE_MISMATCH, "ARRAY JOIN requires array argument"); if (result_name.empty()) result_name = "arrayJoin(" + child.result_name + ")"; @@ -1086,7 +1086,7 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( throw Exception(ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH, "Number of columns doesn't match"); if (add_casted_columns && mode != MatchColumnsMode::Name) - throw Exception("Converting with add_casted_columns supported only for MatchColumnsMode::Name", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Converting with add_casted_columns supported only for MatchColumnsMode::Name"); auto actions_dag = std::make_shared(source); NodeRawConstPtrs projection(num_result_columns); diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index e1af752b100..2974b5c0ee0 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -155,15 +155,15 @@ static Block createBlockFromCollection(const Collection & collection, const Data else { if (value.getType() != Field::Types::Tuple) - throw Exception("Invalid type in set. Expected tuple, got " - + String(value.getTypeName()), ErrorCodes::INCORRECT_ELEMENT_OF_SET); + throw Exception(ErrorCodes::INCORRECT_ELEMENT_OF_SET, "Invalid type in set. 
Expected tuple, got {}", + String(value.getTypeName())); const auto & tuple = value.template get(); size_t tuple_size = tuple.size(); if (tuple_size != columns_num) - throw Exception("Incorrect size of tuple in set: " + toString(tuple_size) - + " instead of " + toString(columns_num), ErrorCodes::INCORRECT_ELEMENT_OF_SET); + throw Exception(ErrorCodes::INCORRECT_ELEMENT_OF_SET, "Incorrect size of tuple in set: {} instead of {}", + tuple_size, columns_num); if (tuple_values.empty()) tuple_values.resize(tuple_size); @@ -204,7 +204,7 @@ static Field extractValueFromNode(const ASTPtr & node, const IDataType & type, C return convertFieldToType(value_raw.first, type, value_raw.second.get()); } else - throw Exception("Incorrect element of set. Must be literal or constant expression.", ErrorCodes::INCORRECT_ELEMENT_OF_SET); + throw Exception(ErrorCodes::INCORRECT_ELEMENT_OF_SET, "Incorrect element of set. Must be literal or constant expression."); } static Block createBlockFromAST(const ASTPtr & node, const DataTypes & types, ContextPtr context) @@ -277,8 +277,8 @@ static Block createBlockFromAST(const ASTPtr & node, const DataTypes & types, Co size_t tuple_size = tuple ? tuple->size() : func->arguments->children.size(); //-V1004 if (tuple_size != num_columns) - throw Exception("Incorrect size of tuple in set: " + toString(tuple_size) + " instead of " + toString(num_columns), - ErrorCodes::INCORRECT_ELEMENT_OF_SET); + throw Exception(ErrorCodes::INCORRECT_ELEMENT_OF_SET, "Incorrect size of tuple in set: {} instead of {}", + tuple_size, num_columns); if (tuple_values.empty()) tuple_values.resize(tuple_size); @@ -305,7 +305,7 @@ static Block createBlockFromAST(const ASTPtr & node, const DataTypes & types, Co columns[i]->insert(tuple_values[i]); } else - throw Exception("Incorrect element of set", ErrorCodes::INCORRECT_ELEMENT_OF_SET); + throw Exception(ErrorCodes::INCORRECT_ELEMENT_OF_SET, "Incorrect element of set"); } return header.cloneWithColumns(std::move(columns)); @@ -335,8 +335,8 @@ Block createBlockForSet( auto throw_unsupported_type = [](const auto & type) { - throw Exception("Unsupported value type at the right-side of IN: " - + type->getName() + ".", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Unsupported value type at the right-side of IN: {}.", + type->getName()); }; Block block; @@ -406,16 +406,14 @@ Block createBlockForSet( { const auto * set_func = right_arg->as(); if (!set_func || (set_func->name != "tuple" && set_func->name != "array")) - throw Exception("Incorrect type of 2nd argument for function 'in'" - ". Must be subquery or set of elements with type " + left_arg_type->getName() + ".", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Incorrect type of 2nd argument for function 'in'. 
" + "Must be subquery or set of elements with type {}.", left_arg_type->getName()); elements_ast = set_func->arguments; } else - throw Exception("Invalid types for IN function: " - + left_arg_type->getName() + " and " + right_arg_type->getName() + ".", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Invalid types for IN function: {} and {}.", + left_arg_type->getName(), right_arg_type->getName()); return createBlockFromAST(elements_ast, set_element_types, context); } @@ -430,7 +428,7 @@ SetPtr makeExplicitSet( const IAST & args = *node->arguments; if (args.children.size() != 2) - throw Exception("Wrong number of arguments passed to function in", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Wrong number of arguments passed to function in"); const ASTPtr & left_arg = args.children.at(0); const ASTPtr & right_arg = args.children.at(1); @@ -491,7 +489,7 @@ public: { bool inserted = map.emplace(node->result_name, node).second; if (!inserted) - throw Exception("Column '" + node->result_name + "' already exists", ErrorCodes::DUPLICATE_COLUMN); + throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Column '{}' already exists", node->result_name); index.push_back(node); } @@ -509,7 +507,7 @@ public: { const auto * node = tryGetNode(name); if (!node) - throw Exception("Unknown identifier: '" + name + "'", ErrorCodes::UNKNOWN_IDENTIFIER); + throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown identifier: '{}'", name); return *node; } @@ -618,7 +616,7 @@ size_t ScopeStack::getColumnLevel(const std::string & name) return i; } - throw Exception("Unknown identifier: " + name, ErrorCodes::UNKNOWN_IDENTIFIER); + throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown identifier: {}", name); } void ScopeStack::addColumn(ColumnWithTypeAndName column) @@ -653,8 +651,8 @@ void ScopeStack::addArrayJoin(const std::string & source_name, std::string resul const auto * source_node = stack.front().index->tryGetNode(source_name); if (!source_node) - throw Exception("Expression with arrayJoin cannot depend on lambda argument: " + source_name, - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expression with arrayJoin cannot depend on lambda argument: {}", + source_name); const auto & node = stack.front().actions_dag->addArrayJoin(*source_node, std::move(result_name)); stack.front().index->addNode(&node); @@ -767,9 +765,8 @@ std::optional ActionsMatcher::getNameAndTypeFromAST(const ASTPt return NameAndTypePair(child_column_name, node->result_type); if (!data.only_consts) - throw Exception( - "Unknown identifier: " + child_column_name + "; there are columns: " + data.actions_stack.dumpNames(), - ErrorCodes::UNKNOWN_IDENTIFIER); + throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown identifier: {}; there are columns: {}", + child_column_name, data.actions_stack.dumpNames()); return {}; } @@ -888,13 +885,13 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & return; if (node.name == "lambda") - throw Exception("Unexpected lambda expression", ErrorCodes::UNEXPECTED_EXPRESSION); + throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "Unexpected lambda expression"); /// Function arrayJoin. 
if (node.name == "arrayJoin") { if (node.arguments->children.size() != 1) - throw Exception("arrayJoin requires exactly 1 argument", ErrorCodes::TYPE_MISMATCH); + throw Exception(ErrorCodes::TYPE_MISMATCH, "arrayJoin requires exactly 1 argument"); ASTPtr arg = node.arguments->children.at(0); visit(arg, data); @@ -913,7 +910,8 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & if (arguments_size == 0) throw Exception(ErrorCodes::TOO_FEW_ARGUMENTS_FOR_FUNCTION, "Function GROUPING expects at least one argument"); if (arguments_size > 64) - throw Exception(ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION, "Function GROUPING can have up to 64 arguments, but {} provided", arguments_size); + throw Exception(ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION, + "Function GROUPING can have up to 64 arguments, but {} provided", arguments_size); auto keys_info = data.aggregation_keys_info; auto aggregation_keys_number = keys_info.aggregation_keys.size(); @@ -1131,12 +1129,12 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & { /// If the argument is a lambda expression, just remember its approximate type. if (function->arguments->children.size() != 2) - throw Exception("lambda requires two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "lambda requires two arguments"); const auto * lambda_args_tuple = function->arguments->children.at(0)->as(); if (!lambda_args_tuple || lambda_args_tuple->name != "tuple") - throw Exception("First argument of lambda must be a tuple", ErrorCodes::TYPE_MISMATCH); + throw Exception(ErrorCodes::TYPE_MISMATCH, "First argument of lambda must be a tuple"); has_lambda_arguments = true; argument_types.emplace_back(std::make_shared(DataTypes(lambda_args_tuple->arguments->children.size()))); @@ -1255,7 +1253,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & { auto opt_arg_name = tryGetIdentifierName(lambda_arg_asts[j]); if (!opt_arg_name) - throw Exception("lambda argument declarations must be identifiers", ErrorCodes::TYPE_MISMATCH); + throw Exception(ErrorCodes::TYPE_MISMATCH, "lambda argument declarations must be identifiers"); lambda_arguments.emplace_back(*opt_arg_name, lambda_type->getArgumentTypes()[j]); } diff --git a/src/Interpreters/AggregationCommon.h b/src/Interpreters/AggregationCommon.h index 5904cc48084..32b01ee0416 100644 --- a/src/Interpreters/AggregationCommon.h +++ b/src/Interpreters/AggregationCommon.h @@ -139,7 +139,7 @@ static inline T ALWAYS_INLINE packFixed( case sizeof(UInt16): index = assert_cast(positions)->getElement(i); break; case sizeof(UInt32): index = assert_cast(positions)->getElement(i); break; case sizeof(UInt64): index = assert_cast(positions)->getElement(i); break; - default: throw Exception("Unexpected size of index type for low cardinality column.", ErrorCodes::LOGICAL_ERROR); + default: throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected size of index type for low cardinality column."); } } } diff --git a/src/Interpreters/AggregationUtils.cpp b/src/Interpreters/AggregationUtils.cpp index 157590e6f44..125a9e4f6b8 100644 --- a/src/Interpreters/AggregationUtils.cpp +++ b/src/Interpreters/AggregationUtils.cpp @@ -65,7 +65,7 @@ OutputBlockColumns prepareOutputBlockColumns( } if (key_columns.size() != params.keys_size) - throw Exception{"Aggregate. Unexpected key columns size.", ErrorCodes::LOGICAL_ERROR}; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Aggregate. 
Unexpected key columns size."); std::vector raw_key_columns; raw_key_columns.reserve(key_columns.size()); diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index 57848d692f1..da4442f99d3 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -382,7 +382,7 @@ void AggregatedDataVariants::convertToTwoLevel() #undef M default: - throw Exception("Wrong data variant passed.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong data variant passed."); } } @@ -622,7 +622,7 @@ Aggregator::Aggregator(const Block & header_, const Params & params_) { size_t alignment_of_next_state = params.aggregates[i + 1].function->alignOfData(); if ((alignment_of_next_state & (alignment_of_next_state - 1)) != 0) - throw Exception("Logical error: alignOfData is not 2^N", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: alignOfData is not 2^N"); /// Extend total_size to next alignment requirement /// Add padding by rounding up 'total_size_of_aggregate_states' to be a multiplier of alignment_of_next_state. @@ -831,7 +831,7 @@ AggregatedDataVariants::Type Aggregator::chooseAggregationMethod() return AggregatedDataVariants::Type::low_cardinality_keys128; if (size_of_field == 32) return AggregatedDataVariants::Type::low_cardinality_keys256; - throw Exception("Logical error: low cardinality numeric column has sizeOfField not in 1, 2, 4, 8, 16, 32.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: low cardinality numeric column has sizeOfField not in 1, 2, 4, 8, 16, 32."); } if (size_of_field == 1) @@ -846,7 +846,7 @@ AggregatedDataVariants::Type Aggregator::chooseAggregationMethod() return AggregatedDataVariants::Type::keys128; if (size_of_field == 32) return AggregatedDataVariants::Type::keys256; - throw Exception("Logical error: numeric column has sizeOfField not in 1, 2, 4, 8, 16, 32.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: numeric column has sizeOfField not in 1, 2, 4, 8, 16, 32."); } if (params.keys_size == 1 && isFixedString(types_removed_nullable[0])) @@ -986,7 +986,7 @@ void Aggregator::mergeOnBlockSmall( APPLY_FOR_AGGREGATED_VARIANTS(M) #undef M else - throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); + throw Exception(ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT, "Unknown aggregated data variant."); } void Aggregator::executeImpl( @@ -1592,7 +1592,7 @@ bool Aggregator::executeOnBlock(Columns columns, void Aggregator::writeToTemporaryFile(AggregatedDataVariants & data_variants, size_t max_temp_file_size) const { if (!tmp_data) - throw Exception("Cannot write to temporary file because temporary file is not initialized", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot write to temporary file because temporary file is not initialized"); Stopwatch watch; size_t rows = data_variants.size(); @@ -1612,7 +1612,7 @@ void Aggregator::writeToTemporaryFile(AggregatedDataVariants & data_variants, si APPLY_FOR_VARIANTS_TWO_LEVEL(M) #undef M else - throw Exception("Unknown aggregated data variant", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); + throw Exception(ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT, "Unknown aggregated data variant"); /// NOTE Instead of freeing up memory and creating new hash tables and arenas, you can re-use the old ones. 
data_variants.init(data_variants.type); @@ -1746,9 +1746,8 @@ bool Aggregator::checkLimits(size_t result_size, bool & no_more_keys) const { case OverflowMode::THROW: ProfileEvents::increment(ProfileEvents::OverflowThrow); - throw Exception("Limit for rows to GROUP BY exceeded: has " + toString(result_size) - + " rows, maximum: " + toString(params.max_rows_to_group_by), - ErrorCodes::TOO_MANY_ROWS); + throw Exception(ErrorCodes::TOO_MANY_ROWS, "Limit for rows to GROUP BY exceeded: has {} rows, maximum: {}", + result_size, params.max_rows_to_group_by); case OverflowMode::BREAK: ProfileEvents::increment(ProfileEvents::OverflowBreak); @@ -2174,7 +2173,7 @@ Block Aggregator::prepareBlockAndFillWithoutKey(AggregatedDataVariants & data_va AggregatedDataWithoutKey & data = data_variants.without_key; if (!data) - throw Exception("Wrong data variant passed.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong data variant passed."); if (!final) { @@ -2219,7 +2218,7 @@ Aggregator::prepareBlockAndFillSingleLevel(AggregatedDataVariants & data_variant if (false) {} // NOLINT APPLY_FOR_VARIANTS_SINGLE_LEVEL(M) #undef M - else throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); + else throw Exception(ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT, "Unknown aggregated data variant."); } @@ -2233,7 +2232,7 @@ BlocksList Aggregator::prepareBlocksAndFillTwoLevel(AggregatedDataVariants & dat APPLY_FOR_VARIANTS_TWO_LEVEL(M) #undef M else - throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); + throw Exception(ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT, "Unknown aggregated data variant."); } @@ -2670,7 +2669,7 @@ void NO_INLINE Aggregator::mergeBucketImpl( ManyAggregatedDataVariants Aggregator::prepareVariantsToMerge(ManyAggregatedDataVariants & data_variants) const { if (data_variants.empty()) - throw Exception("Empty data passed to Aggregator::prepareVariantsToMerge.", ErrorCodes::EMPTY_DATA_PASSED); + throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "Empty data passed to Aggregator::prepareVariantsToMerge."); LOG_TRACE(log, "Merging aggregated data"); @@ -2719,7 +2718,7 @@ ManyAggregatedDataVariants Aggregator::prepareVariantsToMerge(ManyAggregatedData for (size_t i = 1, size = non_empty_data.size(); i < size; ++i) { if (first->type != non_empty_data[i]->type) - throw Exception("Cannot merge different aggregated data variants.", ErrorCodes::CANNOT_MERGE_DIFFERENT_AGGREGATED_DATA_VARIANTS); + throw Exception(ErrorCodes::CANNOT_MERGE_DIFFERENT_AGGREGATED_DATA_VARIANTS, "Cannot merge different aggregated data variants."); /** Elements from the remaining sets can be moved to the first data set. * Therefore, it must own all the arenas of all other sets. 
@@ -2885,7 +2884,7 @@ bool Aggregator::mergeOnBlock(Block block, AggregatedDataVariants & result, bool APPLY_FOR_AGGREGATED_VARIANTS(M) #undef M else if (result.type != AggregatedDataVariants::Type::without_key) - throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); + throw Exception(ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT, "Unknown aggregated data variant."); size_t result_size = result.sizeWithoutOverflowRow(); Int64 current_memory_usage = 0; @@ -2991,7 +2990,7 @@ void Aggregator::mergeBlocks(BucketToBlocks bucket_to_blocks, AggregatedDataVari APPLY_FOR_VARIANTS_TWO_LEVEL(M) #undef M else - throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); + throw Exception(ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT, "Unknown aggregated data variant."); } }; @@ -3045,7 +3044,7 @@ void Aggregator::mergeBlocks(BucketToBlocks bucket_to_blocks, AggregatedDataVari APPLY_FOR_AGGREGATED_VARIANTS(M) #undef M else if (result.type != AggregatedDataVariants::Type::without_key) - throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); + throw Exception(ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT, "Unknown aggregated data variant."); } LOG_TRACE(log, "Merged partially aggregated single-level data."); @@ -3121,7 +3120,7 @@ Block Aggregator::mergeBlocks(BlocksList & blocks, bool final) APPLY_FOR_AGGREGATED_VARIANTS(M) #undef M else if (result.type != AggregatedDataVariants::Type::without_key) - throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); + throw Exception(ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT, "Unknown aggregated data variant."); } Block block; @@ -3245,7 +3244,7 @@ std::vector Aggregator::convertBlockToTwoLevel(const Block & block) const APPLY_FOR_VARIANTS_CONVERTIBLE_TO_TWO_LEVEL(M) #undef M else - throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); + throw Exception(ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT, "Unknown aggregated data variant."); data.init(type); @@ -3259,7 +3258,7 @@ std::vector Aggregator::convertBlockToTwoLevel(const Block & block) const APPLY_FOR_VARIANTS_TWO_LEVEL(M) #undef M else - throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); + throw Exception(ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT, "Unknown aggregated data variant."); std::vector splitted_blocks(num_buckets); @@ -3272,7 +3271,7 @@ std::vector Aggregator::convertBlockToTwoLevel(const Block & block) const APPLY_FOR_VARIANTS_TWO_LEVEL(M) #undef M else - throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); + throw Exception(ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT, "Unknown aggregated data variant."); return splitted_blocks; } @@ -3329,7 +3328,7 @@ void Aggregator::destroyAllAggregateStates(AggregatedDataVariants & result) cons APPLY_FOR_AGGREGATED_VARIANTS(M) #undef M else if (result.type != AggregatedDataVariants::Type::without_key) - throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); + throw Exception(ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT, "Unknown aggregated data variant."); } diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index c81cfa2c0a2..7fe4dd89d8e 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -863,7 +863,7 @@ struct AggregatedDataVariants : private boost::noncopyable #undef M 
default: - throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); + throw Exception(ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT, "Unknown aggregated data variant."); } } }; diff --git a/src/Interpreters/ArithmeticOperationsInAgrFuncOptimize.cpp b/src/Interpreters/ArithmeticOperationsInAgrFuncOptimize.cpp index 3ff7a39eb4f..66e0813b977 100644 --- a/src/Interpreters/ArithmeticOperationsInAgrFuncOptimize.cpp +++ b/src/Interpreters/ArithmeticOperationsInAgrFuncOptimize.cpp @@ -70,7 +70,7 @@ Field zeroField(const Field & value) break; } - throw Exception("Unexpected literal type in function", ErrorCodes::BAD_TYPE_OF_FIELD); + throw Exception(ErrorCodes::BAD_TYPE_OF_FIELD, "Unexpected literal type in function"); } const String & changeNameIfNeeded(const String & func_name, const String & child_name, const ASTLiteral & literal) diff --git a/src/Interpreters/ArrayJoinAction.cpp b/src/Interpreters/ArrayJoinAction.cpp index ba54f1a324e..3650b888f9e 100644 --- a/src/Interpreters/ArrayJoinAction.cpp +++ b/src/Interpreters/ArrayJoinAction.cpp @@ -64,7 +64,7 @@ ArrayJoinAction::ArrayJoinAction(const NameSet & array_joined_columns_, bool arr , is_unaligned(context->getSettingsRef().enable_unaligned_array_join) { if (columns.empty()) - throw Exception("No arrays to join", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "No arrays to join"); if (is_unaligned) { @@ -89,19 +89,19 @@ void ArrayJoinAction::prepare(ColumnsWithTypeAndName & sample) const current.type = type->getNestedType(); } else - throw Exception("ARRAY JOIN requires array or map argument", ErrorCodes::TYPE_MISMATCH); + throw Exception(ErrorCodes::TYPE_MISMATCH, "ARRAY JOIN requires array or map argument"); } } void ArrayJoinAction::execute(Block & block) { if (columns.empty()) - throw Exception("No arrays to join", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "No arrays to join"); ColumnPtr any_array_map_ptr = block.getByName(*columns.begin()).column->convertToFullColumnIfConst(); const auto * any_array = getArrayJoinColumnRawPtr(any_array_map_ptr); if (!any_array) - throw Exception("ARRAY JOIN requires array or map argument", ErrorCodes::TYPE_MISMATCH); + throw Exception(ErrorCodes::TYPE_MISMATCH, "ARRAY JOIN requires array or map argument"); /// If LEFT ARRAY JOIN, then we create columns in which empty arrays are replaced by arrays with one element - the default value. 
std::map non_empty_array_columns; @@ -143,7 +143,7 @@ void ArrayJoinAction::execute(Block & block) any_array = getArrayJoinColumnRawPtr(any_array_map_ptr); if (!any_array) - throw Exception("ARRAY JOIN requires array or map argument", ErrorCodes::TYPE_MISMATCH); + throw Exception(ErrorCodes::TYPE_MISMATCH, "ARRAY JOIN requires array or map argument"); } else if (is_left) { @@ -158,7 +158,7 @@ void ArrayJoinAction::execute(Block & block) any_array_map_ptr = non_empty_array_columns.begin()->second->convertToFullColumnIfConst(); any_array = getArrayJoinColumnRawPtr(any_array_map_ptr); if (!any_array) - throw Exception("ARRAY JOIN requires array or map argument", ErrorCodes::TYPE_MISMATCH); + throw Exception(ErrorCodes::TYPE_MISMATCH, "ARRAY JOIN requires array or map argument"); } @@ -186,13 +186,13 @@ void ArrayJoinAction::execute(Block & block) const ColumnArray & array = typeid_cast(*array_ptr); if (!is_unaligned && !array.hasEqualOffsets(*any_array)) - throw Exception("Sizes of ARRAY-JOIN-ed arrays do not match", ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH); + throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DOESNT_MATCH, "Sizes of ARRAY-JOIN-ed arrays do not match"); current.column = typeid_cast(*array_ptr).getDataPtr(); current.type = type->getNestedType(); } else - throw Exception("ARRAY JOIN of not array nor map: " + current.name, ErrorCodes::TYPE_MISMATCH); + throw Exception(ErrorCodes::TYPE_MISMATCH, "ARRAY JOIN of not array nor map: {}", current.name); } else { diff --git a/src/Interpreters/ArrayJoinedColumnsVisitor.h b/src/Interpreters/ArrayJoinedColumnsVisitor.h index b64a170cb00..3bbd6982213 100644 --- a/src/Interpreters/ArrayJoinedColumnsVisitor.h +++ b/src/Interpreters/ArrayJoinedColumnsVisitor.h @@ -62,7 +62,7 @@ private: { auto [array_join_expression_list, _] = node.arrayJoinExpressionList(); if (!array_join_expression_list) - throw Exception("Logical error: no ARRAY JOIN", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: no ARRAY JOIN"); std::vector out; out.reserve(array_join_expression_list->children.size()); @@ -73,10 +73,10 @@ private: const String nested_table_alias = ast->getAliasOrColumnName(); if (nested_table_alias == nested_table_name && !ast->as()) - throw Exception("No alias for non-trivial value in ARRAY JOIN: " + nested_table_name, ErrorCodes::ALIAS_REQUIRED); + throw Exception(ErrorCodes::ALIAS_REQUIRED, "No alias for non-trivial value in ARRAY JOIN: {}", nested_table_name); if (data.array_join_alias_to_name.count(nested_table_alias) || data.aliases.count(nested_table_alias)) - throw Exception("Duplicate alias in ARRAY JOIN: " + nested_table_alias, ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS); + throw Exception(ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS, "Duplicate alias in ARRAY JOIN: {}", nested_table_alias); data.array_join_alias_to_name[nested_table_alias] = nested_table_name; data.array_join_name_to_alias[nested_table_name] = nested_table_alias; diff --git a/src/Interpreters/BloomFilter.cpp b/src/Interpreters/BloomFilter.cpp index ff60db0dbb3..99d7e256cf1 100644 --- a/src/Interpreters/BloomFilter.cpp +++ b/src/Interpreters/BloomFilter.cpp @@ -25,9 +25,9 @@ BloomFilterParameters::BloomFilterParameters(size_t filter_size_, size_t filter_ : filter_size(filter_size_), filter_hashes(filter_hashes_), seed(seed_) { if (filter_size == 0) - throw Exception("The size of bloom filter cannot be zero", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The size of bloom filter cannot be zero"); if (filter_hashes 
== 0) - throw Exception("The number of hash functions for bloom filter cannot be zero", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The number of hash functions for bloom filter cannot be zero"); if (filter_size > MAX_BLOOM_FILTER_SIZE) throw Exception(ErrorCodes::BAD_ARGUMENTS, "The size of bloom filter cannot be more than {}", MAX_BLOOM_FILTER_SIZE); } @@ -121,7 +121,7 @@ DataTypePtr BloomFilter::getPrimitiveType(const DataTypePtr & data_type) if (!typeid_cast(array_type->getNestedType().get())) return getPrimitiveType(array_type->getNestedType()); else - throw Exception("Unexpected type " + data_type->getName() + " of bloom filter index.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected type {} of bloom filter index.", data_type->getName()); } if (const auto * nullable_type = typeid_cast(data_type.get())) diff --git a/src/Interpreters/BloomFilterHash.h b/src/Interpreters/BloomFilterHash.h index b95abbfd770..88feeb5b3df 100644 --- a/src/Interpreters/BloomFilterHash.h +++ b/src/Interpreters/BloomFilterHash.h @@ -96,7 +96,7 @@ struct BloomFilterHash else if (which.isUUID()) return build_hash_column(getNumberTypeHash(field)); else if (which.isString()) return build_hash_column(getStringTypeHash(field)); else if (which.isFixedString()) return build_hash_column(getFixedStringTypeHash(field, data_type)); - else throw Exception("Unexpected type " + data_type->getName() + " of bloom filter index.", ErrorCodes::BAD_ARGUMENTS); + else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected type {} of bloom filter index.", data_type->getName()); } static ColumnPtr hashWithColumn(const DataTypePtr & data_type, const ColumnPtr & column, size_t pos, size_t limit) @@ -107,7 +107,7 @@ struct BloomFilterHash const auto * array_col = typeid_cast(column.get()); if (checkAndGetColumn(array_col->getData())) - throw Exception("Unexpected type " + data_type->getName() + " of bloom filter index.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected type {} of bloom filter index.", data_type->getName()); const auto & offsets = array_col->getOffsets(); limit = offsets[pos + limit - 1] - offsets[pos - 1]; /// PaddedPODArray allows access on index -1. 
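The offsets arithmetic in the hunk above is easy to misread: to hash limit consecutive array rows starting at row pos, the code needs the total number of nested elements, which is offsets[pos + limit - 1] - offsets[pos - 1]; the note about PaddedPODArray is what makes the pos == 0 case legal, since the real code may read offsets[-1] directly and relies on that padded element evaluating to 0. A self-contained sketch of the same arithmetic, using std::vector and therefore handling pos == 0 explicitly:

#include <cstddef>
#include <vector>

// Number of nested elements covered by `limit` arrays starting at row `pos`,
// given ColumnArray-style cumulative offsets. Illustration only; in ClickHouse,
// PaddedPODArray lets the real code read offsets[pos - 1] even when pos == 0.
size_t flattenedCount(const std::vector<size_t> & offsets, size_t pos, size_t limit)
{
    size_t begin = (pos == 0) ? 0 : offsets[pos - 1];
    size_t end = offsets[pos + limit - 1];
    return end - begin;
}

// offsets = {2, 5, 9} encodes arrays of sizes 2, 3 and 4;
// flattenedCount(offsets, 1, 2) == 7, i.e. the 3 + 4 elements of rows 1 and 2.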
@@ -158,7 +158,7 @@ struct BloomFilterHash else if (which.isUUID()) getNumberTypeHash(column, vec, pos); else if (which.isString()) getStringTypeHash(column, vec, pos); else if (which.isFixedString()) getStringTypeHash(column, vec, pos); - else throw Exception("Unexpected type " + data_type->getName() + " of bloom filter index.", ErrorCodes::BAD_ARGUMENTS); + else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unexpected type {} of bloom filter index.", data_type->getName()); } template @@ -167,7 +167,7 @@ struct BloomFilterHash const auto * index_column = typeid_cast *>(column); if (unlikely(!index_column)) - throw Exception("Illegal column type was passed to the bloom filter index.", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column type was passed to the bloom filter index."); const typename ColumnVector::Container & vec_from = index_column->getData(); @@ -236,7 +236,7 @@ struct BloomFilterHash } } else - throw Exception("Illegal column type was passed to the bloom filter index.", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column type was passed to the bloom filter index."); } static std::pair calculationBestPractices(double max_conflict_probability) diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 687b38c3020..825dc70f9c8 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -1399,7 +1399,7 @@ FileCache::QueryContextPtr FileCache::getOrSetQueryContext( if (context) return context; - auto query_context = std::make_shared(settings.max_query_cache_size, settings.skip_download_if_exceeds_query_cache); + auto query_context = std::make_shared(settings.filesystem_cache_max_download_size, settings.skip_download_if_exceeds_query_cache); auto query_iter = query_map.emplace(query_id, query_context).first; return query_iter->second; } @@ -1408,10 +1408,10 @@ FileCache::QueryContextHolder FileCache::getQueryContextHolder(const String & qu { std::lock_guard cache_lock(mutex); - if (!enable_filesystem_query_cache_limit || settings.max_query_cache_size == 0) + if (!enable_filesystem_query_cache_limit || settings.filesystem_cache_max_download_size == 0) return {}; - /// if enable_filesystem_query_cache_limit is true, and max_query_cache_size large than zero, + /// if enable_filesystem_query_cache_limit is true, and filesystem_cache_max_download_size large than zero, /// we create context query for current query. 
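/// Note: filesystem_cache_max_download_size is the former max_query_cache_size setting;
/// the rename throughout this hunk appears intended to keep the per-query filesystem-cache
/// download limit clearly separate from the new query result cache (QueryCache) that this
/// patch introduces elsewhere.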
auto context = getOrSetQueryContext(query_id, settings, cache_lock); return QueryContextHolder(query_id, this, context); diff --git a/src/Interpreters/Cache/QueryCache.cpp b/src/Interpreters/Cache/QueryCache.cpp new file mode 100644 index 00000000000..06fd0689993 --- /dev/null +++ b/src/Interpreters/Cache/QueryCache.cpp @@ -0,0 +1,343 @@ +#include "Interpreters/Cache/QueryCache.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /// chassert + + +namespace ProfileEvents +{ + extern const Event QueryCacheHits; + extern const Event QueryCacheMisses; +}; + +namespace DB +{ + +namespace +{ + +struct HasNonDeterministicFunctionsMatcher +{ + struct Data + { + const ContextPtr context; + bool has_non_deterministic_functions = false; + }; + + static bool needChildVisit(const ASTPtr &, const ASTPtr &) { return true; } + + static void visit(const ASTPtr & node, Data & data) + { + if (data.has_non_deterministic_functions) + return; + + if (const auto * function = node->as()) + { + const auto func = FunctionFactory::instance().tryGet(function->name, data.context); + if (func && !func->isDeterministic()) + data.has_non_deterministic_functions = true; + } + } +}; + +using HasNonDeterministicFunctionsVisitor = InDepthNodeVisitor; + +} + +bool astContainsNonDeterministicFunctions(ASTPtr ast, ContextPtr context) +{ + HasNonDeterministicFunctionsMatcher::Data finder_data{context}; + HasNonDeterministicFunctionsVisitor(finder_data).visit(ast); + return finder_data.has_non_deterministic_functions; +} + +namespace +{ + +class RemoveQueryCacheSettingsMatcher +{ +public: + struct Data {}; + + static bool needChildVisit(ASTPtr &, const ASTPtr &) { return true; } + + static void visit(ASTPtr & ast, Data &) + { + if (auto * set_clause = ast->as()) + { + chassert(!set_clause->is_standalone); + + auto is_query_cache_related_setting = [](const auto & change) + { + return change.name == "allow_experimental_query_cache" + || change.name.starts_with("query_cache") + || change.name.ends_with("query_cache"); + }; + + std::erase_if(set_clause->changes, is_query_cache_related_setting); + } + } + + /// TODO further improve AST cleanup, e.g. remove SETTINGS clause completely if it is empty + /// E.g. SELECT 1 SETTINGS use_query_cache = true + /// and SELECT 1; + /// currently don't match. +}; + +using RemoveQueryCacheSettingsVisitor = InDepthNodeVisitor; + +/// Consider +/// (1) SET use_query_cache = true; +/// SELECT expensiveComputation(...) SETTINGS max_threads = 64, query_cache_ttl = 300; +/// SET use_query_cache = false; +/// and +/// (2) SELECT expensiveComputation(...) SETTINGS max_threads = 64, use_query_cache = true; +/// +/// The SELECT queries in (1) and (2) are basically the same and the user expects that the second invocation is served from the query +/// cache. However, query results are indexed by their query ASTs and therefore no result will be found. Insert and retrieval behave overall +/// more natural if settings related to the query cache are erased from the AST key. Note that at this point the settings themselves +/// have been parsed already, they are not lost or discarded. 
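The comment block above explains why query-cache-related settings are stripped from the AST before it is hashed into the cache key. The following self-contained sketch is plain C++ and not the ClickHouse implementation (which performs the same normalization on the AST and hashes it with getTreeHash); it only illustrates how removing those settings makes the two example queries collapse to one key:

#include <functional>
#include <iostream>
#include <map>
#include <string>

// Illustration only: treat a query as its text plus a settings map, drop the
// query-cache-related settings, and hash what remains.
bool isQueryCacheRelated(const std::string & name)
{
    return name == "allow_experimental_query_cache"
        || name.starts_with("query_cache")
        || name.ends_with("query_cache");
}

size_t cacheKey(const std::string & query_text, std::map<std::string, std::string> settings)
{
    std::erase_if(settings, [](const auto & kv) { return isQueryCacheRelated(kv.first); });

    std::string normalized = query_text;
    for (const auto & [name, value] : settings)
        normalized += " " + name + "=" + value;
    return std::hash<std::string>{}(normalized);
}

int main()
{
    /// Query (1): use_query_cache enabled via a separate SET, TTL given in the SETTINGS clause.
    auto key1 = cacheKey("SELECT expensiveComputation(...)", {{"max_threads", "64"}, {"query_cache_ttl", "300"}});
    /// Query (2): use_query_cache enabled directly in the SETTINGS clause.
    auto key2 = cacheKey("SELECT expensiveComputation(...)", {{"max_threads", "64"}, {"use_query_cache", "1"}});

    std::cout << (key1 == key2) << '\n'; /// prints 1: both normalize to the same key
}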
+ASTPtr removeQueryCacheSettings(ASTPtr ast) +{ + ASTPtr transformed_ast = ast->clone(); + + RemoveQueryCacheSettingsMatcher::Data visitor_data; + RemoveQueryCacheSettingsVisitor(visitor_data).visit(transformed_ast); + + return transformed_ast; +} + +} + +QueryCache::Key::Key( + ASTPtr ast_, + Block header_, const std::optional & username_, + std::chrono::time_point expires_at_) + : ast(removeQueryCacheSettings(ast_)) + , header(header_) + , username(username_) + , expires_at(expires_at_) +{ +} + +bool QueryCache::Key::operator==(const Key & other) const +{ + return ast->getTreeHash() == other.ast->getTreeHash(); +} + +String QueryCache::Key::queryStringFromAst() const +{ + WriteBufferFromOwnString buf; + IAST::FormatSettings format_settings(buf, /*one_line*/ true); + format_settings.show_secrets = false; + ast->format(format_settings); + return buf.str(); +} + +size_t QueryCache::KeyHasher::operator()(const Key & key) const +{ + SipHash hash; + hash.update(key.ast->getTreeHash()); + auto res = hash.get64(); + return res; +} + +size_t QueryCache::QueryResult::sizeInBytes() const +{ + size_t res = 0; + for (const auto & chunk : *chunks) + res += chunk.allocatedBytes(); + return res; +}; + +namespace +{ + +auto is_stale = [](const QueryCache::Key & key) +{ + return (key.expires_at < std::chrono::system_clock::now()); +}; + +} + +QueryCache::Writer::Writer(std::mutex & mutex_, Cache & cache_, const Key & key_, + size_t & cache_size_in_bytes_, size_t max_cache_size_in_bytes_, + size_t max_cache_entries_, + size_t max_entry_size_in_bytes_, size_t max_entry_size_in_rows_, + std::chrono::milliseconds min_query_runtime_) + : mutex(mutex_) + , cache(cache_) + , key(key_) + , cache_size_in_bytes(cache_size_in_bytes_) + , max_cache_size_in_bytes(max_cache_size_in_bytes_) + , max_cache_entries(max_cache_entries_) + , max_entry_size_in_bytes(max_entry_size_in_bytes_) + , max_entry_size_in_rows(max_entry_size_in_rows_) + , min_query_runtime(min_query_runtime_) +{ + if (auto it = cache.find(key); it != cache.end() && !is_stale(it->first)) + skip_insert = true; /// Key already contained in cache and did not expire yet --> don't replace it +} + +void QueryCache::Writer::buffer(Chunk && partial_query_result) +{ + if (skip_insert) + return; + + auto & chunks = query_result.chunks; + + chunks->emplace_back(std::move(partial_query_result)); + + new_entry_size_in_bytes += chunks->back().allocatedBytes(); + new_entry_size_in_rows += chunks->back().getNumRows(); + + if ((new_entry_size_in_bytes > max_entry_size_in_bytes) || (new_entry_size_in_rows > max_entry_size_in_rows)) + { + chunks->clear(); /// eagerly free some space + skip_insert = true; + } +} + +void QueryCache::Writer::finalizeWrite() +{ + if (skip_insert) + return; + + if (std::chrono::duration_cast(std::chrono::system_clock::now() - query_start_time) < min_query_runtime) + return; + + std::lock_guard lock(mutex); + + if (auto it = cache.find(key); it != cache.end() && !is_stale(it->first)) + return; /// same check as in ctor because a parallel Writer could have inserted the current key in the meantime + + auto sufficient_space_in_cache = [this]() TSA_REQUIRES(mutex) + { + return (cache_size_in_bytes + new_entry_size_in_bytes <= max_cache_size_in_bytes) && (cache.size() + 1 <= max_cache_entries); + }; + + if (!sufficient_space_in_cache()) + { + size_t removed_items = 0; + /// Remove stale entries + for (auto it = cache.begin(); it != cache.end();) + if (is_stale(it->first)) + { + cache_size_in_bytes -= it->second.sizeInBytes(); + it = cache.erase(it); 
+ ++removed_items; + } + else + ++it; + LOG_TRACE(&Poco::Logger::get("QueryCache"), "Removed {} stale entries", removed_items); + } + + /// Insert or replace if enough space + if (sufficient_space_in_cache()) + { + cache_size_in_bytes += query_result.sizeInBytes(); + if (auto it = cache.find(key); it != cache.end()) + cache_size_in_bytes -= it->second.sizeInBytes(); // key replacement + + cache[key] = std::move(query_result); + LOG_TRACE(&Poco::Logger::get("QueryCache"), "Stored result of query {}", key.queryStringFromAst()); + } +} + +QueryCache::Reader::Reader(const Cache & cache_, const Key & key, size_t & cache_size_in_bytes_, const std::lock_guard &) +{ + auto it = cache_.find(key); + + if (it == cache_.end()) + { + LOG_TRACE(&Poco::Logger::get("QueryCache"), "No entry found for query {}", key.queryStringFromAst()); + return; + } + + if (it->first.username.has_value() && it->first.username != key.username) + { + LOG_TRACE(&Poco::Logger::get("QueryCache"), "Inaccessible entry found for query {}", key.queryStringFromAst()); + return; + } + + if (is_stale(it->first)) + { + cache_size_in_bytes_ -= it->second.sizeInBytes(); + const_cast(cache_).erase(it); + LOG_TRACE(&Poco::Logger::get("QueryCache"), "Stale entry found and removed for query {}", key.queryStringFromAst()); + return; + } + + pipe = Pipe(std::make_shared(it->first.header, it->second.chunks)); + LOG_TRACE(&Poco::Logger::get("QueryCache"), "Entry found for query {}", key.queryStringFromAst()); +} + +bool QueryCache::Reader::hasCacheEntryForKey() const +{ + bool res = !pipe.empty(); + + if (res) + ProfileEvents::increment(ProfileEvents::QueryCacheHits); + else + ProfileEvents::increment(ProfileEvents::QueryCacheMisses); + + return res; +} + +Pipe && QueryCache::Reader::getPipe() +{ + chassert(!pipe.empty()); // cf. hasCacheEntryForKey() + return std::move(pipe); +} + +QueryCache::QueryCache(size_t max_cache_size_in_bytes_, size_t max_cache_entries_, size_t max_cache_entry_size_in_bytes_, size_t max_cache_entry_size_in_rows_) + : max_cache_size_in_bytes(max_cache_size_in_bytes_) + , max_cache_entries(max_cache_entries_) + , max_cache_entry_size_in_bytes(max_cache_entry_size_in_bytes_) + , max_cache_entry_size_in_rows(max_cache_entry_size_in_rows_) +{ +} + +QueryCache::Reader QueryCache::createReader(const Key & key) +{ + std::lock_guard lock(mutex); + return Reader(cache, key, cache_size_in_bytes, lock); +} + +QueryCache::Writer QueryCache::createWriter(const Key & key, std::chrono::milliseconds min_query_runtime) +{ + std::lock_guard lock(mutex); + return Writer(mutex, cache, key, cache_size_in_bytes, max_cache_size_in_bytes, max_cache_entries, max_cache_entry_size_in_bytes, max_cache_entry_size_in_rows, min_query_runtime); +} + +void QueryCache::reset() +{ + std::lock_guard lock(mutex); + cache.clear(); + times_executed.clear(); + cache_size_in_bytes = 0; +} + +size_t QueryCache::recordQueryRun(const Key & key) +{ + static constexpr size_t TIMES_EXECUTED_MAX_SIZE = 10'000; + + std::lock_guard times_executed_lock(mutex); + size_t times = ++times_executed[key]; + // Regularly drop times_executed to avoid DOS-by-unlimited-growth. 
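Taken together, the Writer and Reader code above implements a small TTL-governed cache: a result is inserted only after the query finishes, only if it ran at least min_query_runtime, and only if there is room once stale entries have been purged. The following self-contained sketch of that insertion policy is plain C++ rather than the ClickHouse classes; names such as MiniCache are invented for the illustration:

#include <chrono>
#include <cstddef>
#include <map>
#include <string>

/// Illustration of the finalizeWrite() policy: purge stale entries only when the new
/// entry does not fit, and skip the insert entirely if there is still not enough room.
struct Entry
{
    std::chrono::system_clock::time_point expires_at;
    size_t size_in_bytes = 0;
};

class MiniCache
{
public:
    MiniCache(size_t max_bytes_, size_t max_entries_) : max_bytes(max_bytes_), max_entries(max_entries_) {}

    bool insert(const std::string & key, Entry entry)
    {
        auto fits = [&] { return bytes + entry.size_in_bytes <= max_bytes && entries.size() + 1 <= max_entries; };

        if (!fits())
        {
            /// Remove stale entries, mirroring the eviction loop in finalizeWrite().
            for (auto it = entries.begin(); it != entries.end();)
            {
                if (isStale(it->second))
                {
                    bytes -= it->second.size_in_bytes;
                    it = entries.erase(it);
                }
                else
                    ++it;
            }
        }

        if (!fits())
            return false; /// still no room: the result is simply not cached

        if (auto it = entries.find(key); it != entries.end())
            bytes -= it->second.size_in_bytes; /// key replacement, as in the original

        bytes += entry.size_in_bytes;
        entries[key] = std::move(entry);
        return true;
    }

private:
    static bool isStale(const Entry & e) { return e.expires_at < std::chrono::system_clock::now(); }

    std::map<std::string, Entry> entries;
    size_t bytes = 0;
    const size_t max_bytes;
    const size_t max_entries;
};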
+ if (times_executed.size() > TIMES_EXECUTED_MAX_SIZE) + times_executed.clear(); + return times; +} + +} diff --git a/src/Interpreters/Cache/QueryCache.h b/src/Interpreters/Cache/QueryCache.h new file mode 100644 index 00000000000..45f48c7a558 --- /dev/null +++ b/src/Interpreters/Cache/QueryCache.h @@ -0,0 +1,168 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +/// Does AST contain non-deterministic functions like rand() and now()? +bool astContainsNonDeterministicFunctions(ASTPtr ast, ContextPtr context); + +/// Maps queries to query results. Useful to avoid repeated query calculation. +/// +/// The cache does not aim to be transactionally consistent (which is difficult to get right). For example, the cache is not invalidated +/// when data is inserted/deleted into/from tables referenced by queries in the cache. In such situations, incorrect results may be +/// returned. In order to still obtain sufficiently up-to-date query results, a expiry time (TTL) must be specified for each cache entry +/// after which it becomes stale and is ignored. Stale entries are removed opportunistically from the cache, they are only evicted when a +/// new entry is inserted and the cache has insufficient capacity. +class QueryCache +{ +public: + /// Represents a query result in the cache. + struct Key + { + /// ---------------------------------------------------- + /// The actual key (data which gets hashed): + + /// Unlike the query string, the AST is agnostic to lower/upper case (SELECT vs. select) + const ASTPtr ast; + + /// Note: For a transactionally consistent cache, we would need to include the system settings in the cache key or invalidate the + /// cache whenever the settings change. This is because certain settings (e.g. "additional_table_filters") can affect the query + /// result. + + /// ---------------------------------------------------- + /// Additional stuff data stored in the key, not hashed: + + /// Result metadata for constructing the pipe. + const Block header; + + /// Std::nullopt means that the associated entry can be read by other users. In general, sharing is a bad idea: First, it is + /// unlikely that different users pose the same queries. Second, sharing potentially breaches security. E.g. User A should not be + /// able to bypass row policies on some table by running the same queries as user B for whom no row policies exist. + const std::optional username; + + /// When does the entry expire? + const std::chrono::time_point expires_at; + + Key(ASTPtr ast_, + Block header_, const std::optional & username_, + std::chrono::time_point expires_at_); + + bool operator==(const Key & other) const; + String queryStringFromAst() const; + }; + + struct QueryResult + { + std::shared_ptr chunks = std::make_shared(); + size_t sizeInBytes() const; + + /// Notes: 1. For performance reasons, we cache the original result chunks as-is (no concatenation during cache insert or lookup). + /// 2. Ref-counting (shared_ptr) ensures that eviction of an entry does not affect queries which still read from the cache. 
+ /// (this can also be achieved by copying the chunks during lookup but that would be under the cache lock --> too slow) + }; + +private: + struct KeyHasher + { + size_t operator()(const Key & key) const; + }; + + /// query --> query result + using Cache = std::unordered_map; + + /// query --> query execution count + using TimesExecuted = std::unordered_map; + +public: + /// Buffers multiple partial query result chunks (buffer()) and eventually stores them as cache entry (finalizeWrite()). + /// + /// Implementation note: Queries may throw exceptions during runtime, e.g. out-of-memory errors. In this case, no query result must be + /// written into the query cache. Unfortunately, neither the Writer nor the special transform added on top of the query pipeline which + /// holds the Writer know whether they are destroyed because the query ended successfully or because of an exception (otherwise, we + /// could simply implement a check in their destructors). To handle exceptions correctly nevertheless, we do the actual insert in + /// finalizeWrite() as opposed to the Writer destructor. This function is then called only for successful queries in finish_callback() + /// which runs before the transform and the Writer are destroyed, whereas for unsuccessful queries we do nothing (the Writer is + /// destroyed w/o inserting anything). + /// Queries may also be cancelled by the user, in which case IProcessor's cancel bit is set. FinalizeWrite() is only called if the + /// cancel bit is not set. + class Writer + { + public: + void buffer(Chunk && partial_query_result); + void finalizeWrite(); + private: + std::mutex & mutex; + Cache & cache TSA_GUARDED_BY(mutex); + const Key key; + size_t & cache_size_in_bytes TSA_GUARDED_BY(mutex); + const size_t max_cache_size_in_bytes; + const size_t max_cache_entries; + size_t new_entry_size_in_bytes = 0; + const size_t max_entry_size_in_bytes; + size_t new_entry_size_in_rows = 0; + const size_t max_entry_size_in_rows; + const std::chrono::time_point query_start_time = std::chrono::system_clock::now(); /// Writer construction and finalizeWrite() coincide with query start/end + const std::chrono::milliseconds min_query_runtime; + QueryResult query_result; + std::atomic skip_insert = false; + + Writer(std::mutex & mutex_, Cache & cache_, const Key & key_, + size_t & cache_size_in_bytes_, size_t max_cache_size_in_bytes_, + size_t max_cache_entries_, + size_t max_entry_size_in_bytes_, size_t max_entry_size_in_rows_, + std::chrono::milliseconds min_query_runtime_); + + friend class QueryCache; /// for createWriter() + }; + + /// Looks up a query result for a key in the cache and (if found) constructs a pipe with the query result chunks as source. + class Reader + { + public: + bool hasCacheEntryForKey() const; + Pipe && getPipe(); /// must be called only if hasCacheEntryForKey() returns true + private: + Reader(const Cache & cache_, const Key & key, size_t & cache_size_in_bytes_, const std::lock_guard &); + Pipe pipe; + friend class QueryCache; /// for createReader() + }; + + QueryCache(size_t max_cache_size_in_bytes_, size_t max_cache_entries_, size_t max_cache_entry_size_in_bytes_, size_t max_cache_entry_size_in_rows_); + + Reader createReader(const Key & key); + Writer createWriter(const Key & key, std::chrono::milliseconds min_query_runtime); + + void reset(); + + /// Record new execution of query represented by key. Returns number of executions so far. 
+ size_t recordQueryRun(const Key & key); + +private: + /// Implementation note: The query result implements a custom caching mechanism and doesn't make use of CacheBase, unlike many other + /// internal caches in ClickHouse. The main reason is that we don't need standard CacheBase (S)LRU eviction as the expiry times + /// associated with cache entries provide a "natural" eviction criterion. As a future TODO, we could make an expiry-based eviction + /// policy and use that with CacheBase (e.g. see #23706) + /// TODO To speed up removal of stale entries, we could also add another container sorted on expiry times which maps keys to iterators + /// into the cache. To insert an entry, add it to the cache + add the iterator to the sorted container. To remove stale entries, do a + /// binary search on the sorted container and erase all left of the found key. + mutable std::mutex mutex; + Cache cache TSA_GUARDED_BY(mutex); + TimesExecuted times_executed TSA_GUARDED_BY(mutex); + + size_t cache_size_in_bytes TSA_GUARDED_BY(mutex) = 0; /// updated in each cache insert/delete + const size_t max_cache_size_in_bytes; + const size_t max_cache_entries; + const size_t max_cache_entry_size_in_bytes; + const size_t max_cache_entry_size_in_rows; + + friend class StorageSystemQueryCache; +}; + +using QueryCachePtr = std::shared_ptr; + +} diff --git a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp index 16906e9440e..08c083b1976 100644 --- a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp +++ b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp @@ -1,5 +1,6 @@ #include #include +#include #include @@ -12,20 +13,6 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -namespace -{ - class SwapHelper - { - public: - SwapHelper(WriteBuffer & b1_, WriteBuffer & b2_) : b1(b1_), b2(b2_) { b1.swap(b2); } - ~SwapHelper() { b1.swap(b2); } - - private: - WriteBuffer & b1; - WriteBuffer & b2; - }; -} - WriteBufferToFileSegment::WriteBufferToFileSegment(FileSegment * file_segment_) : WriteBufferFromFileDecorator(file_segment_->detachWriter()), file_segment(file_segment_) { diff --git a/src/Interpreters/ClientInfo.cpp b/src/Interpreters/ClientInfo.cpp index b457df53db9..413ff1db6bc 100644 --- a/src/Interpreters/ClientInfo.cpp +++ b/src/Interpreters/ClientInfo.cpp @@ -22,7 +22,7 @@ namespace ErrorCodes void ClientInfo::write(WriteBuffer & out, UInt64 server_protocol_revision) const { if (server_protocol_revision < DBMS_MIN_REVISION_WITH_CLIENT_INFO) - throw Exception("Logical error: method ClientInfo::write is called for unsupported server revision", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: method ClientInfo::write is called for unsupported server revision"); writeBinary(static_cast(query_kind), out); if (empty()) @@ -102,7 +102,7 @@ void ClientInfo::write(WriteBuffer & out, UInt64 server_protocol_revision) const void ClientInfo::read(ReadBuffer & in, UInt64 client_protocol_revision) { if (client_protocol_revision < DBMS_MIN_REVISION_WITH_CLIENT_INFO) - throw Exception("Logical error: method ClientInfo::read is called for unsupported client revision", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: method ClientInfo::read is called for unsupported client revision"); UInt8 read_query_kind = 0; readBinary(read_query_kind, in); diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index b76434b23e7..bf3a66fed99 100644 --- a/src/Interpreters/Cluster.cpp +++ 
b/src/Interpreters/Cluster.cpp @@ -114,7 +114,7 @@ Cluster::Address::Address( port = static_cast(config.getInt(config_prefix + ".port", default_port)); if (!port) - throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Port is not specified in cluster configuration: {}", config_prefix + ".port"); + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Port is not specified in cluster configuration: {}.port", config_prefix); is_local = isLocal(config.getInt(port_type, 0)); @@ -201,7 +201,7 @@ std::pair Cluster::Address::fromString(const String & host_port_ { auto pos = host_port_string.find_last_of(':'); if (pos == std::string::npos) - throw Exception("Incorrect : format " + host_port_string, ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Incorrect : format {}", host_port_string); return {unescapeForFileName(host_port_string.substr(0, pos)), parse(host_port_string.substr(pos + 1))}; } @@ -213,7 +213,7 @@ String Cluster::Address::toFullString(bool use_compact_format) const { if (shard_index == 0 || replica_index == 0) // shard_num/replica_num like in system.clusters table - throw Exception("shard_num/replica_num cannot be zero", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "shard_num/replica_num cannot be zero"); return fmt::format("shard{}_replica{}", shard_index, replica_index); } @@ -263,12 +263,12 @@ Cluster::Address Cluster::Address::fromFullString(const String & full_string) const char * colon = strchr(full_string.data(), ':'); if (!user_pw_end || !colon) - throw Exception("Incorrect user[:password]@host:port#default_database format " + full_string, ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Incorrect user[:password]@host:port#default_database format {}", full_string); const bool has_pw = colon < user_pw_end; const char * host_end = has_pw ? strchr(user_pw_end + 1, ':') : colon; if (!host_end) - throw Exception("Incorrect address '" + full_string + "', it does not contain port", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Incorrect address '{}', it does not contain port", full_string); const char * has_db = strchr(full_string.data(), '#'); const char * port_end = has_db ? has_db : address_end; @@ -362,7 +362,7 @@ void Clusters::updateClusters(const Poco::Util::AbstractConfiguration & new_conf } if (key.find('.') != String::npos) - throw Exception("Cluster names with dots are not supported: '" + key + "'", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Cluster names with dots are not supported: '{}'", key); /// If old config is set and cluster config wasn't changed, don't update this cluster. if (!old_config || !isSameConfiguration(new_config, *old_config, config_prefix + "." 
+ key)) @@ -396,7 +396,7 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config, boost::range::remove_erase(config_keys, "secret"); if (config_keys.empty()) - throw Exception("No cluster elements (shard, node) specified in config at path " + config_prefix, ErrorCodes::SHARD_HAS_NO_CONNECTIONS); + throw Exception(ErrorCodes::SHARD_HAS_NO_CONNECTIONS, "No cluster elements (shard, node) specified in config at path {}", config_prefix); UInt32 current_shard_num = 1; for (const auto & key : config_keys) @@ -485,7 +485,7 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config, } } else - throw Exception("Unknown element in config: " + replica_key, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG, "Unknown element in config: {}", replica_key); } Addresses shard_local_addresses; @@ -529,13 +529,13 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config, }); } else - throw Exception("Unknown element in config: " + key, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG, "Unknown element in config: {}", key); ++current_shard_num; } if (addresses_with_failover.empty()) - throw Exception("There must be either 'node' or 'shard' elements in config", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG); + throw Exception(ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG, "There must be either 'node' or 'shard' elements in config"); initMisc(); } @@ -632,8 +632,7 @@ void Cluster::initMisc() for (const auto & shard_info : shards_info) { if (!shard_info.isLocal() && !shard_info.hasRemoteConnections()) - throw Exception("Found shard without any specified connection", - ErrorCodes::SHARD_HAS_NO_CONNECTIONS); + throw Exception(ErrorCodes::SHARD_HAS_NO_CONNECTIONS, "Found shard without any specified connection"); } for (const auto & shard_info : shards_info) @@ -672,7 +671,7 @@ std::unique_ptr Cluster::getClusterWithMultipleShards(const std::vector Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Settings & settings) { if (from.addresses_with_failover.empty()) - throw Exception("Cluster is empty", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cluster is empty"); UInt32 shard_num = 0; std::set> unique_hosts; @@ -790,7 +789,7 @@ std::vector Cluster::filterAddressesByShardOrReplica(s const std::string & Cluster::ShardInfo::insertPathForInternalReplication(bool prefer_localhost_replica, bool use_compact_format) const { if (!has_internal_replication) - throw Exception("internal_replication is not set", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "internal_replication is not set"); const auto & paths = insert_path_for_internal_replication; if (!use_compact_format) diff --git a/src/Interpreters/Cluster.h b/src/Interpreters/Cluster.h index ada04aa1cae..a7f5a914974 100644 --- a/src/Interpreters/Cluster.h +++ b/src/Interpreters/Cluster.h @@ -224,7 +224,7 @@ public: const ShardInfo & getAnyShardInfo() const { if (shards_info.empty()) - throw Exception("Cluster is empty", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cluster is empty"); return shards_info.front(); } diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp index 2e2f886a50a..5c781c531ed 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include 
#include @@ -152,10 +151,8 @@ void SelectStreamFactory::createForShard( return; } else - throw Exception( - "Local replica of shard " + toString(shard_info.shard_num) - + " is stale (delay: " + toString(local_delay) + "s.), but no other replica configured", - ErrorCodes::ALL_REPLICAS_ARE_STALE); + throw Exception(ErrorCodes::ALL_REPLICAS_ARE_STALE, "Local replica of shard {} is stale (delay: " + "{}s.), but no other replica configured", shard_info.shard_num, toString(local_delay)); } if (!shard_info.hasRemoteConnections()) diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 8ea6298c50b..fe31b4d8302 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -132,7 +132,7 @@ void executeQuery( const Settings & settings = context->getSettingsRef(); if (settings.max_distributed_depth && context->getClientInfo().distributed_depth >= settings.max_distributed_depth) - throw Exception("Maximum distributed depth exceeded", ErrorCodes::TOO_LARGE_DISTRIBUTED_DEPTH); + throw Exception(ErrorCodes::TOO_LARGE_DISTRIBUTED_DEPTH, "Maximum distributed depth exceeded"); std::vector plans; SelectStreamFactory::Shards remote_shards; diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.cpp b/src/Interpreters/CollectJoinOnKeysVisitor.cpp index 9b264cb52a3..fd6fc27faec 100644 --- a/src/Interpreters/CollectJoinOnKeysVisitor.cpp +++ b/src/Interpreters/CollectJoinOnKeysVisitor.cpp @@ -48,8 +48,7 @@ void CollectJoinOnKeysMatcher::Data::addJoinKeys(const ASTPtr & left_ast, const else if (isRightIdentifier(table_pos.first) && isLeftIdentifier(table_pos.second)) analyzed_join.addOnKeys(right, left); else - throw Exception("Cannot detect left and right JOIN keys. JOIN ON section is ambiguous.", - ErrorCodes::INVALID_JOIN_ON_EXPRESSION); + throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, "Cannot detect left and right JOIN keys. 
JOIN ON section is ambiguous."); } void CollectJoinOnKeysMatcher::Data::addAsofJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, @@ -78,7 +77,7 @@ void CollectJoinOnKeysMatcher::Data::addAsofJoinKeys(const ASTPtr & left_ast, co void CollectJoinOnKeysMatcher::Data::asofToJoinKeys() { if (!asof_left_key || !asof_right_key) - throw Exception("No inequality in ASOF JOIN ON section.", ErrorCodes::INVALID_JOIN_ON_EXPRESSION); + throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, "No inequality in ASOF JOIN ON section."); addJoinKeys(asof_left_key, asof_right_key, {JoinIdentifierPos::Left, JoinIdentifierPos::Right}); } @@ -87,8 +86,7 @@ void CollectJoinOnKeysMatcher::visit(const ASTIdentifier & ident, const ASTPtr & if (auto expr_from_table = getTableForIdentifiers(ast, false, data); isDeterminedIdentifier(expr_from_table)) data.analyzed_join.addJoinCondition(ast, isLeftIdentifier(expr_from_table)); else - throw Exception("Unexpected identifier '" + ident.name() + "' in JOIN ON section", - ErrorCodes::INVALID_JOIN_ON_EXPRESSION); + throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, "Unexpected identifier '{}' in JOIN ON section", ident.name()); } void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & ast, Data & data) @@ -101,8 +99,8 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as if (func.name == "equals" || inequality != ASOFJoinInequality::None) { if (func.arguments->children.size() != 2) - throw Exception("Function " + func.name + " takes two arguments, got '" + func.formatForErrorMessage() + "' instead", - ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Function {} takes two arguments, got '{}' instead", + func.name, func.formatForErrorMessage()); } if (func.name == "equals") @@ -138,8 +136,8 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as if (data.is_asof && inequality != ASOFJoinInequality::None) { if (data.asof_left_key || data.asof_right_key) - throw Exception("ASOF JOIN expects exactly one inequality in ON section. Unexpected '" + queryToString(ast) + "'", - ErrorCodes::INVALID_JOIN_ON_EXPRESSION); + throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, "ASOF JOIN expects exactly one inequality in ON section. " + "Unexpected '{}'", queryToString(ast)); ASTPtr left = func.arguments->children.at(0); ASTPtr right = func.arguments->children.at(1); @@ -149,8 +147,8 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as return; } - throw Exception("Unsupported JOIN ON conditions. Unexpected '" + queryToString(ast) + "'", - ErrorCodes::INVALID_JOIN_ON_EXPRESSION); + throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, "Unsupported JOIN ON conditions. Unexpected '{}'", + queryToString(ast)); } void CollectJoinOnKeysMatcher::getIdentifiers(const ASTPtr & ast, std::vector & out) @@ -158,8 +156,8 @@ void CollectJoinOnKeysMatcher::getIdentifiers(const ASTPtr & ast, std::vectoras()) { if (func->name == "arrayJoin") - throw Exception("Not allowed function in JOIN ON. Unexpected '" + queryToString(ast) + "'", - ErrorCodes::INVALID_JOIN_ON_EXPRESSION); + throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, "Not allowed function in JOIN ON. 
Unexpected '{}'", + queryToString(ast)); } else if (const auto * ident = ast->as()) { @@ -199,7 +197,7 @@ const ASTIdentifier * CollectJoinOnKeysMatcher::unrollAliases(const ASTIdentifie it = aliases.find(identifier->name()); if (!max_attempts--) - throw Exception("Cannot unroll aliases for '" + identifier->name() + "'", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot unroll aliases for '{}'", identifier->name()); } return identifier; @@ -254,7 +252,7 @@ JoinIdentifierPos CollectJoinOnKeysMatcher::getTableForIdentifiers(const ASTPtr in_left_table = !in_right_table; } else - throw Exception("Column '" + name + "' is ambiguous", ErrorCodes::AMBIGUOUS_COLUMN_NAME); + throw Exception(ErrorCodes::AMBIGUOUS_COLUMN_NAME, "Column '{}' is ambiguous", name); } if (in_left_table) @@ -272,9 +270,9 @@ JoinIdentifierPos CollectJoinOnKeysMatcher::getTableForIdentifiers(const ASTPtr if (membership != JoinIdentifierPos::Unknown && membership != table_number) { if (throw_on_table_mix) - throw Exception("Invalid columns in JOIN ON section. Columns " - + identifiers[0]->getAliasOrColumnName() + " and " + ident->getAliasOrColumnName() - + " are from different tables.", ErrorCodes::INVALID_JOIN_ON_EXPRESSION); + throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, "Invalid columns in JOIN ON section. " + "Columns {} and {} are from different tables.", + identifiers[0]->getAliasOrColumnName(), ident->getAliasOrColumnName()); return JoinIdentifierPos::Unknown; } } diff --git a/src/Interpreters/ConcurrentHashJoin.cpp b/src/Interpreters/ConcurrentHashJoin.cpp index 6c77539532f..03c173a73d9 100644 --- a/src/Interpreters/ConcurrentHashJoin.cpp +++ b/src/Interpreters/ConcurrentHashJoin.cpp @@ -167,7 +167,8 @@ IBlocksStreamPtr ConcurrentHashJoin::getNonJoinedBlocks( if (!JoinCommon::hasNonJoinedBlocks(*table_join)) return {}; - throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid join type. join kind: {}, strictness: {}", table_join->kind(), table_join->strictness()); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid join type. join kind: {}, strictness: {}", + table_join->kind(), table_join->strictness()); } static ALWAYS_INLINE IColumn::Selector hashToSelector(const WeakHash32 & hash, size_t num_shards) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 0f1126d2502..d9a7aa2e677 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -235,6 +236,7 @@ struct ContextSharedPart : boost::noncopyable mutable std::unique_ptr load_marks_threadpool; /// Threadpool for loading marks cache. mutable UncompressedCachePtr index_uncompressed_cache; /// The cache of decompressed blocks for MergeTree indices. mutable MarkCachePtr index_mark_cache; /// Cache of marks in compressed files of MergeTree indices. + mutable QueryCachePtr query_cache; /// Cache of query results. mutable MMappedFileCachePtr mmap_cache; /// Cache of mmapped files to avoid frequent open/map/unmap/close and to reuse from several threads. ProcessList process_list; /// Executing queries at the moment. 
GlobalOvercommitTracker global_overcommit_tracker; @@ -622,7 +624,7 @@ ContextMutablePtr Context::createCopy(const ContextPtr & other) ContextMutablePtr Context::createCopy(const ContextWeakPtr & other) { auto ptr = other.lock(); - if (!ptr) throw Exception("Can't copy an expired context", ErrorCodes::LOGICAL_ERROR); + if (!ptr) throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't copy an expired context"); return createCopy(ptr); } @@ -657,7 +659,7 @@ String Context::resolveDatabase(const String & database_name) const { String res = database_name.empty() ? getCurrentDatabase() : database_name; if (res.empty()) - throw Exception("Default database is not selected", ErrorCodes::UNKNOWN_DATABASE); + throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Default database is not selected"); return res; } @@ -820,7 +822,7 @@ void Context::setTemporaryStoragePolicy(const String & policy_name, size_t max_s VolumePtr volume = tmp_policy->getVolume(0); if (volume->getDisks().empty()) - throw Exception("No disks volume for temporary files", ErrorCodes::NO_ELEMENTS_IN_CONFIG); + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "No disks volume for temporary files"); for (const auto & disk : volume->getDisks()) { @@ -1164,7 +1166,7 @@ const Block & Context::getScalar(const String & name) const { // This should be a logical error, but it fails the sql_fuzz test too // often, so 'bad arguments' for now. - throw Exception("Scalar " + backQuoteIfNeed(name) + " doesn't exist (internal bug)", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Scalar {} doesn't exist (internal bug)", backQuoteIfNeed(name)); } return it->second; } @@ -1211,7 +1213,7 @@ void Context::addExternalTable(const String & table_name, TemporaryTableHolder & auto lock = getLock(); if (external_tables_mapping.end() != external_tables_mapping.find(table_name)) - throw Exception("Temporary table " + backQuoteIfNeed(table_name) + " already exists.", ErrorCodes::TABLE_ALREADY_EXISTS); + throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Temporary table {} already exists.", backQuoteIfNeed(table_name)); external_tables_mapping.emplace(table_name, std::make_shared(std::move(temporary_table))); } @@ -1444,8 +1446,8 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const void Context::addViewSource(const StoragePtr & storage) { if (view_source) - throw Exception( - "Temporary view source storage " + backQuoteIfNeed(view_source->getName()) + " already exists.", ErrorCodes::TABLE_ALREADY_EXISTS); + throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Temporary view source storage {} already exists.", + backQuoteIfNeed(view_source->getName())); view_source = storage; } @@ -1595,13 +1597,13 @@ String Context::getInitialQueryId() const void Context::setCurrentDatabaseNameInGlobalContext(const String & name) { if (!isGlobalContext()) - throw Exception("Cannot set current database for non global context, this method should be used during server initialization", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Cannot set current database for non global context, this method should " + "be used during server initialization"); auto lock = getLock(); if (!current_database.empty()) - throw Exception("Default database name cannot be changed in global context without server restart", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Default database name cannot be changed in global context without server restart"); current_database = name; } @@ -1693,7 +1695,7 @@ void 
Context::setMacros(std::unique_ptr && macros) ContextMutablePtr Context::getQueryContext() const { auto ptr = query_context.lock(); - if (!ptr) throw Exception("There is no query or query context has expired", ErrorCodes::THERE_IS_NO_QUERY); + if (!ptr) throw Exception(ErrorCodes::THERE_IS_NO_QUERY, "There is no query or query context has expired"); return ptr; } @@ -1706,20 +1708,20 @@ bool Context::isInternalSubquery() const ContextMutablePtr Context::getSessionContext() const { auto ptr = session_context.lock(); - if (!ptr) throw Exception("There is no session or session context has expired", ErrorCodes::THERE_IS_NO_SESSION); + if (!ptr) throw Exception(ErrorCodes::THERE_IS_NO_SESSION, "There is no session or session context has expired"); return ptr; } ContextMutablePtr Context::getGlobalContext() const { auto ptr = global_context.lock(); - if (!ptr) throw Exception("There is no global context or global context has expired", ErrorCodes::LOGICAL_ERROR); + if (!ptr) throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no global context or global context has expired"); return ptr; } ContextMutablePtr Context::getBufferContext() const { - if (!buffer_context) throw Exception("There is no buffer context", ErrorCodes::LOGICAL_ERROR); + if (!buffer_context) throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no buffer context"); return buffer_context; } @@ -1888,8 +1890,8 @@ BackupsWorker & Context::getBackupsWorker() const { auto lock = getLock(); - const bool allow_concurrent_backups = this->getConfigRef().getBool("allow_concurrent_backups", true); - const bool allow_concurrent_restores = this->getConfigRef().getBool("allow_concurrent_restores", true); + const bool allow_concurrent_backups = this->getConfigRef().getBool("backups.allow_concurrent_backups", true); + const bool allow_concurrent_restores = this->getConfigRef().getBool("backups.allow_concurrent_restores", true); if (!shared->backups_worker) shared->backups_worker.emplace(getSettingsRef().backup_threads, getSettingsRef().restore_threads, allow_concurrent_backups, allow_concurrent_restores); @@ -1932,7 +1934,7 @@ void Context::setUncompressedCache(size_t max_size_in_bytes, const String & unco auto lock = getLock(); if (shared->uncompressed_cache) - throw Exception("Uncompressed cache has been already created.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Uncompressed cache has been already created."); shared->uncompressed_cache = std::make_shared(max_size_in_bytes, uncompressed_cache_policy); } @@ -1958,7 +1960,7 @@ void Context::setMarkCache(size_t cache_size_in_bytes, const String & mark_cache auto lock = getLock(); if (shared->mark_cache) - throw Exception("Mark cache has been already created.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Mark cache has been already created."); shared->mark_cache = std::make_shared(cache_size_in_bytes, mark_cache_policy); } @@ -1978,11 +1980,13 @@ void Context::dropMarkCache() const ThreadPool & Context::getLoadMarksThreadpool() const { + const auto & config = getConfigRef(); + auto lock = getLock(); if (!shared->load_marks_threadpool) { - constexpr size_t pool_size = 50; - constexpr size_t queue_size = 1000000; + auto pool_size = config.getUInt(".load_marks_threadpool_pool_size", 50); + auto queue_size = config.getUInt(".load_marks_threadpool_queue_size", 1000000); shared->load_marks_threadpool = std::make_unique(pool_size, pool_size, queue_size); } return *shared->load_marks_threadpool; @@ -1993,7 +1997,7 @@ void 
Context::setIndexUncompressedCache(size_t max_size_in_bytes) auto lock = getLock(); if (shared->index_uncompressed_cache) - throw Exception("Index uncompressed cache has been already created.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Index uncompressed cache has been already created."); shared->index_uncompressed_cache = std::make_shared(max_size_in_bytes); } @@ -2019,7 +2023,7 @@ void Context::setIndexMarkCache(size_t cache_size_in_bytes) auto lock = getLock(); if (shared->index_mark_cache) - throw Exception("Index mark cache has been already created.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Index mark cache has been already created."); shared->index_mark_cache = std::make_shared(cache_size_in_bytes); } @@ -2037,13 +2041,35 @@ void Context::dropIndexMarkCache() const shared->index_mark_cache->reset(); } +void Context::setQueryCache(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes, size_t max_entry_size_in_records) +{ + auto lock = getLock(); + + if (shared->query_cache) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Query cache has been already created."); + + shared->query_cache = std::make_shared(max_size_in_bytes, max_entries, max_entry_size_in_bytes, max_entry_size_in_records); +} + +QueryCachePtr Context::getQueryCache() const +{ + auto lock = getLock(); + return shared->query_cache; +} + +void Context::dropQueryCache() const +{ + auto lock = getLock(); + if (shared->query_cache) + shared->query_cache->reset(); +} void Context::setMMappedFileCache(size_t cache_size_in_num_entries) { auto lock = getLock(); if (shared->mmap_cache) - throw Exception("Mapped file cache has been already created.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Mapped file cache has been already created."); shared->mmap_cache = std::make_shared(cache_size_in_num_entries); } @@ -2078,6 +2104,9 @@ void Context::dropCaches() const if (shared->index_mark_cache) shared->index_mark_cache->reset(); + if (shared->query_cache) + shared->query_cache->reset(); + if (shared->mmap_cache) shared->mmap_cache->reset(); } @@ -2242,7 +2271,7 @@ void Context::setDDLWorker(std::unique_ptr ddl_worker) { auto lock = getLock(); if (shared->ddl_worker) - throw Exception("DDL background thread has already been initialized", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "DDL background thread has already been initialized"); ddl_worker->startup(); shared->ddl_worker = std::move(ddl_worker); } @@ -2253,12 +2282,12 @@ DDLWorker & Context::getDDLWorker() const if (!shared->ddl_worker) { if (!hasZooKeeper()) - throw Exception("There is no Zookeeper configuration in server config", ErrorCodes::NO_ELEMENTS_IN_CONFIG); + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "There is no Zookeeper configuration in server config"); if (!hasDistributedDDL()) - throw Exception("There is no DistributedDDL configuration in server config", ErrorCodes::NO_ELEMENTS_IN_CONFIG); + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "There is no DistributedDDL configuration in server config"); - throw Exception("DDL background thread is not initialized", ErrorCodes::NO_ELEMENTS_IN_CONFIG); + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "DDL background thread is not initialized"); } return *shared->ddl_worker; } @@ -2560,8 +2589,9 @@ void Context::setInterserverIOAddress(const String & host, UInt16 port) std::pair Context::getInterserverIOAddress() const { if (shared->interserver_io_host.empty() || 
shared->interserver_io_port == 0) - throw Exception("Parameter 'interserver_http(s)_port' required for replication is not specified in configuration file.", - ErrorCodes::NO_ELEMENTS_IN_CONFIG); + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, + "Parameter 'interserver_http(s)_port' required for replication is not specified " + "in configuration file."); return { shared->interserver_io_host, shared->interserver_io_port }; } @@ -2622,7 +2652,7 @@ std::shared_ptr Context::getCluster(const std::string & cluster_name) c { if (auto res = tryGetCluster(cluster_name)) return res; - throw Exception("Requested cluster '" + cluster_name + "' not found", ErrorCodes::BAD_GET); + throw Exception(ErrorCodes::BAD_GET, "Requested cluster '{}' not found", cluster_name); } @@ -2712,7 +2742,7 @@ void Context::setCluster(const String & cluster_name, const std::shared_ptrclusters_mutex); if (!shared->clusters) - throw Exception("Clusters are not set", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Clusters are not set"); shared->clusters->setCluster(cluster_name, cluster); } @@ -3159,7 +3189,7 @@ void Context::reloadConfig() const { /// Use mutex if callback may be changed after startup. if (!shared->config_reload_callback) - throw Exception("Can't reload config because config_reload_callback is not set.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't reload config because config_reload_callback is not set."); shared->config_reload_callback(); } @@ -3259,7 +3289,7 @@ const NameToNameMap & Context::getQueryParameters() const void Context::setQueryParameter(const String & name, const String & value) { if (!query_parameters.emplace(name, value).second) - throw Exception("Duplicate name " + backQuote(name) + " of query parameter", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Duplicate name {} of query parameter", backQuote(name)); } void Context::addQueryParameters(const NameToNameMap & parameters) @@ -3301,7 +3331,7 @@ std::shared_ptr Context::getActionLocksManager() const void Context::setExternalTablesInitializer(ExternalTablesInitializer && initializer) { if (external_tables_initializer_callback) - throw Exception("External tables initializer is already set", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "External tables initializer is already set"); external_tables_initializer_callback = std::move(initializer); } @@ -3320,7 +3350,7 @@ void Context::initializeExternalTablesIfSet() void Context::setInputInitializer(InputInitializer && initializer) { if (input_initializer_callback) - throw Exception("Input initializer is already set", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Input initializer is already set"); input_initializer_callback = std::move(initializer); } @@ -3329,7 +3359,7 @@ void Context::setInputInitializer(InputInitializer && initializer) void Context::initializeInput(const StoragePtr & input_storage) { if (!input_initializer_callback) - throw Exception("Input initializer is not set", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Input initializer is not set"); input_initializer_callback(shared_from_this(), input_storage); /// Reset callback @@ -3340,7 +3370,7 @@ void Context::initializeInput(const StoragePtr & input_storage) void Context::setInputBlocksReaderCallback(InputBlocksReader && reader) { if (input_blocks_reader) - throw Exception("Input blocks reader is already set", ErrorCodes::LOGICAL_ERROR); + throw 
Exception(ErrorCodes::LOGICAL_ERROR, "Input blocks reader is already set"); input_blocks_reader = std::move(reader); } @@ -3407,7 +3437,7 @@ StorageID Context::resolveStorageIDImpl(StorageID storage_id, StorageNamespace w if (!storage_id) { if (exception) - exception->emplace("Both table name and UUID are empty", ErrorCodes::UNKNOWN_TABLE); + exception->emplace(ErrorCodes::UNKNOWN_TABLE, "Both table name and UUID are empty"); return storage_id; } @@ -3424,8 +3454,8 @@ StorageID Context::resolveStorageIDImpl(StorageID storage_id, StorageNamespace w if (in_specified_database) return storage_id; /// NOTE There is no guarantees that table actually exists in database. if (exception) - exception->emplace("External and temporary tables have no database, but " + - storage_id.database_name + " is specified", ErrorCodes::UNKNOWN_TABLE); + exception->emplace(Exception(ErrorCodes::UNKNOWN_TABLE, "External and temporary tables have no database, but {} is specified", + storage_id.database_name)); return StorageID::createEmpty(); } @@ -3468,7 +3498,7 @@ StorageID Context::resolveStorageIDImpl(StorageID storage_id, StorageNamespace w if (current_database.empty()) { if (exception) - exception->emplace("Default database is not selected", ErrorCodes::UNKNOWN_DATABASE); + exception->emplace(ErrorCodes::UNKNOWN_DATABASE, "Default database is not selected"); return StorageID::createEmpty(); } storage_id.database_name = current_database; @@ -3477,7 +3507,7 @@ StorageID Context::resolveStorageIDImpl(StorageID storage_id, StorageNamespace w } if (exception) - exception->emplace("Cannot resolve database name for table " + storage_id.getNameForLogs(), ErrorCodes::UNKNOWN_TABLE); + exception->emplace(Exception(ErrorCodes::UNKNOWN_TABLE, "Cannot resolve database name for table {}", storage_id.getNameForLogs())); return StorageID::createEmpty(); } @@ -3610,7 +3640,7 @@ void Context::setAsynchronousInsertQueue(const std::shared_ptrasync_insert_queue = ptr; } @@ -3811,7 +3841,7 @@ ReadSettings Context::getReadSettings() const res.enable_filesystem_cache_log = settings.enable_filesystem_cache_log; res.enable_filesystem_cache_on_lower_level = settings.enable_filesystem_cache_on_lower_level; - res.max_query_cache_size = settings.max_query_cache_size; + res.filesystem_cache_max_download_size = settings.filesystem_cache_max_download_size; res.skip_download_if_exceeds_query_cache = settings.skip_download_if_exceeds_query_cache; res.remote_read_min_bytes_for_seek = settings.remote_read_min_bytes_for_seek; diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 58478ab79b8..00dc4204496 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -81,6 +81,7 @@ class Macros; struct Progress; struct FileProgress; class Clusters; +class QueryCache; class QueryLog; class QueryThreadLog; class QueryViewsLog; @@ -859,6 +860,11 @@ public: std::shared_ptr getMMappedFileCache() const; void dropMMappedFileCache() const; + /// Create a cache of query results for statements which run repeatedly. + void setQueryCache(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes, size_t max_entry_size_in_records); + std::shared_ptr getQueryCache() const; + void dropQueryCache() const; + /** Clear the caches of the uncompressed blocks and marks. * This is usually done when renaming tables, changing the type of columns, deleting a table. * - since caches are linked to file names, and become incorrect. 
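The Context.h hunk above completes the public surface of the new query cache: a setter that is expected to be called once (setQueryCache throws LOGICAL_ERROR if called twice), a getter for lookups, and a drop method for invalidation. The sketch below is hypothetical wiring; the configuration key names and the call site are assumptions and not part of the hunks shown here, while the three Context methods and the constructor arguments come from this patch:

/// Hypothetical startup wiring (call site and config keys are assumptions, not from this diff).
size_t max_size_in_bytes = config.getUInt64("query_cache.size", 1'073'741'824);      /// 1 GiB
size_t max_entries = config.getUInt64("query_cache.max_entries", 1024);
size_t max_entry_size_in_bytes = config.getUInt64("query_cache.max_entry_size", 1'048'576);
size_t max_entry_size_in_rows = config.getUInt64("query_cache.max_entry_rows", 30'000'000);
global_context->setQueryCache(max_size_in_bytes, max_entries, max_entry_size_in_bytes, max_entry_size_in_rows);

/// Later, e.g. when handling an explicit "drop caches" request:
global_context->dropQueryCache();

/// On the query path, a consumer would fetch the shared instance and use the
/// createReader()/createWriter() API shown in the QueryCache.cpp hunk above:
if (auto query_cache = global_context->getQueryCache())
{
    /// build a QueryCache::Key from the query AST, result header and user,
    /// then probe with createReader(key).hasCacheEntryForKey()
}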
diff --git a/src/Interpreters/Context_fwd.h b/src/Interpreters/Context_fwd.h index 2564912a297..c7928bbdbf3 100644 --- a/src/Interpreters/Context_fwd.h +++ b/src/Interpreters/Context_fwd.h @@ -39,7 +39,7 @@ struct WithContextImpl inline Shared getContext() const { auto ptr = context.lock(); - if (!ptr) throw Exception("Context has expired", ErrorCodes::LOGICAL_ERROR); + if (!ptr) throw Exception(ErrorCodes::LOGICAL_ERROR, "Context has expired"); return ptr; } diff --git a/src/Interpreters/CrossToInnerJoinVisitor.cpp b/src/Interpreters/CrossToInnerJoinVisitor.cpp index 09aebf874be..0675f2bb19c 100644 --- a/src/Interpreters/CrossToInnerJoinVisitor.cpp +++ b/src/Interpreters/CrossToInnerJoinVisitor.cpp @@ -48,14 +48,14 @@ struct JoinedElement void checkTableName(const DatabaseAndTableWithAlias & table, const String & current_database) const { if (!element.table_expression) - throw Exception("Not a table expression in JOIN (ARRAY JOIN?)", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Not a table expression in JOIN (ARRAY JOIN?)"); ASTTableExpression * table_expression = element.table_expression->as(); if (!table_expression) - throw Exception("Wrong table expression in JOIN", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong table expression in JOIN"); if (!table.same(DatabaseAndTableWithAlias(*table_expression, current_database))) - throw Exception("Inconsistent table names", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Inconsistent table names"); } void rewriteCommaToCross() @@ -178,7 +178,7 @@ std::vector getTables(const ASTSelectQuery & select) { const auto * table_element = child->as(); if (!table_element) - throw Exception("Logical error: TablesInSelectQueryElement expected", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: TablesInSelectQueryElement expected"); JoinedElement & t = joined_tables.emplace_back(*table_element); t.rewriteCommaToCross(); diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp index 0425b3de99b..799f1b0b4f4 100644 --- a/src/Interpreters/DDLTask.cpp +++ b/src/Interpreters/DDLTask.cpp @@ -248,7 +248,7 @@ void DDLTask::setClusterInfo(ContextPtr context, Poco::Logger * log) { auto * query_on_cluster = dynamic_cast(query.get()); if (!query_on_cluster) - throw Exception("Received unknown DDL query", ErrorCodes::UNKNOWN_TYPE_OF_QUERY); + throw Exception(ErrorCodes::UNKNOWN_TYPE_OF_QUERY, "Received unknown DDL query"); cluster_name = query_on_cluster->cluster; cluster = context->tryGetCluster(cluster_name); @@ -321,7 +321,8 @@ bool DDLTask::tryFindHostInCluster() { if (!query_with_table->database) throw Exception(ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION, - "For a distributed DDL on circular replicated cluster its table name must be qualified by database name."); + "For a distributed DDL on circular replicated cluster its table name " + "must be qualified by database name."); if (default_database == query_with_table->getDatabase()) return true; diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index d427e97828b..0f91212e6a9 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -994,7 +994,7 @@ void DDLWorker::createStatusDirs(const std::string & node_path, const ZooKeeperP String DDLWorker::enqueueQuery(DDLLogEntry & entry) { if (entry.hosts.empty()) - throw Exception("Empty host list in a distributed DDL task", ErrorCodes::LOGICAL_ERROR); + throw 
Exception(ErrorCodes::LOGICAL_ERROR, "Empty host list in a distributed DDL task"); auto zookeeper = getAndSetZooKeeper(); diff --git a/src/Interpreters/DatabaseAndTableWithAlias.cpp b/src/Interpreters/DatabaseAndTableWithAlias.cpp index 7fb581c1b4d..9b6ce4f22d3 100644 --- a/src/Interpreters/DatabaseAndTableWithAlias.cpp +++ b/src/Interpreters/DatabaseAndTableWithAlias.cpp @@ -37,7 +37,7 @@ DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTIdentifier & ident else if (identifier.name_parts.size() == 1) table = identifier.name_parts[0]; else - throw Exception("Logical error: invalid identifier", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: invalid identifier"); if (database.empty()) database = current_database; @@ -50,7 +50,7 @@ DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTPtr & node, const else if (const auto * identifier = node->as()) *this = DatabaseAndTableWithAlias(*identifier, current_database); else - throw Exception("Logical error: identifier or table identifier expected", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: identifier or table identifier expected"); } DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTTableExpression & table_expression, const String & current_database) @@ -70,7 +70,7 @@ DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTTableExpression & alias = table_expression.subquery->tryGetAlias(); } else - throw Exception("Logical error: no known elements in ASTTableExpression", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: no known elements in ASTTableExpression"); } bool DatabaseAndTableWithAlias::satisfies(const DatabaseAndTableWithAlias & db_table, bool table_may_be_an_alias) const diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index af88146c7b2..ed8545d6d01 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -136,7 +136,7 @@ StoragePtr TemporaryTableHolder::getTable() const { auto table = temporary_tables->tryGetTable("_tmp_" + toString(id), getContext()); if (!table) - throw Exception("Temporary table " + getGlobalTableID().getNameForLogs() + " not found", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Temporary table {} not found", getGlobalTableID().getNameForLogs()); return table; } @@ -298,7 +298,7 @@ DatabaseAndTable DatabaseCatalog::getTableImpl( /// If table_id has no UUID, then the name of database was specified by user and table_id was not resolved through context. /// Do not allow access to TEMPORARY_DATABASE because it contains all temporary tables of all contexts and users. 
if (exception) - exception->emplace(fmt::format("Direct access to `{}` database is not allowed", TEMPORARY_DATABASE), ErrorCodes::DATABASE_ACCESS_DENIED); + exception->emplace(Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "Direct access to `{}` database is not allowed", TEMPORARY_DATABASE)); return {}; } @@ -309,7 +309,7 @@ DatabaseAndTable DatabaseCatalog::getTableImpl( if (databases.end() == it) { if (exception) - exception->emplace(fmt::format("Database {} doesn't exist", backQuoteIfNeed(table_id.getDatabaseName())), ErrorCodes::UNKNOWN_DATABASE); + exception->emplace(Exception(ErrorCodes::UNKNOWN_DATABASE, "Database {} doesn't exist", backQuoteIfNeed(table_id.getDatabaseName()))); return {}; } database = it->second; @@ -390,7 +390,7 @@ void DatabaseCatalog::assertDatabaseDoesntExistUnlocked(const String & database_ { assert(!database_name.empty()); if (databases.end() != databases.find(database_name)) - throw Exception("Database " + backQuoteIfNeed(database_name) + " already exists.", ErrorCodes::DATABASE_ALREADY_EXISTS); + throw Exception(ErrorCodes::DATABASE_ALREADY_EXISTS, "Database {} already exists.", backQuoteIfNeed(database_name)); } void DatabaseCatalog::attachDatabase(const String & database_name, const DatabasePtr & database) @@ -409,7 +409,7 @@ void DatabaseCatalog::attachDatabase(const String & database_name, const Databas DatabasePtr DatabaseCatalog::detachDatabase(ContextPtr local_context, const String & database_name, bool drop, bool check_empty) { if (database_name == TEMPORARY_DATABASE) - throw Exception("Cannot detach database with temporary tables.", ErrorCodes::DATABASE_ACCESS_DENIED); + throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "Cannot detach database with temporary tables."); DatabasePtr db; { @@ -427,8 +427,7 @@ DatabasePtr DatabaseCatalog::detachDatabase(ContextPtr local_context, const Stri try { if (!db->empty()) - throw Exception("New table appeared in database being dropped or detached. Try again.", - ErrorCodes::DATABASE_NOT_EMPTY); + throw Exception(ErrorCodes::DATABASE_NOT_EMPTY, "New table appeared in database being dropped or detached. Try again."); if (!drop) db->assertCanBeDetached(false); } @@ -500,7 +499,7 @@ DatabasePtr DatabaseCatalog::getDatabase(const UUID & uuid) const { auto db_and_table = tryGetByUUID(uuid); if (!db_and_table.first || db_and_table.second) - throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database UUID {} does not exist", toString(uuid)); + throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database UUID {} does not exist", uuid); return db_and_table.first; } @@ -590,11 +589,11 @@ void DatabaseCatalog::addUUIDMapping(const UUID & uuid, const DatabasePtr & data /// We are trying to replace existing mapping (prev_database != nullptr), it's logical error if (database || table) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Mapping for table with UUID={} already exists", toString(uuid)); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Mapping for table with UUID={} already exists", uuid); /// Normally this should never happen, but it's possible when the same UUIDs are explicitly specified in different CREATE queries, /// so it's not LOGICAL_ERROR throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Mapping for table with UUID={} already exists. 
It happened due to UUID collision, " - "most likely because some not random UUIDs were manually specified in CREATE queries.", toString(uuid)); + "most likely because some not random UUIDs were manually specified in CREATE queries.", uuid); } void DatabaseCatalog::removeUUIDMapping(const UUID & uuid) @@ -604,7 +603,7 @@ void DatabaseCatalog::removeUUIDMapping(const UUID & uuid) std::lock_guard lock{map_part.mutex}; auto it = map_part.map.find(uuid); if (it == map_part.map.end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Mapping for table with UUID={} doesn't exist", toString(uuid)); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Mapping for table with UUID={} doesn't exist", uuid); it->second = {}; } @@ -614,7 +613,7 @@ void DatabaseCatalog::removeUUIDMappingFinally(const UUID & uuid) UUIDToStorageMapPart & map_part = uuid_map[getFirstLevelIdx(uuid)]; std::lock_guard lock{map_part.mutex}; if (!map_part.map.erase(uuid)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Mapping for table with UUID={} doesn't exist", toString(uuid)); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Mapping for table with UUID={} doesn't exist", uuid); } void DatabaseCatalog::updateUUIDMapping(const UUID & uuid, DatabasePtr database, StoragePtr table) @@ -625,7 +624,7 @@ void DatabaseCatalog::updateUUIDMapping(const UUID & uuid, DatabasePtr database, std::lock_guard lock{map_part.mutex}; auto it = map_part.map.find(uuid); if (it == map_part.map.end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Mapping for table with UUID={} doesn't exist", toString(uuid)); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Mapping for table with UUID={} doesn't exist", uuid); auto & prev_database = it->second.first; auto & prev_table = it->second.second; assert(prev_database && prev_table); @@ -655,8 +654,7 @@ DatabaseCatalog & DatabaseCatalog::init(ContextMutablePtr global_context_) { if (database_catalog) { - throw Exception("Database catalog is initialized twice. This is a bug.", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Database catalog is initialized twice. This is a bug."); } database_catalog.reset(new DatabaseCatalog(global_context_)); @@ -668,8 +666,7 @@ DatabaseCatalog & DatabaseCatalog::instance() { if (!database_catalog) { - throw Exception("Database catalog is not initialized. This is a bug.", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Database catalog is not initialized. 
This is a bug."); } return *database_catalog; @@ -729,7 +726,7 @@ DDLGuardPtr DatabaseCatalog::getDDLGuard(const String & database, const String & return std::make_unique(db_guard.first, db_guard.second, std::move(lock), table, database); } -std::unique_lock DatabaseCatalog::getExclusiveDDLGuardForDatabase(const String & database) +std::unique_lock DatabaseCatalog::getExclusiveDDLGuardForDatabase(const String & database) { DDLGuards::iterator db_guard_iter; { @@ -1282,7 +1279,7 @@ TemporaryLockForUUIDDirectory & TemporaryLockForUUIDDirectory::operator = (Tempo } -DDLGuard::DDLGuard(Map & map_, std::shared_mutex & db_mutex_, std::unique_lock guards_lock_, const String & elem, const String & database_name) +DDLGuard::DDLGuard(Map & map_, SharedMutex & db_mutex_, std::unique_lock guards_lock_, const String & elem, const String & database_name) : map(map_), db_mutex(db_mutex_), guards_lock(std::move(guards_lock_)) { it = map.emplace(elem, Entry{std::make_unique(), 0}).first; diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index a3fa4515a69..5dc3f90b7f4 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -17,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -58,7 +58,7 @@ public: DDLGuard( Map & map_, - std::shared_mutex & db_mutex_, + SharedMutex & db_mutex_, std::unique_lock guards_lock_, const String & elem, const String & database_name); @@ -69,7 +69,7 @@ public: private: Map & map; - std::shared_mutex & db_mutex; + SharedMutex & db_mutex; Map::iterator it; std::unique_lock guards_lock; std::unique_lock table_lock; @@ -142,7 +142,7 @@ public: /// Get an object that protects the table from concurrently executing multiple DDL operations. DDLGuardPtr getDDLGuard(const String & database, const String & table); /// Get an object that protects the database from concurrent DDL queries all tables in the database - std::unique_lock getExclusiveDDLGuardForDatabase(const String & database); + std::unique_lock getExclusiveDDLGuardForDatabase(const String & database); void assertDatabaseExists(const String & database_name) const; @@ -298,7 +298,7 @@ private: /// For the duration of the operation, an element is placed here, and an object is returned, /// which deletes the element in the destructor when counter becomes zero. /// In case the element already exists, waits when query will be executed in other thread. See class DDLGuard below. - using DatabaseGuard = std::pair; + using DatabaseGuard = std::pair; using DDLGuards = std::map; DDLGuards ddl_guards TSA_GUARDED_BY(ddl_guards_mutex); /// If you capture mutex and ddl_guards_mutex, then you need to grab them strictly in this order. 
diff --git a/src/Interpreters/DuplicateOrderByVisitor.cpp b/src/Interpreters/DuplicateOrderByVisitor.cpp index b3573af9f8c..569253ff78d 100644 --- a/src/Interpreters/DuplicateOrderByVisitor.cpp +++ b/src/Interpreters/DuplicateOrderByVisitor.cpp @@ -78,7 +78,7 @@ void DuplicateOrderByFromSubqueriesData::visit(ASTSelectQuery & select_query, AS { auto * ast = child->as(); if (!ast || ast->children.empty()) - throw Exception("Bad ORDER BY expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE); + throw Exception(ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE, "Bad ORDER BY expression AST"); if (ast->with_fill) return; diff --git a/src/Interpreters/EmbeddedDictionaries.cpp b/src/Interpreters/EmbeddedDictionaries.cpp index 0b2efaf3dbe..6c0ccce66b5 100644 --- a/src/Interpreters/EmbeddedDictionaries.cpp +++ b/src/Interpreters/EmbeddedDictionaries.cpp @@ -143,7 +143,7 @@ EmbeddedDictionaries::~EmbeddedDictionaries() void EmbeddedDictionaries::reload() { if (!reloadImpl(true, true)) - throw Exception("Some embedded dictionaries were not successfully reloaded", ErrorCodes::UNFINISHED); + throw Exception(ErrorCodes::UNFINISHED, "Some embedded dictionaries were not successfully reloaded"); } diff --git a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index 581a8cd87ee..7a5fc67596f 100644 --- a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -216,14 +216,14 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr } if (block.rows() != 1) - throw Exception("Scalar subquery returned more than one row", ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY); + throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, "Scalar subquery returned more than one row"); Block tmp_block; while (tmp_block.rows() == 0 && executor.pull(tmp_block)) ; if (tmp_block.rows() != 0) - throw Exception("Scalar subquery returned more than one row", ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY); + throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, "Scalar subquery returned more than one row"); } block = materializeBlock(block); diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index 5ea29615942..97555feb426 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -67,7 +67,7 @@ ExpressionActions::ExpressionActions(ActionsDAGPtr actions_dag_, const Expressio if (settings.max_temporary_columns && num_columns > settings.max_temporary_columns) throw Exception(ErrorCodes::TOO_MANY_TEMPORARY_COLUMNS, "Too many temporary columns: {}. Maximum: {}", - actions_dag->dumpNames(), std::to_string(settings.max_temporary_columns)); + actions_dag->dumpNames(), settings.max_temporary_columns); } ExpressionActionsPtr ExpressionActions::clone() const @@ -536,9 +536,9 @@ void ExpressionActions::checkLimits(const ColumnsWithTypeAndName & columns) cons if (column.column && !isColumnConst(*column.column)) list_of_non_const_columns << "\n" << column.name; - throw Exception("Too many temporary non-const columns:" + list_of_non_const_columns.str() - + ". Maximum: " + std::to_string(settings.max_temporary_non_const_columns), - ErrorCodes::TOO_MANY_TEMPORARY_NON_CONST_COLUMNS); + throw Exception(ErrorCodes::TOO_MANY_TEMPORARY_NON_CONST_COLUMNS, + "Too many temporary non-const columns:{}. 
Maximum: {}", + list_of_non_const_columns.str(), settings.max_temporary_non_const_columns); } } } @@ -575,7 +575,7 @@ static void executeAction(const ExpressionActions::Action & action, ExecutionCon { auto & res_column = columns[action.result_position]; if (res_column.type || res_column.column) - throw Exception("Result column is not empty", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Result column is not empty"); res_column.type = action.node->result_type; res_column.name = action.node->result_name; @@ -622,7 +622,7 @@ static void executeAction(const ExpressionActions::Action & action, ExecutionCon const auto * array = getArrayJoinColumnRawPtr(array_join_key.column); if (!array) - throw Exception("ARRAY JOIN of not array nor map: " + action.node->result_name, ErrorCodes::TYPE_MISMATCH); + throw Exception(ErrorCodes::TYPE_MISMATCH, "ARRAY JOIN of not array nor map: {}", action.node->result_name); for (auto & column : columns) if (column.column) @@ -812,7 +812,7 @@ NameAndTypePair ExpressionActions::getSmallestColumn(const NamesAndTypesList & c } if (!min_size) - throw Exception("No available columns", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "No available columns"); return result; } @@ -930,7 +930,7 @@ bool ExpressionActions::checkColumnIsAlwaysFalse(const String & column_name) con void ExpressionActionsChain::addStep(NameSet non_constant_inputs) { if (steps.empty()) - throw Exception("Cannot add action to empty ExpressionActionsChain", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add action to empty ExpressionActionsChain"); ColumnsWithTypeAndName columns = steps.back()->getResultColumns(); for (auto & column : columns) diff --git a/src/Interpreters/ExpressionActions.h b/src/Interpreters/ExpressionActions.h index faefe0985f7..48f18b3b407 100644 --- a/src/Interpreters/ExpressionActions.h +++ b/src/Interpreters/ExpressionActions.h @@ -260,7 +260,7 @@ struct ExpressionActionsChain : WithContext { if (allow_empty) return {}; - throw Exception("Empty ExpressionActionsChain", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty ExpressionActionsChain"); } return typeid_cast(steps.back().get())->actions_dag; @@ -269,7 +269,7 @@ struct ExpressionActionsChain : WithContext Step & getLastStep() { if (steps.empty()) - throw Exception("Empty ExpressionActionsChain", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty ExpressionActionsChain"); return *steps.back(); } diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index fbd076ee746..80cc0414643 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -129,7 +129,7 @@ bool sanitizeBlock(Block & block, bool throw_if_cannot_create_column) if (isNotCreatable(col.type->getTypeId())) { if (throw_if_cannot_create_column) - throw Exception("Cannot create column of type " + col.type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot create column of type {}", col.type->getName()); return false; } @@ -314,7 +314,7 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) const auto & column_name = group_elements_ast[j]->getColumnName(); const auto * node = temp_actions->tryFindInOutputs(column_name); if (!node) - throw Exception("Unknown identifier (in GROUP BY): " + column_name, ErrorCodes::UNKNOWN_IDENTIFIER); + throw 
Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown identifier (in GROUP BY): {}", column_name); /// Only removes constant keys if it's an initiator or distributed_group_by_no_merge is enabled. if (getContext()->getClientInfo().distributed_depth == 0 || settings.distributed_group_by_no_merge > 0) @@ -368,7 +368,7 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) const auto & column_name = group_asts[i]->getColumnName(); const auto * node = temp_actions->tryFindInOutputs(column_name); if (!node) - throw Exception("Unknown identifier (in GROUP BY): " + column_name, ErrorCodes::UNKNOWN_IDENTIFIER); + throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown identifier (in GROUP BY): {}", column_name); /// Only removes constant keys if it's an initiator or distributed_group_by_no_merge is enabled. if (getContext()->getClientInfo().distributed_depth == 0 || settings.distributed_group_by_no_merge > 0) @@ -930,14 +930,14 @@ const ASTSelectQuery * ExpressionAnalyzer::getSelectQuery() const { const auto * select_query = query->as(); if (!select_query) - throw Exception("Not a select query", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Not a select query"); return select_query; } const ASTSelectQuery * SelectQueryExpressionAnalyzer::getAggregatingQuery() const { if (!has_aggregation) - throw Exception("No aggregation", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "No aggregation"); return getSelectQuery(); } @@ -1251,8 +1251,8 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere( const auto & node = step.actions()->findInOutputs(prewhere_column_name); auto filter_type = node.result_type; if (!filter_type->canBeUsedInBooleanContext()) - throw Exception("Invalid type for filter in PREWHERE: " + filter_type->getName(), - ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, "Invalid type for filter in PREWHERE: {}", + filter_type->getName()); ActionsDAGPtr prewhere_actions; { @@ -1334,8 +1334,8 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, const auto & node = step.actions()->findInOutputs(where_column_name); auto filter_type = node.result_type; if (!filter_type->canBeUsedInBooleanContext()) - throw Exception("Invalid type for filter in WHERE: " + filter_type->getName(), - ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, "Invalid type for filter in WHERE: {}", + filter_type->getName()); return true; } @@ -1556,7 +1556,7 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChai { auto * ast = child->as(); if (!ast || ast->children.empty()) - throw Exception("Bad ORDER BY expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE); + throw Exception(ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE, "Bad ORDER BY expression AST"); if (getContext()->getSettingsRef().enable_positional_arguments) replaceForPositionalArguments(ast->children.at(0), select_query, ASTSelectQuery::Expression::ORDER_BY); @@ -2230,7 +2230,7 @@ void ExpressionAnalysisResult::checkActions() const if (actions) for (const auto & node : actions->getNodes()) if (node.type == ActionsDAG::ActionType::ARRAY_JOIN) - throw Exception("PREWHERE cannot contain ARRAY JOIN action", ErrorCodes::ILLEGAL_PREWHERE); + throw Exception(ErrorCodes::ILLEGAL_PREWHERE, "PREWHERE cannot contain ARRAY JOIN action"); }; check_actions(prewhere_info->prewhere_actions); diff --git 
a/src/Interpreters/ExternalLoader.cpp b/src/Interpreters/ExternalLoader.cpp index ea2b9045120..9858b27d57a 100644 --- a/src/Interpreters/ExternalLoader.cpp +++ b/src/Interpreters/ExternalLoader.cpp @@ -1000,7 +1000,7 @@ private: /// Loading. auto [new_object, new_exception] = loadSingleObject(name, *info->config, previous_version_as_base_for_loading); if (!new_object && !new_exception) - throw Exception("No object created and no exception raised for " + type_name, ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "No object created and no exception raised for {}", type_name); /// Saving the result of the loading. { @@ -1440,7 +1440,7 @@ void ExternalLoader::checkLoaded(const ExternalLoader::LoadResult & result, if (result.object && (!check_no_errors || !result.exception)) return; if (result.status == ExternalLoader::Status::LOADING) - throw Exception(type_name + " '" + result.name + "' is still loading", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "{} '{}' is still loading", type_name, result.name); if (result.exception) { // Exception is shared for multiple threads. @@ -1466,9 +1466,9 @@ void ExternalLoader::checkLoaded(const ExternalLoader::LoadResult & result, } } if (result.status == ExternalLoader::Status::NOT_EXIST) - throw Exception(type_name + " '" + result.name + "' not found", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "{} '{}' not found", type_name, result.name); if (result.status == ExternalLoader::Status::NOT_LOADED) - throw Exception(type_name + " '" + result.name + "' not tried to load", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "{} '{}' not tried to load", type_name, result.name); } void ExternalLoader::checkLoaded(const ExternalLoader::LoadResults & results, diff --git a/src/Interpreters/ExternalLoaderTempConfigRepository.cpp b/src/Interpreters/ExternalLoaderTempConfigRepository.cpp index c4210875867..10fc61a2ed0 100644 --- a/src/Interpreters/ExternalLoaderTempConfigRepository.cpp +++ b/src/Interpreters/ExternalLoaderTempConfigRepository.cpp @@ -31,7 +31,7 @@ bool ExternalLoaderTempConfigRepository::exists(const String & path_) Poco::Timestamp ExternalLoaderTempConfigRepository::getUpdateTime(const String & path_) { if (!exists(path_)) - throw Exception("Loadable " + path_ + " not found", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Loadable {} not found", path_); return creation_time; } @@ -39,7 +39,7 @@ Poco::Timestamp ExternalLoaderTempConfigRepository::getUpdateTime(const String & LoadablesConfigurationPtr ExternalLoaderTempConfigRepository::load(const String & path_) { if (!exists(path_)) - throw Exception("Loadable " + path_ + " not found", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Loadable {} not found", path_); return config; } diff --git a/src/Interpreters/GetAggregatesVisitor.h b/src/Interpreters/GetAggregatesVisitor.h index 036d50ba4d6..3f5804c39a0 100644 --- a/src/Interpreters/GetAggregatesVisitor.h +++ b/src/Interpreters/GetAggregatesVisitor.h @@ -66,8 +66,8 @@ private: if (isAggregateFunction(node)) { if (data.assert_no_aggregates) - throw Exception("Aggregate function " + node.getColumnName() + " is found " + String(data.assert_no_aggregates) + " in query", - ErrorCodes::ILLEGAL_AGGREGATION); + throw Exception(ErrorCodes::ILLEGAL_AGGREGATION, "Aggregate function {} is found {} in query", + node.getColumnName(), String(data.assert_no_aggregates)); String column_name = 
node.getColumnName(); if (data.uniq_names.count(column_name)) @@ -79,8 +79,8 @@ private: else if (node.is_window_function) { if (data.assert_no_windows) - throw Exception("Window function " + node.getColumnName() + " is found " + String(data.assert_no_windows) + " in query", - ErrorCodes::ILLEGAL_AGGREGATION); + throw Exception(ErrorCodes::ILLEGAL_AGGREGATION, "Window function {} is found {} in query", + node.getColumnName(), String(data.assert_no_windows)); String column_name = node.getColumnName(); if (data.uniq_names.count(column_name)) diff --git a/src/Interpreters/GinFilter.cpp b/src/Interpreters/GinFilter.cpp index 8965d3721d2..4662128e8ab 100644 --- a/src/Interpreters/GinFilter.cpp +++ b/src/Interpreters/GinFilter.cpp @@ -1,31 +1,34 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include namespace DB { + namespace ErrorCodes { extern const int BAD_ARGUMENTS; } + GinFilterParameters::GinFilterParameters(size_t ngrams_, Float64 density_) - : ngrams(ngrams_), density(density_) + : ngrams(ngrams_) + , density(density_) { if (ngrams > 8) - throw Exception("The size of gin filter cannot be greater than 8", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The size of inverted index filter cannot be greater than 8"); if (density <= 0 || density > 1) - throw Exception("The density of gin filter must be between 0 and 1", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The density inverted index gin filter must be between 0 and 1"); } GinFilter::GinFilter(const GinFilterParameters & params_) @@ -33,23 +36,23 @@ GinFilter::GinFilter(const GinFilterParameters & params_) { } -void GinFilter::add(const char* data, size_t len, UInt32 rowID, GinIndexStorePtr& store, UInt64 limit) const +void GinFilter::add(const char * data, size_t len, UInt32 rowID, GinIndexStorePtr & store, UInt64 limit) const { if (len > FST::MAX_TERM_LENGTH) return; String term(data, len); - auto it = store->getPostings().find(term); + auto it = store->getPostingsListBuilder().find(term); - if (it != store->getPostings().end()) + if (it != store->getPostingsListBuilder().end()) { if (!it->second->contains(rowID)) it->second->add(rowID); } else { - UInt64 threshold = std::lround(limit * params.density); - GinIndexStore::GinIndexPostingsBuilderPtr builder = std::make_shared(threshold); + UInt64 size_limit = std::lround(limit * params.density); + auto builder = std::make_shared(size_limit); builder->add(rowID); store->setPostingsBuilder(term, builder); @@ -66,7 +69,7 @@ void GinFilter::addRowRangeToGinFilter(UInt32 segmentID, UInt32 rowIDStart, UInt if (!rowid_ranges.empty()) { /// Try to merge the rowID range with the last one in the container - GinSegmentWithRowIDRange & last_rowid_range = rowid_ranges.back(); + GinSegmentWithRowIdRange & last_rowid_range = rowid_ranges.back(); if (last_rowid_range.segment_id == segmentID && last_rowid_range.range_end+1 == rowIDStart) @@ -80,93 +83,17 @@ void GinFilter::addRowRangeToGinFilter(UInt32 segmentID, UInt32 rowIDStart, UInt void GinFilter::clear() { + query_string.clear(); terms.clear(); rowid_ranges.clear(); - query_string.clear(); } -bool GinFilter::hasEmptyPostingsList(const PostingsCache& postings_cache) -{ - if (postings_cache.empty()) - return true; - - for (const auto& term_postings : postings_cache) - { - const 
SegmentedPostingsListContainer& container = term_postings.second; - if (container.empty()) - return true; - } - return false; -} - -bool GinFilter::matchInRange(const PostingsCache& postings_cache, UInt32 segment_id, UInt32 range_start, UInt32 range_end) -{ - /// Check for each terms - GinIndexPostingsList intersection_result; - bool intersection_result_init = false; - - for (const auto& term_postings : postings_cache) - { - /// Check if it is in the same segment by searching for segment_id - const SegmentedPostingsListContainer& container = term_postings.second; - auto container_it = container.find(segment_id); - if (container_it == container.cend()) - { - return false; - } - auto min_in_container = container_it->second->minimum(); - auto max_in_container = container_it->second->maximum(); - - //check if the postings list has always match flag - if (container_it->second->cardinality() == 1 && UINT32_MAX == min_in_container) - { - continue; //always match - } - - if (range_start > max_in_container || min_in_container > range_end) - { - return false; - } - - /// Delay initialization as late as possible - if (!intersection_result_init) - { - intersection_result_init = true; - intersection_result.addRange(range_start, range_end+1); - } - intersection_result &= *container_it->second; - if (intersection_result.cardinality() == 0) - { - return false; - } - } - return true; -} - -bool GinFilter::match(const PostingsCache& postings_cache) const -{ - if (hasEmptyPostingsList(postings_cache)) - { - return false; - } - - /// Check for each row ID ranges - for (const auto &rowid_range: rowid_ranges) - { - if (matchInRange(postings_cache, rowid_range.segment_id, rowid_range.range_start, rowid_range.range_end)) - { - return true; - } - } - return false; -} - -bool GinFilter::contains(const GinFilter & filter, PostingsCacheForStore &cache_store) const +bool GinFilter::contains(const GinFilter & filter, PostingsCacheForStore & cache_store) const { if (filter.getTerms().empty()) return true; - PostingsCachePtr postings_cache = cache_store.getPostings(filter.getQueryString()); + GinPostingsCachePtr postings_cache = cache_store.getPostings(filter.getQueryString()); if (postings_cache == nullptr) { GinIndexStoreDeserializer reader(cache_store.store); @@ -177,9 +104,73 @@ bool GinFilter::contains(const GinFilter & filter, PostingsCacheForStore &cache_ return match(*postings_cache); } -String GinFilter::getName() +namespace { - return FilterName; + +/// Helper method for checking if postings list cache is empty +bool hasEmptyPostingsList(const GinPostingsCache & postings_cache) +{ + if (postings_cache.empty()) + return true; + + for (const auto & term_postings : postings_cache) + { + const GinSegmentedPostingsListContainer & container = term_postings.second; + if (container.empty()) + return true; + } + return false; +} + +/// Helper method to check if the postings list cache has intersection with given row ID range +bool matchInRange(const GinPostingsCache & postings_cache, UInt32 segment_id, UInt32 range_start, UInt32 range_end) +{ + /// Check for each term + GinIndexPostingsList intersection_result; + bool intersection_result_init = false; + + for (const auto & term_postings : postings_cache) + { + /// Check if it is in the same segment by searching for segment_id + const GinSegmentedPostingsListContainer & container = term_postings.second; + auto container_it = container.find(segment_id); + if (container_it == container.cend()) + return false; + auto min_in_container = container_it->second->minimum(); + 
auto max_in_container = container_it->second->maximum(); + + //check if the postings list has always match flag + if (container_it->second->cardinality() == 1 && UINT32_MAX == min_in_container) + continue; //always match + + if (range_start > max_in_container || min_in_container > range_end) + return false; + + /// Delay initialization as late as possible + if (!intersection_result_init) + { + intersection_result_init = true; + intersection_result.addRange(range_start, range_end+1); + } + intersection_result &= *container_it->second; + if (intersection_result.cardinality() == 0) + return false; + } + return true; +} + +} + +bool GinFilter::match(const GinPostingsCache & postings_cache) const +{ + if (hasEmptyPostingsList(postings_cache)) + return false; + + /// Check for each row ID ranges + for (const auto & rowid_range: rowid_ranges) + if (matchInRange(postings_cache, rowid_range.segment_id, rowid_range.range_start, rowid_range.range_end)) + return true; + return false; } } diff --git a/src/Interpreters/GinFilter.h b/src/Interpreters/GinFilter.h index 0bcd4156f94..8985d84f215 100644 --- a/src/Interpreters/GinFilter.h +++ b/src/Interpreters/GinFilter.h @@ -1,19 +1,23 @@ #pragma once +#include #include #include -#include + namespace DB { + +static inline constexpr auto INVERTED_INDEX_NAME = "inverted"; + struct GinFilterParameters { - explicit GinFilterParameters(size_t ngrams_, Float64 density_); + GinFilterParameters(size_t ngrams_, Float64 density_); size_t ngrams; Float64 density; }; -struct GinSegmentWithRowIDRange +struct GinSegmentWithRowIdRange { /// Segment ID of the row ID range UInt32 segment_id; @@ -25,19 +29,20 @@ struct GinSegmentWithRowIDRange UInt32 range_end; }; +using GinSegmentWithRowIdRangeVector = std::vector; + /// GinFilter provides underlying functionalities for building inverted index and also /// it does filtering the unmatched rows according to its query string. /// It also builds and uses skipping index which stores (segmentID, RowIDStart, RowIDEnd) triples. class GinFilter { public: - using GinSegmentWithRowIDRanges = std::vector; - explicit GinFilter(const GinFilterParameters& params_); + explicit GinFilter(const GinFilterParameters & params_); - /// Add term(which length is 'len' and located at 'data') and its row ID to - /// the postings list builder for building inverted index for the given store. - void add(const char* data, size_t len, UInt32 rowID, GinIndexStorePtr& store, UInt64 limit) const; + /// Add term (located at 'data' with length 'len') and its row ID to the postings list builder + /// for building inverted index for the given store. 
+ void add(const char * data, size_t len, UInt32 rowID, GinIndexStorePtr & store, UInt64 limit) const; /// Accumulate (segmentID, RowIDStart, RowIDEnd) for building skipping index void addRowRangeToGinFilter(UInt32 segmentID, UInt32 rowIDStart, UInt32 rowIDEnd); @@ -45,47 +50,33 @@ public: /// Clear the content void clear(); - /// Check if the filter(built from query string) contains any rows in given filter 'af' by using + /// Check if the filter (built from query string) contains any rows in given filter by using /// given postings list cache - bool contains(const GinFilter & filter, PostingsCacheForStore &cache_store) const; - - /// Const getter for the row ID ranges - const GinSegmentWithRowIDRanges& getFilter() const { return rowid_ranges; } - - /// Mutable getter for the row ID ranges - GinSegmentWithRowIDRanges& getFilter() { return rowid_ranges; } + bool contains(const GinFilter & filter, PostingsCacheForStore & cache_store) const; /// Set the query string of the filter - void setQueryString(const char* data, size_t len) + void setQueryString(const char * data, size_t len) { query_string = String(data, len); } - /// Const getter of the query string - const String &getQueryString() const { return query_string; } - /// Add term which are tokens generated from the query string - void addTerm(const char* data, size_t len) + void addTerm(const char * data, size_t len) { if (len > FST::MAX_TERM_LENGTH) return; terms.push_back(String(data, len)); } - /// Const getter of terms(generated from the query string) - const std::vector& getTerms() const { return terms;} + /// Getter + const String & getQueryString() const { return query_string; } + const std::vector & getTerms() const { return terms; } + const GinSegmentWithRowIdRangeVector & getFilter() const { return rowid_ranges; } + GinSegmentWithRowIdRangeVector & getFilter() { return rowid_ranges; } - /// Check if the given postings list cache has matched rows by using the filter - bool match(const PostingsCache& postings_cache) const; - - /// Get filter name ("inverted") - static String getName(); - - /// Constant of filter name - static constexpr auto FilterName = "inverted"; private: /// Filter parameters - const GinFilterParameters& params; + const GinFilterParameters & params; /// Query string of the filter String query_string; @@ -94,15 +85,12 @@ private: std::vector terms; /// Row ID ranges which are (segmentID, RowIDStart, RowIDEnd) - GinSegmentWithRowIDRanges rowid_ranges; + GinSegmentWithRowIdRangeVector rowid_ranges; - /// Helper method for checking if postings list cache is empty - static bool hasEmptyPostingsList(const PostingsCache& postings_cache); - - /// Helper method to check if the postings list cache has intersection with given row ID range - static bool matchInRange(const PostingsCache& postings_cache, UInt32 segment_id, UInt32 range_start, UInt32 range_end); + /// Check if the given postings list cache has matched rows by using the filter + bool match(const GinPostingsCache & postings_cache) const; }; -using GinFilterPtr = std::shared_ptr; +using GinFilters = std::vector; } diff --git a/src/Interpreters/GlobalSubqueriesVisitor.h b/src/Interpreters/GlobalSubqueriesVisitor.h index dc9294be878..b105cae31c6 100644 --- a/src/Interpreters/GlobalSubqueriesVisitor.h +++ b/src/Interpreters/GlobalSubqueriesVisitor.h @@ -92,7 +92,7 @@ public: } if (!subquery_or_table_name) - throw Exception("Global subquery requires subquery or table name", ErrorCodes::WRONG_GLOBAL_SUBQUERY); + throw Exception(ErrorCodes::WRONG_GLOBAL_SUBQUERY, 
"Global subquery requires subquery or table name"); if (is_table) { diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp index b8c6c639e82..51d4c7d1f4b 100644 --- a/src/Interpreters/GraceHashJoin.cpp +++ b/src/Interpreters/GraceHashJoin.cpp @@ -41,7 +41,7 @@ namespace public: AccumulatedBlockReader(TemporaryFileStream & reader_, std::mutex & mutex_, - size_t result_block_size_ = DEFAULT_BLOCK_SIZE * 8) + size_t result_block_size_ = 0) : reader(reader_) , mutex(mutex_) , result_block_size(result_block_size_) @@ -59,18 +59,22 @@ namespace Blocks blocks; size_t rows_read = 0; - while (rows_read < result_block_size) + do { Block block = reader.read(); rows_read += block.rows(); if (!block) { eof = true; + if (blocks.size() == 1) + return blocks.front(); return concatenateBlocks(blocks); } blocks.push_back(std::move(block)); - } + } while (rows_read < result_block_size); + if (blocks.size() == 1) + return blocks.front(); return concatenateBlocks(blocks); } @@ -118,21 +122,12 @@ class GraceHashJoin::FileBucket : boost::noncopyable public: using BucketLock = std::unique_lock; - struct Stats - { - TemporaryFileStream::Stat left; - TemporaryFileStream::Stat right; - }; - - explicit FileBucket(size_t bucket_index_, - TemporaryFileStream & left_file_, - TemporaryFileStream & right_file_, - Poco::Logger * log_) + explicit FileBucket(size_t bucket_index_, TemporaryFileStream & left_file_, TemporaryFileStream & right_file_, Poco::Logger * log_) : idx{bucket_index_} , left_file{left_file_} , right_file{right_file_} , state{State::WRITING_BLOCKS} - , log(log_) + , log{log_} { } @@ -168,21 +163,18 @@ public: bool empty() const { return is_empty.load(); } - Stats getStat() const { return stats; } - AccumulatedBlockReader startJoining() { LOG_TRACE(log, "Joining file bucket {}", idx); - { std::unique_lock left_lock(left_file_mutex); std::unique_lock right_lock(right_file_mutex); - stats.left = left_file.finishWriting(); - stats.right = right_file.finishWriting(); + left_file.finishWriting(); + right_file.finishWriting(); + state = State::JOINING_BLOCKS; } - return AccumulatedBlockReader(right_file, right_file_mutex); } @@ -231,22 +223,23 @@ private: std::atomic_bool is_empty = true; std::atomic state; - Stats stats; Poco::Logger * log; }; namespace { + template -void flushBlocksToBuckets(Blocks & blocks, const GraceHashJoin::Buckets & buckets) +void flushBlocksToBuckets(Blocks & blocks, const GraceHashJoin::Buckets & buckets, size_t except_index = 0) { chassert(blocks.size() == buckets.size()); retryForEach( generateRandomPermutation(1, buckets.size()), // skipping 0 block, since we join it in memory w/o spilling on disk [&](size_t i) { - if (!blocks[i].rows()) + /// Skip empty and current bucket + if (!blocks[i].rows() || i == except_index) return true; bool flushed = false; @@ -281,6 +274,7 @@ GraceHashJoin::GraceHashJoin( , right_key_names(table_join->getOnlyClause().key_names_right) , tmp_data(std::make_unique(tmp_data_, CurrentMetrics::TemporaryFilesForJoin)) , hash_join(makeInMemoryJoin()) + , hash_join_sample_block(hash_join->savedBlockSample()) { if (!GraceHashJoin::isSupported(table_join)) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "GraceHashJoin is not supported for this join type"); @@ -288,6 +282,9 @@ GraceHashJoin::GraceHashJoin( void GraceHashJoin::initBuckets() { + if (!buckets.empty()) + return; + const auto & settings = context->getSettingsRef(); size_t initial_num_buckets = roundUpToPowerOfTwoOrZero(std::clamp(settings.grace_hash_join_initial_buckets, 1, 
settings.grace_hash_join_max_buckets)); @@ -300,7 +297,7 @@ void GraceHashJoin::initBuckets() if (buckets.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "No buckets created"); - LOG_TRACE(log, "Initialize {} buckets", buckets.size()); + LOG_TRACE(log, "Initialize {} bucket{}", buckets.size(), buckets.size() > 1 ? "s" : ""); current_bucket = buckets.front().get(); current_bucket->startJoining(); @@ -320,17 +317,44 @@ bool GraceHashJoin::addJoinedBlock(const Block & block, bool /*check_limits*/) throw Exception(ErrorCodes::LOGICAL_ERROR, "GraceHashJoin is not initialized"); Block materialized = materializeBlock(block); - addJoinedBlockImpl(materialized); + addJoinedBlockImpl(std::move(materialized)); return true; } -bool GraceHashJoin::fitsInMemory() const +bool GraceHashJoin::hasMemoryOverflow(size_t total_rows, size_t total_bytes) const { /// One row can't be split, avoid loop - if (hash_join->getTotalRowCount() < 2) - return true; + if (total_rows < 2) + return false; - return table_join->sizeLimits().softCheck(hash_join->getTotalRowCount(), hash_join->getTotalByteCount()); + bool has_overflow = !table_join->sizeLimits().softCheck(total_rows, total_bytes); + + if (has_overflow) + LOG_TRACE(log, "Memory overflow, size exceeded {} / {} bytes, {} / {} rows", + ReadableSize(total_bytes), ReadableSize(table_join->sizeLimits().max_bytes), + total_rows, table_join->sizeLimits().max_rows); + + return has_overflow; +} + +bool GraceHashJoin::hasMemoryOverflow(const BlocksList & blocks) const +{ + size_t total_rows = 0; + size_t total_bytes = 0; + for (const auto & block : blocks) + { + total_rows += block.rows(); + total_bytes += block.allocatedBytes(); + } + return hasMemoryOverflow(total_rows, total_bytes); +} + +bool GraceHashJoin::hasMemoryOverflow(const InMemoryJoinPtr & hash_join_) const +{ + size_t total_rows = hash_join_->getTotalRowCount(); + size_t total_bytes = hash_join_->getTotalByteCount(); + + return hasMemoryOverflow(total_rows, total_bytes); } GraceHashJoin::Buckets GraceHashJoin::rehashBuckets(size_t to_size) @@ -341,12 +365,13 @@ GraceHashJoin::Buckets GraceHashJoin::rehashBuckets(size_t to_size) if (to_size <= current_size) return buckets; - assert(isPowerOf2(to_size)); + chassert(isPowerOf2(to_size)); if (to_size > max_num_buckets) { throw Exception(ErrorCodes::LIMIT_EXCEEDED, - "Too many grace hash join buckets ({} > {}), consider increasing grace_hash_join_max_buckets or max_rows_in_join/max_bytes_in_join", + "Too many grace hash join buckets ({} > {}), " + "consider increasing grace_hash_join_max_buckets or max_rows_in_join/max_bytes_in_join", to_size, max_num_buckets); } @@ -361,14 +386,16 @@ GraceHashJoin::Buckets GraceHashJoin::rehashBuckets(size_t to_size) void GraceHashJoin::addBucket(Buckets & destination) { - BucketPtr new_bucket = std::make_shared( - destination.size(), tmp_data->createStream(left_sample_block), tmp_data->createStream(right_sample_block), log); + auto & left_file = tmp_data->createStream(left_sample_block); + auto & right_file = tmp_data->createStream(prepareRightBlock(right_sample_block)); + + BucketPtr new_bucket = std::make_shared(destination.size(), left_file, right_file, log); destination.emplace_back(std::move(new_bucket)); } void GraceHashJoin::checkTypesOfKeys(const Block & block) const { - assert(hash_join); + chassert(hash_join); return hash_join->checkTypesOfKeys(block); } @@ -421,7 +448,7 @@ size_t GraceHashJoin::getTotalRowCount() const size_t GraceHashJoin::getTotalByteCount() const { std::lock_guard lock(hash_join_mutex); - 
assert(hash_join); + chassert(hash_join); return hash_join->getTotalByteCount(); } @@ -435,9 +462,14 @@ bool GraceHashJoin::alwaysReturnsEmptySet() const std::shared_lock lock(rehash_mutex); return std::all_of(buckets.begin(), buckets.end(), [](const auto & bucket) { return bucket->empty(); }); }(); - bool hash_join_is_empty = hash_join && hash_join->alwaysReturnsEmptySet(); - return hash_join_is_empty && file_buckets_are_empty; + if (!file_buckets_are_empty) + return false; + + chassert(hash_join); + bool hash_join_is_empty = hash_join->alwaysReturnsEmptySet(); + + return hash_join_is_empty; } IBlocksStreamPtr GraceHashJoin::getNonJoinedBlocks(const Block &, const Block &, UInt64) const @@ -526,17 +558,11 @@ IBlocksStreamPtr GraceHashJoin::getDelayedBlocks() if (hash_join) { - auto right_blocks = hash_join->releaseJoinedBlocks(); - Blocks blocks = JoinCommon::scatterBlockByHash(right_key_names, right_blocks, buckets.size()); - - for (size_t i = 0; i < blocks.size(); ++i) + auto right_blocks = hash_join->releaseJoinedBlocks(/* restructure */ false); + for (auto & block : right_blocks) { - if (blocks[i].rows() == 0 || i == bucket_idx) - continue; - - if (i < bucket_idx) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected bucket index {} when current bucket is {}", i, bucket_idx); - buckets[i]->addRightBlock(blocks[i]); + Blocks blocks = JoinCommon::scatterBlockByHash(right_key_names, block, buckets.size()); + flushBlocksToBuckets(blocks, buckets, bucket_idx); } } @@ -568,7 +594,7 @@ IBlocksStreamPtr GraceHashJoin::getDelayedBlocks() return std::make_unique(current_bucket->idx, buckets, hash_join, left_key_names, right_key_names); } - LOG_TRACE(log, "Finished loading all buckets"); + LOG_TRACE(log, "Finished loading all {} buckets", buckets.size()); current_bucket = nullptr; return nullptr; @@ -579,42 +605,64 @@ GraceHashJoin::InMemoryJoinPtr GraceHashJoin::makeInMemoryJoin() return std::make_unique(table_join, right_sample_block, any_take_last_row); } +Block GraceHashJoin::prepareRightBlock(const Block & block) +{ + return HashJoin::prepareRightBlock(block, hash_join_sample_block); +} + void GraceHashJoin::addJoinedBlockImpl(Block block) { Buckets buckets_snapshot = getCurrentBuckets(); - Blocks blocks = JoinCommon::scatterBlockByHash(right_key_names, block, buckets_snapshot.size()); size_t bucket_index = current_bucket->idx; + Block current_block; + + { + Blocks blocks = JoinCommon::scatterBlockByHash(right_key_names, block, buckets_snapshot.size()); + flushBlocksToBuckets(blocks, buckets_snapshot, bucket_index); + current_block = std::move(blocks[bucket_index]); + } // Add block to the in-memory join - if (blocks[bucket_index].rows() > 0) + if (current_block.rows() > 0) { std::lock_guard lock(hash_join_mutex); - hash_join->addJoinedBlock(blocks[bucket_index], /* check_limits = */ false); - bool overflow = !fitsInMemory(); - - if (overflow) - { - auto right_blocks = hash_join->releaseJoinedBlocks(); - right_blocks.pop_back(); - - for (const auto & right_block : right_blocks) - blocks.push_back(right_block); - } - - while (overflow) - { - buckets_snapshot = rehashBuckets(buckets_snapshot.size() * 2); - - blocks = JoinCommon::scatterBlockByHash(right_key_names, blocks, buckets_snapshot.size()); + if (!hash_join) hash_join = makeInMemoryJoin(); - hash_join->addJoinedBlock(blocks[bucket_index], /* check_limits = */ false); - overflow = !fitsInMemory(); - } - blocks[bucket_index].clear(); - } - flushBlocksToBuckets(blocks, buckets_snapshot); + hash_join->addJoinedBlock(current_block, /* 
check_limits = */ false); + + if (!hasMemoryOverflow(hash_join)) + return; + + current_block = {}; + + auto right_blocks = hash_join->releaseJoinedBlocks(/* restructure */ false); + hash_join = nullptr; + + buckets_snapshot = rehashBuckets(buckets_snapshot.size() * 2); + + { + Blocks current_blocks; + current_blocks.reserve(right_blocks.size()); + for (const auto & right_block : right_blocks) + { + Blocks blocks = JoinCommon::scatterBlockByHash(right_key_names, right_block, buckets_snapshot.size()); + flushBlocksToBuckets(blocks, buckets_snapshot, bucket_index); + current_blocks.emplace_back(std::move(blocks[bucket_index])); + } + + if (current_blocks.size() == 1) + current_block = std::move(current_blocks.front()); + else + current_block = concatenateBlocks(current_blocks); + } + + hash_join = makeInMemoryJoin(); + + if (current_block.rows() > 0) + hash_join->addJoinedBlock(current_block, /* check_limits = */ false); + } } size_t GraceHashJoin::getNumBuckets() const diff --git a/src/Interpreters/GraceHashJoin.h b/src/Interpreters/GraceHashJoin.h index f4e75f142f3..4f7694e2f07 100644 --- a/src/Interpreters/GraceHashJoin.h +++ b/src/Interpreters/GraceHashJoin.h @@ -7,6 +7,7 @@ #include #include +#include #include @@ -95,8 +96,10 @@ private: /// Add right table block to the @join. Calls @rehash on overflow. void addJoinedBlockImpl(Block block); - /// Check that @join satisifes limits on rows/bytes in @table_join. - bool fitsInMemory() const; + /// Check that join satisfies limits on rows/bytes in table_join. + bool hasMemoryOverflow(size_t total_rows, size_t total_bytes) const; + bool hasMemoryOverflow(const InMemoryJoinPtr & hash_join_) const; + bool hasMemoryOverflow(const BlocksList & blocks) const; /// Create new bucket at the end of @destination. void addBucket(Buckets & destination); @@ -114,6 +117,9 @@ private: size_t getNumBuckets() const; Buckets getCurrentBuckets() const; + /// Structure block to store in the HashJoin according to sample_block. + Block prepareRightBlock(const Block & block); + Poco::Logger * log; ContextPtr context; std::shared_ptr table_join; @@ -130,12 +136,13 @@ private: TemporaryDataOnDiskPtr tmp_data; Buckets buckets; - mutable std::shared_mutex rehash_mutex; + mutable SharedMutex rehash_mutex; FileBucket * current_bucket = nullptr; mutable std::mutex current_bucket_mutex; InMemoryJoinPtr hash_join; + Block hash_join_sample_block; mutable std::mutex hash_join_mutex; }; diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 5ff4f9beb05..fba985da41c 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -221,8 +221,8 @@ HashJoin::HashJoin(std::shared_ptr table_join_, const Block & right_s , right_sample_block(right_sample_block_) , log(&Poco::Logger::get("HashJoin")) { - LOG_DEBUG(log, "Datatype: {}, kind: {}, strictness: {}, right header: {}", data->type, kind, strictness, right_sample_block.dumpStructure()); - LOG_DEBUG(log, "Keys: {}", TableJoin::formatClauses(table_join->getClauses(), true)); + LOG_DEBUG(log, "({}) Datatype: {}, kind: {}, strictness: {}, right header: {}", fmt::ptr(this), data->type, kind, strictness, right_sample_block.dumpStructure()); + LOG_DEBUG(log, "({}) Keys: {}", fmt::ptr(this), TableJoin::formatClauses(table_join->getClauses(), true)); if (isCrossOrComma(kind)) { @@ -267,10 +267,10 @@ HashJoin::HashJoin(std::shared_ptr table_join_, const Block & right_s /// @note ASOF JOIN is not INNER. It's better avoid use of 'INNER ASOF' combination in messages. 
/// In fact INNER means 'LEFT SEMI ASOF' while LEFT means 'LEFT OUTER ASOF'. if (!isLeft(kind) && !isInner(kind)) - throw Exception("Wrong ASOF JOIN type. Only ASOF and LEFT ASOF joins are supported", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Wrong ASOF JOIN type. Only ASOF and LEFT ASOF joins are supported"); if (key_columns.size() <= 1) - throw Exception("ASOF join needs at least one equi-join column", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "ASOF join needs at least one equi-join column"); size_t asof_size; asof_type = SortedLookupVectorBase::getTypeSize(*key_columns.back(), asof_size); @@ -340,7 +340,7 @@ HashJoin::Type HashJoin::chooseMethod(JoinKind kind, const ColumnRawPtrs & key_c return Type::keys128; if (size_of_field == 32) return Type::keys256; - throw Exception("Logical error: numeric column has sizeOfField not in 1, 2, 4, 8, 16, 32.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: numeric column has sizeOfField not in 1, 2, 4, 8, 16, 32."); } /// If the keys fit in N bits, we will use a hash table for N-bit-packed keys @@ -469,6 +469,9 @@ bool HashJoin::alwaysReturnsEmptySet() const size_t HashJoin::getTotalRowCount() const { + if (!data) + return 0; + size_t res = 0; if (data->type == Type::CROSS) @@ -484,28 +487,45 @@ size_t HashJoin::getTotalRowCount() const } } - return res; } size_t HashJoin::getTotalByteCount() const { + if (!data) + return 0; + +#ifdef NDEBUG + size_t debug_blocks_allocated_size = 0; + for (const auto & block : data->blocks) + debug_blocks_allocated_size += block.allocatedBytes(); + + if (data->blocks_allocated_size != debug_blocks_allocated_size) + throw Exception(ErrorCodes::LOGICAL_ERROR, "data->blocks_allocated_size != debug_blocks_allocated_size ({} != {})", + data->blocks_allocated_size, debug_blocks_allocated_size); + + size_t debug_blocks_nullmaps_allocated_size = 0; + for (const auto & nullmap : data->blocks_nullmaps) + debug_blocks_nullmaps_allocated_size += nullmap.second->allocatedBytes(); + + if (data->blocks_nullmaps_allocated_size != debug_blocks_nullmaps_allocated_size) + throw Exception(ErrorCodes::LOGICAL_ERROR, "data->blocks_nullmaps_allocated_size != debug_blocks_nullmaps_allocated_size ({} != {})", + data->blocks_nullmaps_allocated_size, debug_blocks_nullmaps_allocated_size); +#endif + size_t res = 0; - if (data->type == Type::CROSS) - { - for (const auto & block : data->blocks) - res += block.bytes(); - } - else + res += data->blocks_allocated_size; + res += data->blocks_nullmaps_allocated_size; + res += data->pool.size(); + + if (data->type != Type::CROSS) { for (const auto & map : data->maps) { joinDispatch(kind, strictness, map, [&](auto, auto, auto & map_) { res += map_.getTotalByteCountImpl(data->type); }); } - res += data->pool.size(); } - return res; } @@ -656,42 +676,57 @@ void HashJoin::initRightBlockStructure(Block & saved_block_sample) } } -Block HashJoin::structureRightBlock(const Block & block) const +Block HashJoin::prepareRightBlock(const Block & block, const Block & saved_block_sample_) { Block structured_block; - for (const auto & sample_column : savedBlockSample().getColumnsWithTypeAndName()) + for (const auto & sample_column : saved_block_sample_.getColumnsWithTypeAndName()) { ColumnWithTypeAndName column = block.getByName(sample_column.name); if (sample_column.column->isNullable()) JoinCommon::convertColumnToNullable(column); - structured_block.insert(column); + + if (column.column->lowCardinality() && 
!sample_column.column->lowCardinality()) + { + column.column = column.column->convertToFullColumnIfLowCardinality(); + column.type = removeLowCardinality(column.type); + } + + /// There's no optimization for right side const columns. Remove constness if any. + column.column = recursiveRemoveSparse(column.column->convertToFullColumnIfConst()); + structured_block.insert(std::move(column)); } return structured_block; } +Block HashJoin::prepareRightBlock(const Block & block) const +{ + return prepareRightBlock(block, savedBlockSample()); +} + bool HashJoin::addJoinedBlock(const Block & source_block, bool check_limits) { + if (!data) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Join data was released"); + /// RowRef::SizeT is uint32_t (not size_t) for hash table Cell memory efficiency. /// It's possible to split bigger blocks and insert them by parts here. But it would be a dead code. if (unlikely(source_block.rows() > std::numeric_limits::max())) - throw Exception("Too many rows in right table block for HashJoin: " + toString(source_block.rows()), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Too many rows in right table block for HashJoin: {}", source_block.rows()); - /// There's no optimization for right side const columns. Remove constness if any. - Block block = materializeBlock(source_block); - size_t rows = block.rows(); + size_t rows = source_block.rows(); - ColumnRawPtrMap all_key_columns = JoinCommon::materializeColumnsInplaceMap(block, table_join->getAllNames(JoinTableSide::Right)); + ColumnPtrMap all_key_columns = JoinCommon::materializeColumnsInplaceMap(source_block, table_join->getAllNames(JoinTableSide::Right)); - Block structured_block = structureRightBlock(block); + Block block_to_save = prepareRightBlock(source_block); size_t total_rows = 0; size_t total_bytes = 0; { if (storage_join_lock) - throw DB::Exception("addJoinedBlock called when HashJoin locked to prevent updates", - ErrorCodes::LOGICAL_ERROR); + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "addJoinedBlock called when HashJoin locked to prevent updates"); - data->blocks.emplace_back(std::move(structured_block)); + data->blocks_allocated_size += block_to_save.allocatedBytes(); + data->blocks.emplace_back(std::move(block_to_save)); Block * stored_block = &data->blocks.back(); if (rows) @@ -703,7 +738,7 @@ bool HashJoin::addJoinedBlock(const Block & source_block, bool check_limits) { ColumnRawPtrs key_columns; for (const auto & name : onexprs[onexpr_idx].key_names_right) - key_columns.push_back(all_key_columns[name]); + key_columns.push_back(all_key_columns[name].get()); /// We will insert to the map only keys, where all components are not NULL. 
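The blocks_allocated_size and blocks_nullmaps_allocated_size counters added above turn getTotalByteCount() into a constant-time read: the allocated size is accumulated once, when each block is stored in addJoinedBlock(). A condensed standalone model of that bookkeeping follows; the names are stand-ins, not the real HashJoin interfaces:

#include <cassert>
#include <cstddef>
#include <list>
#include <vector>

struct FakeBlock
{
    std::vector<char> data;
    size_t allocatedBytes() const { return data.capacity(); }
};

class RightTableData
{
public:
    void addBlock(FakeBlock block)
    {
        blocks_allocated_size += block.allocatedBytes();   // pay the cost once, on insert
        blocks.push_back(std::move(block));
    }

    size_t totalByteCount() const
    {
#ifndef NDEBUG
        // Debug-only cross-check: the running counter must match a full recomputation.
        size_t recomputed = 0;
        for (const auto & block : blocks)
            recomputed += block.allocatedBytes();
        assert(recomputed == blocks_allocated_size);
#endif
        return blocks_allocated_size;                      // O(1) instead of walking every block
    }

private:
    std::list<FakeBlock> blocks;
    size_t blocks_allocated_size = 0;
};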
ConstNullMapPtr null_map{}; @@ -718,14 +753,14 @@ bool HashJoin::addJoinedBlock(const Block & source_block, bool check_limits) save_nullmap |= (*null_map)[i]; } - auto join_mask_col = JoinCommon::getColumnAsMask(block, onexprs[onexpr_idx].condColumnNames().second); + auto join_mask_col = JoinCommon::getColumnAsMask(source_block, onexprs[onexpr_idx].condColumnNames().second); /// Save blocks that do not hold conditions in ON section ColumnUInt8::MutablePtr not_joined_map = nullptr; - if (!multiple_disjuncts && isRightOrFull(kind) && !join_mask_col.isConstant()) + if (!multiple_disjuncts && isRightOrFull(kind) && join_mask_col.hasData()) { const auto & join_mask = join_mask_col.getData(); /// Save rows that do not hold conditions - not_joined_map = ColumnUInt8::create(block.rows(), 0); + not_joined_map = ColumnUInt8::create(rows, 0); for (size_t i = 0, sz = join_mask->size(); i < sz; ++i) { /// Condition hold, do not save row @@ -759,10 +794,16 @@ bool HashJoin::addJoinedBlock(const Block & source_block, bool check_limits) } if (!multiple_disjuncts && save_nullmap) + { + data->blocks_nullmaps_allocated_size += null_map_holder->allocatedBytes(); data->blocks_nullmaps.emplace_back(stored_block, null_map_holder); + } if (!multiple_disjuncts && not_joined_map) + { + data->blocks_nullmaps_allocated_size += not_joined_map->allocatedBytes(); data->blocks_nullmaps.emplace_back(stored_block, std::move(not_joined_map)); + } if (!check_limits) return true; @@ -795,7 +836,6 @@ struct JoinOnKeyColumns Sizes key_sizes; - explicit JoinOnKeyColumns(const Block & block, const Names & key_names_, const String & cond_column_name, const Sizes & key_sizes_) : key_names(key_names_) , materialized_keys_holder(JoinCommon::materializeColumns(block, key_names)) /// Rare case, when keys are constant or low cardinality. To avoid code bloat, simply materialize them. @@ -1610,10 +1650,9 @@ DataTypePtr HashJoin::joinGetCheckAndGetReturnType(const DataTypes & data_types, { size_t num_keys = data_types.size(); if (right_table_keys.columns() != num_keys) - throw Exception( - "Number of arguments for function joinGet" + toString(or_null ? "OrNull" : "") - + " doesn't match: passed, should be equal to " + toString(num_keys), - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function joinGet{} doesn't match: passed, should be equal to {}", + toString(or_null ? 
"OrNull" : ""), toString(num_keys)); for (size_t i = 0; i < num_keys; ++i) { @@ -1622,14 +1661,12 @@ DataTypePtr HashJoin::joinGetCheckAndGetReturnType(const DataTypes & data_types, auto left_type = removeNullable(recursiveRemoveLowCardinality(left_type_origin)); auto right_type = removeNullable(recursiveRemoveLowCardinality(right_type_origin)); if (!left_type->equals(*right_type)) - throw Exception( - "Type mismatch in joinGet key " + toString(i) + ": found type " + left_type->getName() + ", while the needed type is " - + right_type->getName(), - ErrorCodes::TYPE_MISMATCH); + throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch in joinGet key {}: " + "found type {}, while the needed type is {}", i, left_type->getName(), right_type->getName()); } if (!sample_block_with_columns_to_add.has(column_name)) - throw Exception("StorageJoin doesn't contain column " + column_name, ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "StorageJoin doesn't contain column {}", column_name); auto elem = sample_block_with_columns_to_add.getByName(column_name); if (or_null && JoinCommon::canBecomeNullable(elem.type)) @@ -1644,7 +1681,7 @@ ColumnWithTypeAndName HashJoin::joinGet(const Block & block, const Block & block bool is_valid = (strictness == JoinStrictness::Any || strictness == JoinStrictness::RightAny) && kind == JoinKind::Left; if (!is_valid) - throw Exception("joinGet only supports StorageJoin of type Left Any", ErrorCodes::INCOMPATIBLE_TYPE_OF_JOIN); + throw Exception(ErrorCodes::INCOMPATIBLE_TYPE_OF_JOIN, "joinGet only supports StorageJoin of type Left Any"); const auto & key_names_right = table_join->getOnlyClause().key_names_right; /// Assemble the key block with correct names. @@ -1676,7 +1713,7 @@ void HashJoin::checkTypesOfKeys(const Block & block) const void HashJoin::joinBlock(Block & block, ExtraBlockPtr & not_processed) { - if (data->released) + if (!data) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot join after data has been released"); for (const auto & onexpr : table_join->getClauses()) @@ -1715,6 +1752,16 @@ void HashJoin::joinBlock(Block & block, ExtraBlockPtr & not_processed) } } +HashJoin::~HashJoin() +{ + if (!data) + { + LOG_TRACE(log, "({}) Join data has been already released", fmt::ptr(this)); + return; + } + LOG_TRACE(log, "({}) Join data is being destroyed, {} bytes and {} rows in hash table", fmt::ptr(this), getTotalByteCount(), getTotalRowCount()); +} + template struct AdderNonJoined { @@ -1753,7 +1800,6 @@ struct AdderNonJoined } }; - /// Stream from not joined earlier rows of the right table. 
/// Based on: /// - map offsetInternal saved in used_flags for single disjuncts @@ -1764,7 +1810,10 @@ class NotJoinedHash final : public NotJoinedBlocks::RightColumnsFiller public: NotJoinedHash(const HashJoin & parent_, UInt64 max_block_size_) : parent(parent_), max_block_size(max_block_size_), current_block_start(0) - {} + { + if (parent.data == nullptr) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot join after data has been released"); + } Block getEmptyBlock() override { return parent.savedBlockSample().cloneEmpty(); } @@ -1961,7 +2010,6 @@ IBlocksStreamPtr HashJoin::getNonJoinedBlocks(const Block & left_sample_block, size_t left_columns_count = left_sample_block.columns(); auto non_joined = std::make_unique>(*this, max_block_size); return std::make_unique(std::move(non_joined), result_sample_block, left_columns_count, *table_join); - } else { @@ -1979,7 +2027,7 @@ void HashJoin::reuseJoinedData(const HashJoin & join) bool multiple_disjuncts = !table_join->oneDisjunct(); if (multiple_disjuncts) - throw Exception("StorageJoin with ORs is not supported", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "StorageJoin with ORs is not supported"); for (auto & map : data->maps) { @@ -1990,10 +2038,20 @@ void HashJoin::reuseJoinedData(const HashJoin & join) } } -BlocksList HashJoin::releaseJoinedBlocks() +BlocksList HashJoin::releaseJoinedBlocks(bool restructure) { + LOG_TRACE(log, "({}) Join data is being released, {} bytes and {} rows in hash table", fmt::ptr(this), getTotalByteCount(), getTotalRowCount()); + BlocksList right_blocks = std::move(data->blocks); - data->released = true; + if (!restructure) + { + data.reset(); + return right_blocks; + } + + data->maps.clear(); + data->blocks_nullmaps.clear(); + BlocksList restored_blocks; /// names to positions optimization @@ -2022,6 +2080,7 @@ BlocksList HashJoin::releaseJoinedBlocks() restored_blocks.emplace_back(std::move(restored_block)); } + data.reset(); return restored_blocks; } diff --git a/src/Interpreters/HashJoin.h b/src/Interpreters/HashJoin.h index 5ea47823b69..b29b6e617c8 100644 --- a/src/Interpreters/HashJoin.h +++ b/src/Interpreters/HashJoin.h @@ -149,6 +149,8 @@ class HashJoin : public IJoin public: HashJoin(std::shared_ptr table_join_, const Block & right_sample_block, bool any_take_last_row_ = false); + ~HashJoin() override; + const TableJoin & getTableJoin() const override { return *table_join; } /** Add block of data from right hand of JOIN to the map. @@ -336,7 +338,8 @@ public: /// Additional data - strings for string keys and continuation elements of single-linked lists of references to rows. 
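Several hunks above converge on one lifecycle rule: releaseJoinedBlocks() hands out the stored blocks and resets `data`; after that the size accessors return 0 and any further join attempt throws a logical error (the new destructor only logs). A minimal standalone model of that contract, with stand-in types and a placeholder for the restructure step rather than the real HashJoin API:

#include <cstddef>
#include <list>
#include <memory>
#include <stdexcept>
#include <string>

struct JoinData
{
    std::list<std::string> blocks;   // stand-in for the stored right-table blocks
    size_t allocated = 0;
};

class MiniJoin
{
public:
    void addJoinedBlock(std::string block)
    {
        checkNotReleased();
        data->allocated += block.size();
        data->blocks.push_back(std::move(block));
    }

    size_t getTotalByteCount() const { return data ? data->allocated : 0; }   // released => 0

    // With restructure == false the blocks are returned exactly as stored (the
    // GraceHashJoin path); otherwise they are first converted back towards the
    // original right-table layout, reduced here to a tagging placeholder.
    std::list<std::string> releaseJoinedBlocks(bool restructure = false)
    {
        checkNotReleased();
        auto blocks = std::move(data->blocks);
        if (restructure)
            for (auto & block : blocks)
                block = "restored:" + block;   // placeholder for the restore-by-name step
        data.reset();                          // later join attempts must fail loudly
        return blocks;
    }

    void joinBlock() const
    {
        checkNotReleased();                    // throws once data has been released
        /* ... probe the hash table ... */
    }

private:
    void checkNotReleased() const
    {
        if (!data)
            throw std::logic_error("Cannot join after data has been released");
    }

    std::shared_ptr<JoinData> data = std::make_shared<JoinData>();
};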
Arena pool; - bool released = false; + size_t blocks_allocated_size = 0; + size_t blocks_nullmaps_allocated_size = 0; }; using RightTableDataPtr = std::shared_ptr; @@ -351,7 +354,13 @@ public: void reuseJoinedData(const HashJoin & join); RightTableDataPtr getJoinedData() const { return data; } - BlocksList releaseJoinedBlocks(); + BlocksList releaseJoinedBlocks(bool restructure = false); + + /// Modify right block (update structure according to sample block) to save it in block list + static Block prepareRightBlock(const Block & block, const Block & saved_block_sample_); + Block prepareRightBlock(const Block & block) const; + + const Block & savedBlockSample() const { return data->sample_block; } bool isUsed(size_t off) const { return used_flags.getUsedSafe(off); } bool isUsed(const Block * block_ptr, size_t row_idx) const { return used_flags.getUsedSafe(block_ptr, row_idx); } @@ -403,10 +412,6 @@ private: void dataMapInit(MapsVariant &); - const Block & savedBlockSample() const { return data->sample_block; } - - /// Modify (structure) right block to save it in block list - Block structureRightBlock(const Block & stored_block) const; void initRightBlockStructure(Block & saved_block_sample); template diff --git a/src/Interpreters/ITokenExtractor.h b/src/Interpreters/ITokenExtractor.h index 77de4233b63..fdcc9880bff 100644 --- a/src/Interpreters/ITokenExtractor.h +++ b/src/Interpreters/ITokenExtractor.h @@ -77,12 +77,15 @@ class ITokenExtractorHelper : public ITokenExtractor { size_t cur = 0; String token; + while (cur < length && static_cast(this)->nextInStringLike(data, length, &cur, token)) bloom_filter.add(token.c_str(), token.size()); } + void stringToGinFilter(const char * data, size_t length, GinFilter & gin_filter) const override { gin_filter.setQueryString(data, length); + size_t cur = 0; size_t token_start = 0; size_t token_len = 0; @@ -94,6 +97,7 @@ class ITokenExtractorHelper : public ITokenExtractor void stringPaddedToGinFilter(const char * data, size_t length, GinFilter & gin_filter) const override { gin_filter.setQueryString(data, length); + size_t cur = 0; size_t token_start = 0; size_t token_len = 0; @@ -105,8 +109,10 @@ class ITokenExtractorHelper : public ITokenExtractor void stringLikeToGinFilter(const char * data, size_t length, GinFilter & gin_filter) const override { gin_filter.setQueryString(data, length); + size_t cur = 0; String token; + while (cur < length && static_cast(this)->nextInStringLike(data, length, &cur, token)) gin_filter.addTerm(token.c_str(), token.size()); } diff --git a/src/Interpreters/IdentifierSemantic.cpp b/src/Interpreters/IdentifierSemantic.cpp index 0aa70057794..36972aeb03d 100644 --- a/src/Interpreters/IdentifierSemantic.cpp +++ b/src/Interpreters/IdentifierSemantic.cpp @@ -56,7 +56,7 @@ std::optional tryChooseTable(const ASTIdentifier & identifier, const std if ((best_match != ColumnMatch::NoMatch) && same_match) { if (!allow_ambiguous) - throw Exception("Ambiguous column '" + identifier.name() + "'", ErrorCodes::AMBIGUOUS_COLUMN_NAME); + throw Exception(ErrorCodes::AMBIGUOUS_COLUMN_NAME, "Ambiguous column '{}'", identifier.name()); best_match = ColumnMatch::Ambiguous; return {}; } diff --git a/src/Interpreters/InJoinSubqueriesPreprocessor.cpp b/src/Interpreters/InJoinSubqueriesPreprocessor.cpp index 58faeb41a15..ca804fe84a3 100644 --- a/src/Interpreters/InJoinSubqueriesPreprocessor.cpp +++ b/src/Interpreters/InJoinSubqueriesPreprocessor.cpp @@ -81,8 +81,8 @@ private: String alias = database_and_table->tryGetAlias(); if (alias.empty()) - 
throw Exception("Distributed table should have an alias when distributed_product_mode set to local", - ErrorCodes::DISTRIBUTED_IN_JOIN_SUBQUERY_DENIED); + throw Exception(ErrorCodes::DISTRIBUTED_IN_JOIN_SUBQUERY_DENIED, + "Distributed table should have an alias when distributed_product_mode set to local"); auto & identifier = database_and_table->as(); renamed_tables.emplace_back(identifier.clone()); @@ -103,22 +103,22 @@ private: /// Already processed. } else - throw Exception("Logical error: unexpected function name " + concrete->name, ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: unexpected function name {}", concrete->name); } else if (table_join) table_join->locality = JoinLocality::Global; else - throw Exception("Logical error: unexpected AST node", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: unexpected AST node"); } else if (distributed_product_mode == DistributedProductMode::DENY) { - throw Exception("Double-distributed IN/JOIN subqueries is denied (distributed_product_mode = 'deny')." - " You may rewrite query to use local tables in subqueries, or use GLOBAL keyword, or set distributed_product_mode to suitable value.", - ErrorCodes::DISTRIBUTED_IN_JOIN_SUBQUERY_DENIED); + throw Exception(ErrorCodes::DISTRIBUTED_IN_JOIN_SUBQUERY_DENIED, + "Double-distributed IN/JOIN subqueries is denied (distributed_product_mode = 'deny'). " + "You may rewrite query to use local tables " + "in subqueries, or use GLOBAL keyword, or set distributed_product_mode to suitable value."); } else - throw Exception("InJoinSubqueriesPreprocessor: unexpected value of 'distributed_product_mode' setting", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "InJoinSubqueriesPreprocessor: unexpected value of 'distributed_product_mode' setting"); } }; diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index 300574912d9..a87308f0cef 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -121,8 +121,8 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter) else if (auto mut_command = MutationCommand::parse(command_ast)) { if (mut_command->type == MutationCommand::MATERIALIZE_TTL && !metadata_snapshot->hasAnyTTL()) - throw Exception("Cannot MATERIALIZE TTL as there is no TTL set for table " - + table->getStorageID().getNameForLogs(), ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot MATERIALIZE TTL as there is no TTL set for table {}", + table->getStorageID().getNameForLogs()); mutation_commands.emplace_back(std::move(*mut_command)); } @@ -131,7 +131,7 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter) live_view_commands.emplace_back(std::move(*live_view_command)); } else - throw Exception("Wrong parameter type in ALTER query", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong parameter type in ALTER query"); } if (typeid_cast(database.get())) @@ -147,7 +147,7 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter) { table->checkMutationIsPossible(mutation_commands, getContext()->getSettingsRef()); MutationsInterpreter(table, metadata_snapshot, mutation_commands, getContext(), false).validate(); - table->mutate(mutation_commands, getContext(), false); + table->mutate(mutation_commands, getContext()); } if (!partition_commands.empty()) @@ -200,7 +200,7 @@ BlockIO 
InterpreterAlterQuery::executeToDatabase(const ASTAlterQuery & alter) if (auto alter_command = AlterCommand::parse(command_ast)) alter_commands.emplace_back(std::move(*alter_command)); else - throw Exception("Wrong parameter type in ALTER DATABASE query", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong parameter type in ALTER DATABASE query"); } if (!alter_commands.empty()) @@ -451,7 +451,7 @@ void InterpreterAlterQuery::extendQueryLogElemImpl(QueryLogElement & elem, const { // Alter queries already have their target table inserted into `elem`. if (elem.query_tables.size() != 1) - throw Exception("Alter query should have target table recorded already", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Alter query should have target table recorded already"); String prefix = *elem.query_tables.begin() + "."; for (const auto & child : alter.command_list->children) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index bea88885d20..611f533d559 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -124,7 +124,7 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) if (create.if_not_exists) return {}; else - throw Exception("Database " + database_name + " already exists.", ErrorCodes::DATABASE_ALREADY_EXISTS); + throw Exception(ErrorCodes::DATABASE_ALREADY_EXISTS, "Database {} already exists.", database_name); } /// Will write file with database metadata, if needed. @@ -136,7 +136,7 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) if (!create.storage && create.attach) { if (!fs::exists(metadata_file_path)) - throw Exception("Database engine must be specified for ATTACH DATABASE query", ErrorCodes::UNKNOWN_DATABASE_ENGINE); + throw Exception(ErrorCodes::UNKNOWN_DATABASE_ENGINE, "Database engine must be specified for ATTACH DATABASE query"); /// Short syntax: try read database definition from file auto ast = DatabaseOnDisk::parseQueryFromMetadata(nullptr, getContext(), metadata_file_path); create = ast->as(); @@ -151,7 +151,7 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) /// For new-style databases engine is explicitly specified in .sql /// When attaching old-style database during server startup, we must always use Ordinary engine if (create.attach) - throw Exception("Database engine must be specified for ATTACH DATABASE query", ErrorCodes::UNKNOWN_DATABASE_ENGINE); + throw Exception(ErrorCodes::UNKNOWN_DATABASE_ENGINE, "Database engine must be specified for ATTACH DATABASE query"); auto engine = std::make_shared(); auto storage = std::make_shared(); engine->name = "Atomic"; @@ -204,8 +204,9 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) { /// Ambiguity is possible: should we attach nested database as Ordinary /// or throw "UUID must be specified" for Atomic? So we suggest short syntax for Ordinary. 
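The createDatabase() changes above keep three paths apart: ATTACH with the short syntax reads the engine back from the on-disk .sql metadata, ATTACH without a metadata file is rejected, and a plain CREATE DATABASE without an ENGINE clause defaults to Atomic. A condensed sketch of that decision with simplified error handling; the return value for the short-syntax case is a placeholder for the metadata parse, not a real API:

#include <filesystem>
#include <optional>
#include <stdexcept>
#include <string>

namespace fs = std::filesystem;

std::string resolveDatabaseEngine(bool attach,
                                  const std::optional<std::string> & engine_clause,
                                  const fs::path & metadata_file_path)
{
    if (!engine_clause && attach)
    {
        if (!fs::exists(metadata_file_path))
            throw std::runtime_error("Database engine must be specified for ATTACH DATABASE query");
        // Short syntax: the stored definition is parsed back from the metadata file.
        return "<engine parsed from " + metadata_file_path.string() + ">";
    }
    if (!engine_clause)
        return "Atomic";            // default engine for a plain CREATE DATABASE
    return *engine_clause;          // explicit ENGINE clause wins
}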
- throw Exception("Use short attach syntax ('ATTACH DATABASE name;' without engine) to attach existing database " - "or specify UUID to attach new database with Atomic engine", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, + "Use short attach syntax ('ATTACH DATABASE name;' without engine) " + "to attach existing database or specify UUID to attach new database with Atomic engine"); } /// Set metadata path according to nested engine @@ -218,7 +219,7 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) { bool is_on_cluster = getContext()->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY; if (create.uuid != UUIDHelpers::Nil && !is_on_cluster) - throw Exception("Ordinary database engine does not support UUID", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Ordinary database engine does not support UUID"); /// Ignore UUID if it's ON CLUSTER query create.uuid = UUIDHelpers::Nil; @@ -229,24 +230,27 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) && !getContext()->getSettingsRef().allow_experimental_database_materialized_mysql && !internal && !create.attach) { - throw Exception("MaterializedMySQL is an experimental database engine. " - "Enable allow_experimental_database_materialized_mysql to use it.", ErrorCodes::UNKNOWN_DATABASE_ENGINE); + throw Exception(ErrorCodes::UNKNOWN_DATABASE_ENGINE, + "MaterializedMySQL is an experimental database engine. " + "Enable allow_experimental_database_materialized_mysql to use it."); } if (create.storage->engine->name == "Replicated" && !getContext()->getSettingsRef().allow_experimental_database_replicated && !internal && !create.attach) { - throw Exception("Replicated is an experimental database engine. " - "Enable allow_experimental_database_replicated to use it.", ErrorCodes::UNKNOWN_DATABASE_ENGINE); + throw Exception(ErrorCodes::UNKNOWN_DATABASE_ENGINE, + "Replicated is an experimental database engine. " + "Enable allow_experimental_database_replicated to use it."); } if (create.storage->engine->name == "MaterializedPostgreSQL" && !getContext()->getSettingsRef().allow_experimental_database_materialized_postgresql && !internal && !create.attach) { - throw Exception("MaterializedPostgreSQL is an experimental database engine. " - "Enable allow_experimental_database_materialized_postgresql to use it.", ErrorCodes::UNKNOWN_DATABASE_ENGINE); + throw Exception(ErrorCodes::UNKNOWN_DATABASE_ENGINE, + "MaterializedPostgreSQL is an experimental database engine. 
" + "Enable allow_experimental_database_materialized_postgresql to use it."); } bool need_write_metadata = !create.attach || !fs::exists(metadata_file_path); @@ -478,7 +482,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( if (col_decl.collation && !context_->getSettingsRef().compatibility_ignore_collation_in_create_table) { - throw Exception("Cannot support collation, please set compatibility_ignore_collation_in_create_table=true", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot support collation, please set compatibility_ignore_collation_in_create_table=true"); } DataTypePtr column_type = nullptr; @@ -492,7 +496,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( if (col_decl.null_modifier) { if (column_type->isNullable()) - throw Exception("Can't use [NOT] NULL modifier with Nullable type", ErrorCodes::ILLEGAL_SYNTAX_FOR_DATA_TYPE); + throw Exception(ErrorCodes::ILLEGAL_SYNTAX_FOR_DATA_TYPE, "Can't use [NOT] NULL modifier with Nullable type"); if (*col_decl.null_modifier) column_type = makeNullable(column_type); } @@ -571,10 +575,9 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( if (col_decl.default_specifier == "AUTO_INCREMENT" && !context_->getSettingsRef().compatibility_ignore_auto_increment_in_create_table) { - throw Exception( - "AUTO_INCREMENT is not supported. To ignore the keyword in column declaration, set " - "`compatibility_ignore_auto_increment_in_create_table` to true", - ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, + "AUTO_INCREMENT is not supported. To ignore the keyword " + "in column declaration, set `compatibility_ignore_auto_increment_in_create_table` to true"); } if (col_decl.default_expression) @@ -607,7 +610,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( else if (col_decl.type) column.type = name_type_it->type; else - throw Exception{"Neither default value expression nor type is provided for a column", ErrorCodes::LOGICAL_ERROR}; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Neither default value expression nor type is provided for a column"); if (col_decl.comment) column.comment = col_decl.comment->as().value.get(); @@ -615,7 +618,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( if (col_decl.codec) { if (col_decl.default_specifier == "ALIAS") - throw Exception{"Cannot specify codec for column type ALIAS", ErrorCodes::BAD_ARGUMENTS}; + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot specify codec for column type ALIAS"); column.codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST( col_decl.codec, column.type, sanity_check_compression_codecs, allow_experimental_codecs); } @@ -630,7 +633,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( res.flattenNested(); if (res.getAllPhysical().empty()) - throw Exception{"Cannot CREATE table without physical columns", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED}; + throw Exception(ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED, "Cannot CREATE table without physical columns"); return res; } @@ -666,7 +669,7 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti if (create.columns_list) { if (create.as_table_function && (create.columns_list->indices || create.columns_list->constraints)) - throw Exception("Indexes and constraints are not supported for table functions", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Indexes and constraints are not supported for table 
functions"); /// Dictionaries have dictionary_attributes_list instead of columns_list assert(!create.is_dictionary); @@ -680,14 +683,14 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti for (const auto & index : create.columns_list->indices->children) { IndexDescription index_desc = IndexDescription::getIndexFromAST(index->clone(), properties.columns, getContext()); - if (index_desc.type == GinFilter::FilterName && getContext()->getSettingsRef().allow_experimental_inverted_index == false) + const auto & settings = getContext()->getSettingsRef(); + if (index_desc.type == INVERTED_INDEX_NAME && !settings.allow_experimental_inverted_index) { - throw Exception( - "Experimental Inverted Index feature is not enabled (the setting 'allow_experimental_inverted_index')", - ErrorCodes::SUPPORT_IS_DISABLED); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, + "Experimental Inverted Index feature is not enabled (the setting 'allow_experimental_inverted_index')"); } - if (index_desc.type == "annoy" && !getContext()->getSettingsRef().allow_experimental_annoy_index) - throw Exception("Annoy index is disabled. Turn on allow_experimental_annoy_index", ErrorCodes::INCORRECT_QUERY); + if (index_desc.type == "annoy" && !settings.allow_experimental_annoy_index) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Annoy index is disabled. Turn on allow_experimental_annoy_index"); properties.indices.push_back(index_desc); } @@ -754,7 +757,7 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti /// We can have queries like "CREATE TABLE
ENGINE=" if /// supports schema inference (will determine table structure in it's constructor). else if (!StorageFactory::instance().checkIfStorageSupportsSchemaInterface(create.storage->engine->name)) - throw Exception("Incorrect CREATE query: required list of column descriptions or AS section or SELECT.", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Incorrect CREATE query: required list of column descriptions or AS section or SELECT."); /// Even if query has list of columns, canonicalize it (unfold Nested columns). if (!create.columns_list) @@ -787,7 +790,7 @@ void InterpreterCreateQuery::validateTableStructure(const ASTCreateQuery & creat for (const auto & column : properties.columns) { if (!all_columns.emplace(column.name).second) - throw Exception("Column " + backQuoteIfNeed(column.name) + " already exists", ErrorCodes::DUPLICATE_COLUMN); + throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Column {} already exists", backQuoteIfNeed(column.name)); } /// Check if _row_exists for lightweight delete column in column_lists for merge tree family. @@ -795,9 +798,10 @@ void InterpreterCreateQuery::validateTableStructure(const ASTCreateQuery & creat { auto search = all_columns.find(LightweightDeleteDescription::FILTER_COLUMN.name); if (search != all_columns.end()) - throw Exception("Cannot create table with column '" + LightweightDeleteDescription::FILTER_COLUMN.name + "' " - "for *MergeTree engines because it is reserved for lightweight delete feature", - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Cannot create table with column '{}' for *MergeTree engines because it " + "is reserved for lightweight delete feature", + LightweightDeleteDescription::FILTER_COLUMN.name); } const auto & settings = getContext()->getSettingsRef(); @@ -810,10 +814,11 @@ void InterpreterCreateQuery::validateTableStructure(const ASTCreateQuery & creat if (const auto * current_type_ptr = typeid_cast(name_and_type_pair.type.get())) { if (!isStringOrFixedString(*removeNullable(current_type_ptr->getDictionaryType()))) - throw Exception("Creating columns of type " + current_type_ptr->getName() + " is prohibited by default " + throw Exception(ErrorCodes::SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY, + "Creating columns of type {} is prohibited by default " "due to expected negative impact on performance. " "It can be enabled with the \"allow_suspicious_low_cardinality_types\" setting.", - ErrorCodes::SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY); + current_type_ptr->getName()); } } } @@ -825,10 +830,10 @@ void InterpreterCreateQuery::validateTableStructure(const ASTCreateQuery & creat const auto & type = name_and_type_pair.type->getName(); if (type == "MultiPolygon" || type == "Polygon" || type == "Ring" || type == "Point") { - String message = "Cannot create table with column '" + name_and_type_pair.name + "' which type is '" - + type + "' because experimental geo types are not allowed. " - + "Set setting allow_experimental_geo_types = 1 in order to allow it"; - throw Exception(message, ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot create table with column '{}' which type is '{}' " + "because experimental geo types are not allowed. 
" + "Set setting allow_experimental_geo_types = 1 in order to allow it", + name_and_type_pair.name, type); } } } @@ -891,7 +896,7 @@ String InterpreterCreateQuery::getTableEngineName(DefaultTableEngine default_tab return "Memory"; default: - throw Exception("default_table_engine is set to unknown value", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "default_table_engine is set to unknown value"); } } @@ -1008,7 +1013,7 @@ void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const Data && !internal) { if (create.uuid == UUIDHelpers::Nil) - throw Exception("Table UUID is not specified in DDL log", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Table UUID is not specified in DDL log"); } bool from_path = create.attach_from_path.has_value(); @@ -1052,8 +1057,9 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) { /// Temporary tables are created out of databases. if (create.temporary && create.database) - throw Exception("Temporary tables cannot be inside a database. You should not specify a database for a temporary table.", - ErrorCodes::BAD_DATABASE_FOR_TEMPORARY_TABLE); + throw Exception(ErrorCodes::BAD_DATABASE_FOR_TEMPORARY_TABLE, + "Temporary tables cannot be inside a database. " + "You should not specify a database for a temporary table."); String current_database = getContext()->getCurrentDatabase(); auto database_name = create.database ? create.getDatabase() : current_database; @@ -1382,7 +1388,8 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, bool is_replicated_storage = typeid_cast(res.get()) != nullptr; if (!is_replicated_storage && res->storesDataOnDisk() && database && database->getEngineName() == "Replicated") throw Exception(ErrorCodes::UNKNOWN_STORAGE, - "Only tables with a Replicated engine or tables which do not store data on disk are allowed in a Replicated database"); + "Only tables with a Replicated engine " + "or tables which do not store data on disk are allowed in a Replicated database"); } if (from_path && !res->storesDataOnDisk()) @@ -1603,10 +1610,11 @@ void InterpreterCreateQuery::prepareOnClusterQuery(ASTCreateQuery & create, Cont return; } - throw Exception("Seems like cluster is configured for cross-replication, " + throw Exception(ErrorCodes::INCORRECT_QUERY, + "Seems like cluster is configured for cross-replication, " "but zookeeper_path for ReplicatedMergeTree is not specified or contains {uuid} macro. " "It's not supported for cross replication, because tables must have different UUIDs. 
" - "Please specify unique zookeeper_path explicitly.", ErrorCodes::INCORRECT_QUERY); + "Please specify unique zookeeper_path explicitly."); } } diff --git a/src/Interpreters/InterpreterDeleteQuery.cpp b/src/Interpreters/InterpreterDeleteQuery.cpp index 736fc90a346..f8974a19f45 100644 --- a/src/Interpreters/InterpreterDeleteQuery.cpp +++ b/src/Interpreters/InterpreterDeleteQuery.cpp @@ -5,15 +5,16 @@ #include #include #include +#include #include +#include +#include +#include #include -#include -#include #include #include #include #include -#include namespace DB @@ -72,43 +73,35 @@ BlockIO InterpreterDeleteQuery::execute() table->checkMutationIsPossible(mutation_commands, getContext()->getSettingsRef()); MutationsInterpreter(table, metadata_snapshot, mutation_commands, getContext(), false).validate(); - table->mutate(mutation_commands, getContext(), false); + table->mutate(mutation_commands, getContext()); return {}; } else if (table->supportsLightweightDelete()) { if (!getContext()->getSettingsRef().allow_experimental_lightweight_delete) - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Lightweight delete mutate is experimental. Set `allow_experimental_lightweight_delete` setting to enable it"); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, + "Lightweight delete mutate is experimental. " + "Set `allow_experimental_lightweight_delete` setting to enable it"); - /// Convert to MutationCommand - MutationCommands mutation_commands; - MutationCommand mut_command; + /// Build "ALTER ... UPDATE _row_exists = 0 WHERE predicate" query + String alter_query = + "ALTER TABLE " + table->getStorageID().getFullTableName() + + (delete_query.cluster.empty() ? "" : " ON CLUSTER " + backQuoteIfNeed(delete_query.cluster)) + + " UPDATE `_row_exists` = 0 WHERE " + serializeAST(*delete_query.predicate); - /// Build "UPDATE _row_exists = 0 WHERE predicate" query - mut_command.type = MutationCommand::Type::UPDATE; - mut_command.predicate = delete_query.predicate; + ParserAlterQuery parser; + ASTPtr alter_ast = parseQuery( + parser, + alter_query.data(), + alter_query.data() + alter_query.size(), + "ALTER query", + 0, + DBMS_DEFAULT_MAX_PARSER_DEPTH); - auto command = std::make_shared(); - command->type = ASTAlterCommand::UPDATE; - command->predicate = delete_query.predicate; - command->update_assignments = std::make_shared(); - auto set_row_does_not_exist = std::make_shared(); - set_row_does_not_exist->column_name = LightweightDeleteDescription::FILTER_COLUMN.name; - auto zero_value = std::make_shared(DB::Field(UInt8(0))); - set_row_does_not_exist->children.push_back(zero_value); - command->update_assignments->children.push_back(set_row_does_not_exist); - command->children.push_back(command->predicate); - command->children.push_back(command->update_assignments); - mut_command.column_to_update_expression[set_row_does_not_exist->column_name] = zero_value; - mut_command.ast = command->ptr(); - - mutation_commands.emplace_back(mut_command); - - table->checkMutationIsPossible(mutation_commands, getContext()->getSettingsRef()); - MutationsInterpreter(table, metadata_snapshot, mutation_commands, getContext(), false).validate(); - table->mutate(mutation_commands, getContext(), true); - - return {}; + auto context = Context::createCopy(getContext()); + context->setSetting("mutations_sync", 2); /// Lightweight delete is always synchronous + InterpreterAlterQuery alter_interpreter(alter_ast, context); + return alter_interpreter.execute(); } else { diff --git a/src/Interpreters/InterpreterDropQuery.cpp 
b/src/Interpreters/InterpreterDropQuery.cpp index 8cc9b38e44f..f2f937f6ec0 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -70,7 +70,7 @@ BlockIO InterpreterDropQuery::execute() else if (drop.database) return executeToDatabase(drop); else - throw Exception("Nothing to drop, both names are empty", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Nothing to drop, both names are empty"); } @@ -201,7 +201,7 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ContextPtr context_, ASTDropQue else if (query.kind == ASTDropQuery::Kind::Truncate) { if (table->isDictionary()) - throw Exception("Cannot TRUNCATE dictionary", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Cannot TRUNCATE dictionary"); context_->checkAccess(AccessType::TRUNCATE, table_id); if (table->isStaticStorage()) @@ -262,7 +262,7 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ContextPtr context_, ASTDropQue BlockIO InterpreterDropQuery::executeToTemporaryTable(const String & table_name, ASTDropQuery::Kind kind) { if (kind == ASTDropQuery::Kind::Detach) - throw Exception("Unable to detach temporary table.", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Unable to detach temporary table."); else { auto context_handle = getContext()->hasSessionContext() ? getContext()->getSessionContext() : getContext(); @@ -287,6 +287,10 @@ BlockIO InterpreterDropQuery::executeToTemporaryTable(const String & table_name, table->drop(); table->is_dropped = true; } + else if (kind == ASTDropQuery::Kind::Detach) + { + table->is_detached = true; + } } } @@ -331,7 +335,7 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query, { if (query.kind == ASTDropQuery::Kind::Truncate) { - throw Exception("Unable to truncate database", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Unable to truncate database"); } else if (query.kind == ASTDropQuery::Kind::Detach || query.kind == ASTDropQuery::Kind::Drop) { @@ -339,7 +343,7 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query, getContext()->checkAccess(AccessType::DROP_DATABASE, database_name); if (query.kind == ASTDropQuery::Kind::Detach && query.permanently) - throw Exception("DETACH PERMANENTLY is not implemented for databases", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "DETACH PERMANENTLY is not implemented for databases"); if (database->hasReplicationThread()) database->stopReplication(); diff --git a/src/Interpreters/InterpreterExistsQuery.cpp b/src/Interpreters/InterpreterExistsQuery.cpp index 758c6d81407..90fa15bf63f 100644 --- a/src/Interpreters/InterpreterExistsQuery.cpp +++ b/src/Interpreters/InterpreterExistsQuery.cpp @@ -69,7 +69,7 @@ QueryPipeline InterpreterExistsQuery::executeImpl() else if ((exists_query = query_ptr->as())) { if (exists_query->temporary) - throw Exception("Temporary dictionaries are not possible.", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Temporary dictionaries are not possible."); String database = getContext()->resolveDatabase(exists_query->getDatabase()); getContext()->checkAccess(AccessType::SHOW_DICTIONARIES, database, exists_query->getTable()); result = DatabaseCatalog::instance().isDictionaryExist({database, exists_query->getTable()}); diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index 17a6b695088..3c225522cc4 100644 --- 
a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -252,7 +252,7 @@ struct ExplainSettings : public Settings { auto it = boolean_settings.find(name_); if (it == boolean_settings.end()) - throw Exception("Unknown setting for ExplainSettings: " + name_, ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown setting for ExplainSettings: {}", name_); it->second.get() = value; } @@ -261,7 +261,7 @@ struct ExplainSettings : public Settings { auto it = integer_settings.find(name_); if (it == integer_settings.end()) - throw Exception("Unknown setting for ExplainSettings: " + name_, ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown setting for ExplainSettings: {}", name_); it->second.get() = value; } @@ -314,8 +314,8 @@ ExplainSettings checkAndGetSettings(const ASTPtr & ast_settings) for (const auto & change : set_query.changes) { if (!settings.has(change.name)) - throw Exception("Unknown setting \"" + change.name + "\" for EXPLAIN " + Settings::name + " query. " - "Supported settings: " + settings.getSettingsList(), ErrorCodes::UNKNOWN_SETTING); + throw Exception(ErrorCodes::UNKNOWN_SETTING, "Unknown setting \"{}\" for EXPLAIN {} query. " + "Supported settings: {}", change.name, Settings::name, settings.getSettingsList()); if (change.value.getType() != Field::Types::UInt64) throw Exception(ErrorCodes::INVALID_SETTING_VALUE, @@ -326,8 +326,8 @@ ExplainSettings checkAndGetSettings(const ASTPtr & ast_settings) { auto value = change.value.get(); if (value > 1) - throw Exception("Invalid value " + std::to_string(value) + " for setting \"" + change.name + - "\". Expected boolean type", ErrorCodes::INVALID_SETTING_VALUE); + throw Exception(ErrorCodes::INVALID_SETTING_VALUE, "Invalid value {} for setting \"{}\". 
" + "Expected boolean type", value, change.name); settings.setBooleanSetting(change.name, value); } @@ -427,7 +427,7 @@ QueryPipeline InterpreterExplainQuery::executeImpl() case ASTExplainQuery::QueryPlan: { if (!dynamic_cast(ast.getExplainedQuery().get())) - throw Exception("Only SELECT is supported for EXPLAIN query", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Only SELECT is supported for EXPLAIN query"); auto settings = checkAndGetSettings(ast.getSettings()); QueryPlan plan; @@ -521,13 +521,13 @@ QueryPipeline InterpreterExplainQuery::executeImpl() printPipeline(io.pipeline.getProcessors(), buf); } else - throw Exception("Only SELECT and INSERT is supported for EXPLAIN PIPELINE query", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Only SELECT and INSERT is supported for EXPLAIN PIPELINE query"); break; } case ASTExplainQuery::QueryEstimates: { if (!dynamic_cast(ast.getExplainedQuery().get())) - throw Exception("Only SELECT is supported for EXPLAIN ESTIMATE query", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Only SELECT is supported for EXPLAIN ESTIMATE query"); auto settings = checkAndGetSettings(ast.getSettings()); QueryPlan plan; @@ -564,7 +564,7 @@ QueryPipeline InterpreterExplainQuery::executeImpl() case ASTExplainQuery::CurrentTransaction: { if (ast.getSettings()) - throw Exception("Settings are not supported for EXPLAIN CURRENT TRANSACTION query.", ErrorCodes::UNKNOWN_SETTING); + throw Exception(ErrorCodes::UNKNOWN_SETTING, "Settings are not supported for EXPLAIN CURRENT TRANSACTION query."); if (auto txn = getContext()->getCurrentTransaction()) { diff --git a/src/Interpreters/InterpreterExternalDDLQuery.cpp b/src/Interpreters/InterpreterExternalDDLQuery.cpp index 5c06ab4b818..c0acb1e03eb 100644 --- a/src/Interpreters/InterpreterExternalDDLQuery.cpp +++ b/src/Interpreters/InterpreterExternalDDLQuery.cpp @@ -35,7 +35,7 @@ BlockIO InterpreterExternalDDLQuery::execute() const ASTExternalDDLQuery & external_ddl_query = query->as(); if (getContext()->getClientInfo().query_kind != ClientInfo::QueryKind::SECONDARY_QUERY) - throw Exception("Cannot parse and execute EXTERNAL DDL FROM.", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Cannot parse and execute EXTERNAL DDL FROM."); if (external_ddl_query.from->name == "MySQL") { @@ -43,7 +43,7 @@ BlockIO InterpreterExternalDDLQuery::execute() const ASTs & arguments = external_ddl_query.from->arguments->children; if (arguments.size() != 2 || !arguments[0]->as() || !arguments[1]->as()) - throw Exception("MySQL External require two identifier arguments.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "MySQL External require two identifier arguments."); if (external_ddl_query.external_ddl->as()) return MySQLInterpreter::InterpreterMySQLDropQuery( diff --git a/src/Interpreters/InterpreterFactory.cpp b/src/Interpreters/InterpreterFactory.cpp index 4c677ce5e18..502de459156 100644 --- a/src/Interpreters/InterpreterFactory.cpp +++ b/src/Interpreters/InterpreterFactory.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -73,6 +74,7 @@ #include #include #include +#include #include #include #include @@ -167,6 +169,10 @@ std::unique_ptr InterpreterFactory::get(ASTPtr & query, ContextMut { return std::make_unique(query, context); } + else if (query->as()) + { + return std::make_unique(query, context); + } else if (query->as()) { return std::make_unique(query, context); 
@@ -346,7 +352,7 @@ std::unique_ptr InterpreterFactory::get(ASTPtr & query, ContextMut } else { - throw Exception("Unknown type of query: " + query->getID(), ErrorCodes::UNKNOWN_TYPE_OF_QUERY); + throw Exception(ErrorCodes::UNKNOWN_TYPE_OF_QUERY, "Unknown type of query: {}", query->getID()); } } } diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 62f3e190ef6..74fe351daaf 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -153,7 +152,7 @@ Block InterpreterInsertQuery::getSampleBlock( for (const auto & current_name : names) { if (res.has(current_name)) - throw Exception("Column " + current_name + " specified more than once", ErrorCodes::DUPLICATE_COLUMN); + throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Column {} specified more than once", current_name); /// Column is not ordinary or ephemeral if (!table_sample_insertable.has(current_name)) @@ -162,13 +161,13 @@ Block InterpreterInsertQuery::getSampleBlock( if (table_sample_physical.has(current_name)) { if (!allow_materialized) - throw Exception("Cannot insert column " + current_name + ", because it is MATERIALIZED column.", - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.", + current_name); res.insert(ColumnWithTypeAndName(table_sample_physical.getByName(current_name).type, current_name)); } else /// The table does not have a column with that name - throw Exception("No such column " + current_name + " in table " + table->getStorageID().getNameForLogs(), - ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "No such column {} in table {}", + current_name, table->getStorageID().getNameForLogs()); } else res.insert(ColumnWithTypeAndName(table_sample_insertable.getByName(current_name).type, current_name)); @@ -528,7 +527,7 @@ BlockIO InterpreterInsertQuery::execute() { for (const auto & column : metadata_snapshot->getColumns()) if (column.default_desc.kind == ColumnDefaultKind::Materialized && header.has(column.name)) - throw Exception("Cannot insert column " + column.name + ", because it is MATERIALIZED column.", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.", column.name); } res.pipeline = QueryPipelineBuilder::getPipeline(std::move(pipeline)); diff --git a/src/Interpreters/InterpreterKillQueryQuery.cpp b/src/Interpreters/InterpreterKillQueryQuery.cpp index 1b4364351df..40698386ccb 100644 --- a/src/Interpreters/InterpreterKillQueryQuery.cpp +++ b/src/Interpreters/InterpreterKillQueryQuery.cpp @@ -120,7 +120,7 @@ static QueryDescriptors extractQueriesExceptMeAndCheckAccess(const Block & proce } if (res.empty() && access_denied) - throw Exception("User " + my_client.current_user + " attempts to kill query created by " + query_user, ErrorCodes::ACCESS_DENIED); + throw Exception(ErrorCodes::ACCESS_DENIED, "User {} attempts to kill query created by {}", my_client.current_user, query_user); return res; } @@ -291,9 +291,8 @@ BlockIO InterpreterKillQueryQuery::execute() } if (res_columns[0]->empty() && access_denied) - throw Exception( - "Not allowed to kill mutation. 
To execute this query it's necessary to have the grant " + required_access_rights.toString(), - ErrorCodes::ACCESS_DENIED); + throw Exception(ErrorCodes::ACCESS_DENIED, "Not allowed to kill mutation. " + "To execute this query it's necessary to have the grant {}", required_access_rights.toString()); res_io.pipeline = QueryPipeline(Pipe(std::make_shared(header.cloneWithColumns(std::move(res_columns))))); @@ -356,9 +355,8 @@ BlockIO InterpreterKillQueryQuery::execute() } if (res_columns[0]->empty() && access_denied) - throw Exception( - "Not allowed to kill move partition. To execute this query it's necessary to have the grant " + required_access_rights.toString(), - ErrorCodes::ACCESS_DENIED); + throw Exception(ErrorCodes::ACCESS_DENIED, "Not allowed to kill move partition. " + "To execute this query it's necessary to have the grant {}", required_access_rights.toString()); res_io.pipeline = QueryPipeline(Pipe(std::make_shared(header.cloneWithColumns(std::move(res_columns))))); @@ -428,7 +426,7 @@ Block InterpreterKillQueryQuery::getSelectResult(const String & columns, const S while (executor.pull(tmp_block)); if (tmp_block) - throw Exception("Expected one block from input stream", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected one block from input stream"); return res; } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 2578a821a4f..624859300b9 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -382,8 +382,8 @@ InterpreterSelectQuery::InterpreterSelectQuery( const Settings & settings = context->getSettingsRef(); if (settings.max_subquery_depth && options.subquery_depth > settings.max_subquery_depth) - throw Exception("Too deep subqueries. Maximum: " + settings.max_subquery_depth.toString(), - ErrorCodes::TOO_DEEP_SUBQUERIES); + throw Exception(ErrorCodes::TOO_DEEP_SUBQUERIES, "Too deep subqueries. Maximum: {}", + settings.max_subquery_depth.toString()); bool has_input = input_pipe != std::nullopt; if (input_pipe) @@ -562,7 +562,10 @@ InterpreterSelectQuery::InterpreterSelectQuery( view = nullptr; } - if (try_move_to_prewhere && storage && storage->canMoveConditionsToPrewhere() && query.where() && !query.prewhere()) + if (try_move_to_prewhere + && storage && storage->canMoveConditionsToPrewhere() + && query.where() && !query.prewhere() + && !query.hasJoin()) /// Join may produce rows with nulls or default values, it's difficult to analyze if they affected or not. { /// PREWHERE optimization: transfer some condition from WHERE to PREWHERE if enabled and viable if (const auto & column_sizes = storage->getColumnSizes(); !column_sizes.empty()) @@ -606,17 +609,23 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (!options.only_analyze) { if (query.sampleSize() && (input_pipe || !storage || !storage->supportsSampling())) - throw Exception("Illegal SAMPLE: table doesn't support sampling", ErrorCodes::SAMPLING_NOT_SUPPORTED); + throw Exception(ErrorCodes::SAMPLING_NOT_SUPPORTED, "Illegal SAMPLE: table doesn't support sampling"); if (query.final() && (input_pipe || !storage || !storage->supportsFinal())) - throw Exception( - (!input_pipe && storage) ? 
"Storage " + storage->getName() + " doesn't support FINAL" : "Illegal FINAL", - ErrorCodes::ILLEGAL_FINAL); + { + if (!input_pipe && storage) + throw Exception(ErrorCodes::ILLEGAL_FINAL, "Storage {} doesn't support FINAL", storage->getName()); + else + throw Exception(ErrorCodes::ILLEGAL_FINAL, "Illegal FINAL"); + } if (query.prewhere() && (input_pipe || !storage || !storage->supportsPrewhere())) - throw Exception( - (!input_pipe && storage) ? "Storage " + storage->getName() + " doesn't support PREWHERE" : "Illegal PREWHERE", - ErrorCodes::ILLEGAL_PREWHERE); + { + if (!input_pipe && storage) + throw Exception(ErrorCodes::ILLEGAL_PREWHERE, "Storage {} doesn't support PREWHERE", storage->getName()); + else + throw Exception(ErrorCodes::ILLEGAL_PREWHERE, "Illegal PREWHERE"); + } /// Save the new temporary tables in the query context for (const auto & it : query_analyzer->getExternalTables()) @@ -936,7 +945,8 @@ static std::pair getWithFillFieldValue(const ASTPtr & node, auto field_type = evaluateConstantExpression(node, context); if (!isColumnedAsNumber(field_type.second)) - throw Exception("Illegal type " + field_type.second->getName() + " of WITH FILL expression, must be numeric type", ErrorCodes::INVALID_WITH_FILL_EXPRESSION); + throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, + "Illegal type {} of WITH FILL expression, must be numeric type", field_type.second->getName()); return field_type; } @@ -951,7 +961,8 @@ static std::pair> getWithFillStep(const ASTPt if (isColumnedAsNumber(type)) return std::make_pair(std::move(field), std::nullopt); - throw Exception("Illegal type " + type->getName() + " of WITH FILL expression, must be numeric type", ErrorCodes::INVALID_WITH_FILL_EXPRESSION); + throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, + "Illegal type {} of WITH FILL expression, must be numeric type", type->getName()); } static FillColumnDescription getWithFillDescription(const ASTOrderByElement & order_by_elem, const ContextPtr & context) @@ -969,32 +980,30 @@ static FillColumnDescription getWithFillDescription(const ASTOrderByElement & or descr.fill_step = order_by_elem.direction; if (applyVisitor(FieldVisitorAccurateEquals(), descr.fill_step, Field{0})) - throw Exception("WITH FILL STEP value cannot be zero", ErrorCodes::INVALID_WITH_FILL_EXPRESSION); + throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, "WITH FILL STEP value cannot be zero"); if (order_by_elem.direction == 1) { if (applyVisitor(FieldVisitorAccurateLess(), descr.fill_step, Field{0})) - throw Exception("WITH FILL STEP value cannot be negative for sorting in ascending direction", - ErrorCodes::INVALID_WITH_FILL_EXPRESSION); + throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, "WITH FILL STEP value cannot be negative for sorting in ascending direction"); if (!descr.fill_from.isNull() && !descr.fill_to.isNull() && applyVisitor(FieldVisitorAccurateLess(), descr.fill_to, descr.fill_from)) { - throw Exception("WITH FILL TO value cannot be less than FROM value for sorting in ascending direction", - ErrorCodes::INVALID_WITH_FILL_EXPRESSION); + throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, + "WITH FILL TO value cannot be less than FROM value for sorting in ascending direction"); } } else { if (applyVisitor(FieldVisitorAccurateLess(), Field{0}, descr.fill_step)) - throw Exception("WITH FILL STEP value cannot be positive for sorting in descending direction", - ErrorCodes::INVALID_WITH_FILL_EXPRESSION); + throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, "WITH FILL STEP value cannot 
be positive for sorting in descending direction"); if (!descr.fill_from.isNull() && !descr.fill_to.isNull() && applyVisitor(FieldVisitorAccurateLess(), descr.fill_from, descr.fill_to)) { - throw Exception("WITH FILL FROM value cannot be less than TO value for sorting in descending direction", - ErrorCodes::INVALID_WITH_FILL_EXPRESSION); + throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, + "WITH FILL FROM value cannot be less than TO value for sorting in descending direction"); } } @@ -1123,14 +1132,13 @@ static UInt64 getLimitUIntValue(const ASTPtr & node, const ContextPtr & context, const auto & [field, type] = evaluateConstantExpression(node, context); if (!isNativeNumber(type)) - throw Exception( - "Illegal type " + type->getName() + " of " + expr + " expression, must be numeric type", ErrorCodes::INVALID_LIMIT_EXPRESSION); + throw Exception(ErrorCodes::INVALID_LIMIT_EXPRESSION, "Illegal type {} of {} expression, must be numeric type", + type->getName(), expr); Field converted = convertFieldToType(field, DataTypeUInt64()); if (converted.isNull()) - throw Exception( - "The value " + applyVisitor(FieldVisitorToString(), field) + " of " + expr + " expression is not representable as UInt64", - ErrorCodes::INVALID_LIMIT_EXPRESSION); + throw Exception(ErrorCodes::INVALID_LIMIT_EXPRESSION, "The value {} of {} expression is not representable as UInt64", + applyVisitor(FieldVisitorToString(), field), expr); return converted.safeGet(); } @@ -1339,7 +1347,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional
<Pipe> prepared_pipe)
input_order_info && query_info.input_order_info) - throw Exception("InputOrderInfo is set for projection and for query", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "InputOrderInfo is set for projection and for query"); InputOrderInfoPtr input_order_info_for_order; if (!expressions.need_aggregate) input_order_info_for_order = query_info.projection ? query_info.projection->input_order_info : query_info.input_order_info; @@ -1382,7 +1390,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional
<Pipe> prepared_pipe)
hasWindow()) - throw Exception( - "Window functions does not support processing from WithMergeableStateAfterAggregation", - ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Window functions does not support processing from WithMergeableStateAfterAggregation"); } else if (expressions.need_aggregate) { @@ -2242,7 +2248,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc } if (!max_block_size) - throw Exception("Setting 'max_block_size' cannot be zero", ErrorCodes::PARAMETER_OUT_OF_BOUND); + throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, "Setting 'max_block_size' cannot be zero"); storage_limits.emplace_back(local_limits); @@ -2256,7 +2262,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc /// Subquery. ASTPtr subquery = extractTableExpression(query, 0); if (!subquery) - throw Exception("Subquery expected", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Subquery expected"); interpreter_subquery = std::make_unique( subquery, getSubqueryContext(context), @@ -2367,7 +2373,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc } } else - throw Exception("Logical error in InterpreterSelectQuery: nowhere to read", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error in InterpreterSelectQuery: nowhere to read"); /// Specify the number of threads only if it wasn't specified in storage. /// @@ -2945,7 +2951,7 @@ void InterpreterSelectQuery::executeLimit(QueryPlan & query_plan) if (query.limit_with_ties) { if (!query.orderBy()) - throw Exception("LIMIT WITH TIES without ORDER BY", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "LIMIT WITH TIES without ORDER BY"); order_descr = getSortDescription(query, context); } diff --git a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index bf384fa5d86..e3954f2a197 100644 --- a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -54,7 +54,7 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( size_t num_children = ast->list_of_selects->children.size(); if (!num_children) - throw Exception("Logical error: no children in ASTSelectWithUnionQuery", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: no children in ASTSelectWithUnionQuery"); /// Note that we pass 'required_result_column_names' to first SELECT. 
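Nearly every hunk in this patch applies the same mechanical rewrite: the old Exception constructor that takes an already-concatenated message with the error code last is replaced by the variadic constructor that takes the error code first and a fmt-style format string plus its arguments. A minimal, self-contained sketch of the two call styles, assuming plain fmt; ExampleException, ILLEGAL_FINAL and checkFinalSupport are illustrative stand-ins, not ClickHouse's real DB::Exception, error-code values or functions:

#include <fmt/format.h>
#include <stdexcept>
#include <string>
#include <utility>

namespace example
{
    constexpr int ILLEGAL_FINAL = 1; // illustrative code, not the real value

    struct ExampleException : std::runtime_error
    {
        int code;

        // Old style: the caller builds the message by concatenation, code goes last.
        ExampleException(const std::string & message, int code_)
            : std::runtime_error(message), code(code_) {}

        // New style: code goes first, the message is a compile-time checked
        // format string, and the arguments are formatted inside the constructor.
        template <typename... Args>
        ExampleException(int code_, fmt::format_string<Args...> fmt_str, Args &&... args)
            : std::runtime_error(fmt::format(fmt_str, std::forward<Args>(args)...)), code(code_) {}
    };

    void checkFinalSupport(const std::string & storage_name, bool supports_final)
    {
        if (!supports_final)
            // Before: throw ExampleException("Storage " + storage_name + " doesn't support FINAL", ILLEGAL_FINAL);
            throw ExampleException(ILLEGAL_FINAL, "Storage {} doesn't support FINAL", storage_name);
    }
}

At the call sites the error code, the message template and its arguments now stay together in a single expression, which is why most of the long string concatenations in the hunks above simply disappear.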
/// And for the rest, we pass names at the corresponding positions of 'required_result_column_names' in the result of first SELECT, @@ -81,11 +81,9 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( = getCurrentChildResultHeader(ast->list_of_selects->children.at(query_num), required_result_column_names); if (full_result_header_for_current_select.columns() != full_result_header.columns()) - throw Exception("Different number of columns in UNION ALL elements:\n" - + full_result_header.dumpNames() - + "\nand\n" - + full_result_header_for_current_select.dumpNames() + "\n", - ErrorCodes::UNION_ALL_RESULT_STRUCTURES_MISMATCH); + throw Exception(ErrorCodes::UNION_ALL_RESULT_STRUCTURES_MISMATCH, + "Different number of columns in UNION ALL elements:\n{}\nand\n{}\n", + full_result_header.dumpNames(), full_result_header_for_current_select.dumpNames()); required_result_column_names_for_other_selects[query_num].reserve(required_result_column_names.size()); for (const auto & pos : positions_of_required_result_columns) @@ -213,11 +211,9 @@ Block InterpreterSelectWithUnionQuery::getCommonHeaderForUnion(const Blocks & he for (size_t query_num = 1; query_num < num_selects; ++query_num) { if (headers[query_num].columns() != num_columns) - throw Exception("Different number of columns in UNION ALL elements:\n" - + common_header.dumpNames() - + "\nand\n" - + headers[query_num].dumpNames() + "\n", - ErrorCodes::UNION_ALL_RESULT_STRUCTURES_MISMATCH); + throw Exception(ErrorCodes::UNION_ALL_RESULT_STRUCTURES_MISMATCH, + "Different number of columns in UNION ALL elements:\n{}\nand\n{}\n", + common_header.dumpNames(), headers[query_num].dumpNames()); } std::vector columns(num_selects); diff --git a/src/Interpreters/InterpreterShowCreateQuery.cpp b/src/Interpreters/InterpreterShowCreateQuery.cpp index eff31b168bd..5e1b74681fe 100644 --- a/src/Interpreters/InterpreterShowCreateQuery.cpp +++ b/src/Interpreters/InterpreterShowCreateQuery.cpp @@ -76,14 +76,16 @@ QueryPipeline InterpreterShowCreateQuery::executeImpl() else if ((show_query = query_ptr->as())) { if (show_query->temporary) - throw Exception("Temporary databases are not possible.", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Temporary databases are not possible."); show_query->setDatabase(getContext()->resolveDatabase(show_query->getDatabase())); getContext()->checkAccess(AccessType::SHOW_DATABASES, show_query->getDatabase()); create_query = DatabaseCatalog::instance().getDatabase(show_query->getDatabase())->getCreateDatabaseQuery(); } if (!create_query) - throw Exception("Unable to show the create query of " + show_query->getTable() + ". Maybe it was created by the system.", ErrorCodes::THERE_IS_NO_QUERY); + throw Exception(ErrorCodes::THERE_IS_NO_QUERY, + "Unable to show the create query of {}. 
Maybe it was created by the system.", + show_query->getTable()); if (!getContext()->getSettingsRef().show_table_uuid_in_table_create_query_if_not_nil) { diff --git a/src/Interpreters/InterpreterShowEngineQuery.cpp b/src/Interpreters/InterpreterShowEngineQuery.cpp new file mode 100644 index 00000000000..5aae6ad5d28 --- /dev/null +++ b/src/Interpreters/InterpreterShowEngineQuery.cpp @@ -0,0 +1,18 @@ +#include + +#include +#include +#include + +#include + + +namespace DB +{ + +BlockIO InterpreterShowEnginesQuery::execute() +{ + return executeQuery("SELECT * FROM system.table_engines", getContext(), true); +} + +} diff --git a/src/Interpreters/InterpreterShowEngineQuery.h b/src/Interpreters/InterpreterShowEngineQuery.h new file mode 100644 index 00000000000..3c451e9b071 --- /dev/null +++ b/src/Interpreters/InterpreterShowEngineQuery.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include + + +namespace DB +{ + +/** Return list of all engines + */ +class InterpreterShowEnginesQuery : public IInterpreter, WithMutableContext +{ +public: + InterpreterShowEnginesQuery(const ASTPtr & query_ptr_, ContextMutablePtr context_) + : WithMutableContext(context_), query_ptr(query_ptr_) {} + + BlockIO execute() override; + + /// We ignore the quota and limits here because execute() will rewrite a show query as a SELECT query and then + /// the SELECT query will check the quota and limits. + bool ignoreQuota() const override { return true; } + bool ignoreLimits() const override { return true; } + +private: + ASTPtr query_ptr; +}; + +} diff --git a/src/Interpreters/InterpreterShowTablesQuery.cpp b/src/Interpreters/InterpreterShowTablesQuery.cpp index a6cea66df84..4e0dfdc9236 100644 --- a/src/Interpreters/InterpreterShowTablesQuery.cpp +++ b/src/Interpreters/InterpreterShowTablesQuery.cpp @@ -105,7 +105,7 @@ String InterpreterShowTablesQuery::getRewrittenQuery() } if (query.temporary && !query.from.empty()) - throw Exception("The `FROM` and `TEMPORARY` cannot be used together in `SHOW TABLES`", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "The `FROM` and `TEMPORARY` cannot be used together in `SHOW TABLES`"); String database = getContext()->resolveDatabase(query.from); DatabaseCatalog::instance().assertDatabaseExists(database); @@ -131,7 +131,7 @@ String InterpreterShowTablesQuery::getRewrittenQuery() if (query.temporary) { if (query.dictionaries) - throw Exception("Temporary dictionaries are not possible.", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Temporary dictionaries are not possible."); rewritten_query << "is_temporary"; } else diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index a82a11e7c97..abd0ecd6ea1 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -56,6 +56,7 @@ #include #include #include +#include #include "config.h" @@ -121,7 +122,7 @@ void executeCommandsAndThrowIfError(Callables && ... 
commands) { auto status = getOverallExecutionStatusOfCommands(std::forward(commands)...); if (status.code != 0) - throw Exception(status.message, status.code); + throw Exception::createDeprecated(status.message, status.code); } @@ -142,7 +143,7 @@ AccessType getRequiredAccessType(StorageActionBlockType action_type) else if (action_type == ActionLocks::PartsMove) return AccessType::SYSTEM_MOVES; else - throw Exception("Unknown action type: " + std::to_string(action_type), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown action type: {}", std::to_string(action_type)); } constexpr std::string_view table_is_not_replicated = "Table {} is not replicated"; @@ -288,10 +289,16 @@ BlockIO InterpreterSystemQuery::execute() copyData(res->out, out); copyData(res->err, out); if (!out.str().empty()) - LOG_DEBUG(log, "The command returned output: {}", command, out.str()); + LOG_DEBUG(log, "The command {} returned output: {}", command, out.str()); res->wait(); break; } + case Type::SYNC_FILE_CACHE: + { + LOG_DEBUG(log, "Will perform 'sync' syscall (it can take time)."); + sync(); + break; + } case Type::DROP_DNS_CACHE: { getContext()->checkAccess(AccessType::SYSTEM_DROP_DNS_CACHE); @@ -320,6 +327,10 @@ BlockIO InterpreterSystemQuery::execute() getContext()->checkAccess(AccessType::SYSTEM_DROP_MMAP_CACHE); system_context->dropMMappedFileCache(); break; + case Type::DROP_QUERY_CACHE: + getContext()->checkAccess(AccessType::SYSTEM_DROP_QUERY_CACHE); + getContext()->dropQueryCache(); + break; #if USE_EMBEDDED_COMPILER case Type::DROP_COMPILED_EXPRESSION_CACHE: getContext()->checkAccess(AccessType::SYSTEM_DROP_COMPILED_EXPRESSION_CACHE); @@ -435,7 +446,7 @@ BlockIO InterpreterSystemQuery::execute() SymbolIndex::reload(); break; #else - throw Exception("SYSTEM RELOAD SYMBOLS is not supported on current platform", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "SYSTEM RELOAD SYMBOLS is not supported on current platform"); #endif } case Type::STOP_MERGES: @@ -553,7 +564,7 @@ BlockIO InterpreterSystemQuery::execute() break; } default: - throw Exception("Unknown type of SYSTEM query", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown type of SYSTEM query"); } return result; @@ -676,7 +687,7 @@ void InterpreterSystemQuery::restartReplicas(ContextMutablePtr system_context) void InterpreterSystemQuery::dropReplica(ASTSystemQuery & query) { if (query.replica.empty()) - throw Exception("Replica name is empty", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Replica name is empty"); if (!table_id.empty()) { @@ -731,10 +742,13 @@ void InterpreterSystemQuery::dropReplica(ASTSystemQuery & query) ReplicatedTableStatus status; storage_replicated->getStatus(status); if (status.zookeeper_path == query.replica_zk_path) - throw Exception("There is a local table " + storage_replicated->getStorageID().getNameForLogs() + - ", which has the same table path in ZooKeeper. Please check the path in query. " - "If you want to drop replica of this table, use `DROP TABLE` " - "or `SYSTEM DROP REPLICA 'name' FROM db.table`", ErrorCodes::TABLE_WAS_NOT_DROPPED); + throw Exception(ErrorCodes::TABLE_WAS_NOT_DROPPED, + "There is a local table {}, which has the same table path in ZooKeeper. " + "Please check the path in query. 
" + "If you want to drop replica " + "of this table, use `DROP TABLE` " + "or `SYSTEM DROP REPLICA 'name' FROM db.table`", + storage_replicated->getStorageID().getNameForLogs()); } } } @@ -744,18 +758,17 @@ void InterpreterSystemQuery::dropReplica(ASTSystemQuery & query) bool looks_like_table_path = zookeeper->exists(query.replica_zk_path + "/replicas") || zookeeper->exists(query.replica_zk_path + "/dropped"); if (!looks_like_table_path) - throw Exception("Specified path " + query.replica_zk_path + " does not look like a table path", - ErrorCodes::TABLE_WAS_NOT_DROPPED); + throw Exception(ErrorCodes::TABLE_WAS_NOT_DROPPED, "Specified path {} does not look like a table path", + query.replica_zk_path); if (zookeeper->exists(remote_replica_path + "/is_active")) - throw Exception("Can't remove replica: " + query.replica + ", because it's active", - ErrorCodes::TABLE_WAS_NOT_DROPPED); + throw Exception(ErrorCodes::TABLE_WAS_NOT_DROPPED, "Can't remove replica: {}, because it's active", query.replica); StorageReplicatedMergeTree::dropReplica(zookeeper, query.replica_zk_path, query.replica, log); LOG_INFO(log, "Dropped replica {}", remote_replica_path); } else - throw Exception("Invalid query", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid query"); } bool InterpreterSystemQuery::dropReplicaImpl(ASTSystemQuery & query, const StoragePtr & table) @@ -770,15 +783,15 @@ bool InterpreterSystemQuery::dropReplicaImpl(ASTSystemQuery & query, const Stora /// Do not allow to drop local replicas and active remote replicas if (query.replica == status.replica_name) - throw Exception("We can't drop local replica, please use `DROP TABLE` " - "if you want to clean the data and drop this replica", ErrorCodes::TABLE_WAS_NOT_DROPPED); + throw Exception(ErrorCodes::TABLE_WAS_NOT_DROPPED, + "We can't drop local replica, please use `DROP TABLE` if you want " + "to clean the data and drop this replica"); /// NOTE it's not atomic: replica may become active after this check, but before dropReplica(...) /// However, the main use case is to drop dead replica, which cannot become active. /// This check prevents only from accidental drop of some other replica. 
if (zookeeper->exists(status.zookeeper_path + "/replicas/" + query.replica + "/is_active")) - throw Exception("Can't drop replica: " + query.replica + ", because it's active", - ErrorCodes::TABLE_WAS_NOT_DROPPED); + throw Exception(ErrorCodes::TABLE_WAS_NOT_DROPPED, "Can't drop replica: {}, because it's active", query.replica); storage_replicated->dropReplica(zookeeper, status.zookeeper_path, query.replica, log); LOG_TRACE(log, "Dropped replica {} of {}", query.replica, table->getStorageID().getNameForLogs()); @@ -789,7 +802,7 @@ bool InterpreterSystemQuery::dropReplicaImpl(ASTSystemQuery & query, const Stora void InterpreterSystemQuery::dropDatabaseReplica(ASTSystemQuery & query) { if (query.replica.empty()) - throw Exception("Replica name is empty", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Replica name is empty"); auto check_not_local_replica = [](const DatabaseReplicated * replicated, const ASTSystemQuery & query) { @@ -852,7 +865,7 @@ void InterpreterSystemQuery::dropDatabaseReplica(ASTSystemQuery & query) LOG_INFO(log, "Dropped replica {} of Replicated database with path {}", query.replica, query.replica_zk_path); } else - throw Exception("Invalid query", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid query"); } void InterpreterSystemQuery::syncReplica() @@ -928,13 +941,13 @@ void InterpreterSystemQuery::flushDistributed(ASTSystemQuery &) if (auto * storage_distributed = dynamic_cast(DatabaseCatalog::instance().getTable(table_id, getContext()).get())) storage_distributed->flushClusterNodesAllData(getContext()); else - throw Exception("Table " + table_id.getNameForLogs() + " is not distributed", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table {} is not distributed", table_id.getNameForLogs()); } [[noreturn]] void InterpreterSystemQuery::restartDisk(String &) { getContext()->checkAccess(AccessType::SYSTEM_RESTART_DISK); - throw Exception("SYSTEM RESTART DISK is not supported", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "SYSTEM RESTART DISK is not supported"); } @@ -956,6 +969,7 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() case Type::DROP_DNS_CACHE: case Type::DROP_MARK_CACHE: case Type::DROP_MMAP_CACHE: + case Type::DROP_QUERY_CACHE: #if USE_EMBEDDED_COMPILER case Type::DROP_COMPILED_EXPRESSION_CACHE: #endif @@ -1126,6 +1140,11 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() required_access.emplace_back(AccessType::SYSTEM_UNFREEZE); break; } + case Type::SYNC_FILE_CACHE: + { + required_access.emplace_back(AccessType::SYSTEM_SYNC_FILE_CACHE); + break; + } case Type::STOP_LISTEN_QUERIES: case Type::START_LISTEN_QUERIES: case Type::STOP_THREAD_FUZZER: diff --git a/src/Interpreters/InterpreterWatchQuery.cpp b/src/Interpreters/InterpreterWatchQuery.cpp index 9d153c2a9d2..b2086831e4e 100644 --- a/src/Interpreters/InterpreterWatchQuery.cpp +++ b/src/Interpreters/InterpreterWatchQuery.cpp @@ -66,12 +66,12 @@ QueryPipelineBuilder InterpreterWatchQuery::buildQueryPipeline() auto storage_name = storage->getName(); if (storage_name == "LiveView" && !getContext()->getSettingsRef().allow_experimental_live_view) - throw Exception("Experimental LIVE VIEW feature is not enabled (the setting 'allow_experimental_live_view')", - ErrorCodes::SUPPORT_IS_DISABLED); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, + "Experimental LIVE VIEW feature is not enabled (the setting 
'allow_experimental_live_view')"); else if (storage_name == "WindowView" && !getContext()->getSettingsRef().allow_experimental_window_view) - throw Exception("Experimental WINDOW VIEW feature is not enabled (the setting 'allow_experimental_window_view')", - ErrorCodes::SUPPORT_IS_DISABLED); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, + "Experimental WINDOW VIEW feature is not enabled (the setting 'allow_experimental_window_view')"); /// List of columns to read to execute the query. Names required_columns = storage->getInMemoryMetadataPtr()->getColumns().getNamesOfPhysical(); @@ -82,10 +82,8 @@ QueryPipelineBuilder InterpreterWatchQuery::buildQueryPipeline() /// Limitation on the number of columns to read. if (settings.max_columns_to_read && required_columns.size() > settings.max_columns_to_read) - throw Exception("Limit for number of columns to read exceeded. " - "Requested: " + std::to_string(required_columns.size()) - + ", maximum: " + settings.max_columns_to_read.toString(), - ErrorCodes::TOO_MANY_COLUMNS); + throw Exception(ErrorCodes::TOO_MANY_COLUMNS, "Limit for number of columns to read exceeded. " + "Requested: {}, maximum: {}", required_columns.size(), settings.max_columns_to_read.toString()); size_t max_block_size = settings.max_block_size; size_t max_streams = 1; diff --git a/src/Interpreters/InterserverCredentials.cpp b/src/Interpreters/InterserverCredentials.cpp index 6e36b06f9cc..094b58789a8 100644 --- a/src/Interpreters/InterserverCredentials.cpp +++ b/src/Interpreters/InterserverCredentials.cpp @@ -14,10 +14,11 @@ std::unique_ptr InterserverCredentials::make(const Poco::Util::AbstractConfiguration & config, const std::string & root_tag) { if (config.has("user") && !config.has("password")) - throw Exception("Configuration parameter interserver_http_credentials.password can't be empty", ErrorCodes::NO_ELEMENTS_IN_CONFIG); + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Configuration parameter interserver_http_credentials.password can't be empty"); if (!config.has("user") && config.has("password")) - throw Exception("Configuration parameter interserver_http_credentials.user can't be empty if user specified", ErrorCodes::NO_ELEMENTS_IN_CONFIG); + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, + "Configuration parameter interserver_http_credentials.user can't be empty if user specified"); /// They both can be empty auto user = config.getString(root_tag + ".user", ""); diff --git a/src/Interpreters/InterserverIOHandler.h b/src/Interpreters/InterserverIOHandler.h index 69b742db2ec..375c6ee9ca5 100644 --- a/src/Interpreters/InterserverIOHandler.h +++ b/src/Interpreters/InterserverIOHandler.h @@ -7,11 +7,11 @@ #include #include #include +#include #include #include #include -#include #include namespace zkutil @@ -43,7 +43,7 @@ public: /// You need to stop the data transfer if blocker is activated. ActionBlocker blocker; - std::shared_mutex rwlock; + SharedMutex rwlock; }; using InterserverIOEndpointPtr = std::shared_ptr; @@ -60,7 +60,7 @@ public: std::lock_guard lock(mutex); bool inserted = endpoint_map.try_emplace(name, std::move(endpoint)).second; if (!inserted) - throw Exception("Duplicate interserver IO endpoint: " + name, ErrorCodes::DUPLICATE_INTERSERVER_IO_ENDPOINT); + throw Exception(ErrorCodes::DUPLICATE_INTERSERVER_IO_ENDPOINT, "Duplicate interserver IO endpoint: {}", name); } bool removeEndpointIfExists(const String & name) @@ -77,7 +77,7 @@ public: } catch (...) 
{ - throw Exception("No interserver IO endpoint named " + name, ErrorCodes::NO_SUCH_INTERSERVER_IO_ENDPOINT); + throw Exception(ErrorCodes::NO_SUCH_INTERSERVER_IO_ENDPOINT, "No interserver IO endpoint named {}", name); } private: diff --git a/src/Interpreters/JoinSwitcher.cpp b/src/Interpreters/JoinSwitcher.cpp index 996fd1e4ac7..15702784d74 100644 --- a/src/Interpreters/JoinSwitcher.cpp +++ b/src/Interpreters/JoinSwitcher.cpp @@ -41,7 +41,7 @@ bool JoinSwitcher::addJoinedBlock(const Block & block, bool) bool JoinSwitcher::switchJoin() { HashJoin * hash_join = assert_cast(join.get()); - BlocksList right_blocks = hash_join->releaseJoinedBlocks(); + BlocksList right_blocks = hash_join->releaseJoinedBlocks(true); /// Destroy old join & create new one. join = std::make_shared(table_join, right_sample_block); diff --git a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp index 5500c274c23..bac82d967f2 100644 --- a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp +++ b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp @@ -42,7 +42,7 @@ ASTPtr makeSubqueryTemplate() ParserTablesInSelectQueryElement parser(true); ASTPtr subquery_template = parseQuery(parser, "(select * from _t) as `--.s`", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); if (!subquery_template) - throw Exception("Cannot parse subquery template", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot parse subquery template"); return subquery_template; } @@ -98,10 +98,10 @@ public: name = table_name_it->second; it = table_columns.find(table_name_it->second); if (it == table_columns.end()) - throw Exception("Unknown qualified identifier: " + table_name, ErrorCodes::UNKNOWN_IDENTIFIER); + throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown qualified identifier: {}", table_name); } else - throw Exception("Unknown qualified identifier: " + table_name, ErrorCodes::UNKNOWN_IDENTIFIER); + throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown qualified identifier: {}", table_name); } for (const auto & column : it->second) @@ -165,7 +165,7 @@ private: has_asterisks = true; if (!qualified_asterisk->qualifier) - throw Exception("Logical error: qualified asterisk must have a qualifier", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: qualified asterisk must have a qualifier"); auto & identifier = qualified_asterisk->qualifier->as(); @@ -180,7 +180,7 @@ private: transformer->as()) IASTColumnsTransformer::transform(transformer, columns); else - throw Exception("Logical error: qualified asterisk must only have children of IASTColumnsTransformer type", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: qualified asterisk must only have children of IASTColumnsTransformer type"); } } } @@ -267,7 +267,7 @@ bool needRewrite(ASTSelectQuery & select, std::vectorchildren[i]->as(); if (!table) - throw Exception("Table expected", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Table expected"); if (table->table_expression) if (const auto * expression = table->table_expression->as()) @@ -276,7 +276,7 @@ bool needRewrite(ASTSelectQuery & select, std::vectortable_join && !table->array_join) - throw Exception("Joined table expected", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Joined table expected"); if (table->array_join) { @@ -286,7 +286,7 @@ bool needRewrite(ASTSelectQuery & select, std::vectortable_join->as(); if (join.kind == JoinKind::Comma) 
- throw Exception("COMMA to CROSS JOIN rewriter is not enabled or cannot rewrite query", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "COMMA to CROSS JOIN rewriter is not enabled or cannot rewrite query"); } if (num_tables - num_array_join <= 2) @@ -294,7 +294,7 @@ bool needRewrite(ASTSelectQuery & select, std::vector normalizeColumnNamesExtractNeeded( alias_ident_typed->restoreTable(); bool alias_equals_column_name = alias_ident->getColumnNameWithoutAlias() == ident->getColumnNameWithoutAlias(); if (!alias_equals_column_name) - throw Exception("Alias clashes with qualified column '" + ident->name() + "'", ErrorCodes::AMBIGUOUS_COLUMN_NAME); + throw Exception(ErrorCodes::AMBIGUOUS_COLUMN_NAME, "Alias clashes with qualified column '{}'", ident->name()); } } String short_name = ident->shortName(); @@ -690,7 +690,7 @@ void JoinToSubqueryTransformMatcher::visit(ASTSelectQuery & select, ASTPtr & ast if (table_expressions.size() != data.tables.size() || tables_count != data.tables.size()) - throw Exception("Inconsistent tables count in JOIN rewriter", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Inconsistent tables count in JOIN rewriter"); /// Replace * and t.* with columns in select expression list. { @@ -753,15 +753,15 @@ void JoinToSubqueryTransformMatcher::visit(ASTSelectQuery & select, ASTPtr & ast CheckAliasDependencyVisitor::Data check{data.aliases}; CheckAliasDependencyVisitor(check).visit(expr.second); if (check.dependency) - throw Exception("Cannot rewrite JOINs. Alias '" + expr.first + - "' used in ON section depends on another alias '" + check.dependency->name() + "'", - ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot rewrite JOINs. " + "Alias '{}' used in ON section depends on another alias '{}'", + expr.first, check.dependency->name()); } /// Check same name in aliases, USING and ON sections. Cannot push down alias to ON through USING cause of name masquerading. for (auto * ident : using_identifiers) if (on_aliases.contains(ident->name())) - throw Exception("Cannot rewrite JOINs. Alias '" + ident->name() + "' appears both in ON and USING", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot rewrite JOINs. Alias '{}' appears both in ON and USING", ident->name()); using_identifiers.clear(); /// Replace pushdowned expressions with aliases names in original expression lists. 
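One detail worth noting: the `Exception::createDeprecated(status.message, status.code)` call earlier in this patch keeps accepting a message that was already built at runtime, and such text should not double as a format string, because any stray '{' or '}' in it would be parsed as a replacement field. A small sketch of the hazard and of the safe pattern, using plain fmt; the variable names are illustrative:

#include <fmt/format.h>
#include <iostream>
#include <string>

int main()
{
    // A message assembled elsewhere at runtime; it happens to contain braces.
    const std::string prebuilt = "mutation failed on part {all_1_1_0}";

    // Hazardous: treating runtime text as the format string. fmt::runtime()
    // disables compile-time checking, and the brace field has no matching
    // argument, so this throws fmt::format_error at run time.
    try
    {
        std::cout << fmt::format(fmt::runtime(prebuilt)) << '\n';
    }
    catch (const fmt::format_error & e)
    {
        std::cout << "format_error: " << e.what() << '\n';
    }

    // Safe: pass the prebuilt text as an argument, never as the format string.
    std::cout << fmt::format("{}", prebuilt) << '\n';
    return 0;
}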
@@ -809,10 +809,10 @@ ASTPtr JoinToSubqueryTransformMatcher::replaceJoin(ASTPtr ast_left, ASTPtr ast_r const auto * left = ast_left->as(); const auto * right = ast_right->as(); if (!left || !right) - throw Exception("Two TablesInSelectQueryElements expected", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Two TablesInSelectQueryElements expected"); if (!right->table_join) - throw Exception("Table join expected", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Table join expected"); /// replace '_t' with pair of joined tables RewriteVisitor::Data visitor_data{ast_left, ast_right}; diff --git a/src/Interpreters/JoinUtils.cpp b/src/Interpreters/JoinUtils.cpp index a4ec64ab70e..b8d8dd5df74 100644 --- a/src/Interpreters/JoinUtils.cpp +++ b/src/Interpreters/JoinUtils.cpp @@ -324,17 +324,20 @@ ColumnRawPtrs materializeColumnsInplace(Block & block, const Names & names) return ptrs; } -ColumnRawPtrMap materializeColumnsInplaceMap(Block & block, const Names & names) +ColumnPtrMap materializeColumnsInplaceMap(const Block & block, const Names & names) { - ColumnRawPtrMap ptrs; + ColumnPtrMap ptrs; ptrs.reserve(names.size()); for (const auto & column_name : names) { - auto & column = block.getByName(column_name); - column.column = recursiveRemoveLowCardinality(column.column->convertToFullColumnIfConst()); - column.type = recursiveRemoveLowCardinality(column.type); - ptrs[column_name] = column.column.get(); + ColumnPtr column = block.getByName(column_name).column; + + column = column->convertToFullColumnIfConst(); + column = recursiveRemoveLowCardinality(column); + column = recursiveRemoveSparse(column); + + ptrs[column_name] = column; } return ptrs; @@ -529,24 +532,24 @@ bool typesEqualUpToNullability(DataTypePtr left_type, DataTypePtr right_type) JoinMask getColumnAsMask(const Block & block, const String & column_name) { if (column_name.empty()) - return JoinMask(true); + return JoinMask(true, block.rows()); const auto & src_col = block.getByName(column_name); DataTypePtr col_type = recursiveRemoveLowCardinality(src_col.type); if (isNothing(col_type)) - return JoinMask(false); + return JoinMask(false, block.rows()); if (const auto * const_cond = checkAndGetColumn(*src_col.column)) { - return JoinMask(const_cond->getBool(0)); + return JoinMask(const_cond->getBool(0), block.rows()); } ColumnPtr join_condition_col = recursiveRemoveLowCardinality(src_col.column->convertToFullColumnIfConst()); if (const auto * nullable_col = typeid_cast(join_condition_col.get())) { if (isNothing(assert_cast(*col_type).getNestedType())) - return JoinMask(false); + return JoinMask(false, block.rows()); /// Return nested column with NULL set to false const auto & nest_col = assert_cast(nullable_col->getNestedColumn()); @@ -639,9 +642,8 @@ Blocks scatterBlockByHash(const Strings & key_columns_names, const Block & block { if (num_shards == 0) throw Exception(ErrorCodes::LOGICAL_ERROR, "Number of shards must be positive"); - UNUSED(scatterBlockByHashPow2); - // if (likely(isPowerOf2(num_shards))) - // return scatterBlockByHashPow2(key_columns_names, block, num_shards); + if (likely(isPowerOf2(num_shards))) + return scatterBlockByHashPow2(key_columns_names, block, num_shards); return scatterBlockByHashGeneric(key_columns_names, block, num_shards); } diff --git a/src/Interpreters/JoinUtils.h b/src/Interpreters/JoinUtils.h index 36be71f2a91..f112ca22e5b 100644 --- a/src/Interpreters/JoinUtils.h +++ b/src/Interpreters/JoinUtils.h @@ -14,30 +14,34 @@ class TableJoin; class IColumn; 
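The scatterBlockByHash change above re-enables the usual power-of-two fast path: when the number of shards is a power of two, the modulo that maps a hash to a shard can be replaced by a bit mask. A self-contained sketch of just that dispatch; scatterIndexFor is an illustrative helper, not the ClickHouse function:

#include <cstddef>
#include <cstdint>

// True if n is a non-zero power of two.
constexpr bool isPowerOf2(std::size_t n)
{
    return n != 0 && (n & (n - 1)) == 0;
}

// Map a row's hash to one of num_shards buckets: cheap mask when the shard
// count is a power of two, generic modulo otherwise.
inline std::size_t scatterIndexFor(std::uint64_t hash, std::size_t num_shards)
{
    if (isPowerOf2(num_shards))
        return static_cast<std::size_t>(hash & (num_shards - 1)); // same result as % for powers of two
    return static_cast<std::size_t>(hash % num_shards);
}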
using ColumnRawPtrs = std::vector; +using ColumnPtrMap = std::unordered_map; using ColumnRawPtrMap = std::unordered_map; using UInt8ColumnDataPtr = const ColumnUInt8::Container *; namespace JoinCommon { -/// Store boolean column handling constant value without materializing -/// Behaves similar to std::variant, but provides more convenient specialized interface +/// Helper interface to work with mask from JOIN ON section class JoinMask { public: - explicit JoinMask(bool value) + explicit JoinMask() : column(nullptr) - , const_value(value) + {} + + explicit JoinMask(bool value, size_t size) + : column(ColumnUInt8::create(size, value)) {} explicit JoinMask(ColumnPtr col) : column(col) - , const_value(false) {} - bool isConstant() { return !column; } + bool hasData() + { + return column != nullptr; + } - /// Return data if mask is not constant UInt8ColumnDataPtr getData() { if (column) @@ -47,15 +51,11 @@ public: inline bool isRowFiltered(size_t row) const { - if (column) - return !assert_cast(*column).getData()[row]; - return !const_value; + return !assert_cast(*column).getData()[row]; } private: ColumnPtr column; - /// Used if column is null - bool const_value; }; @@ -71,7 +71,7 @@ ColumnPtr emptyNotNullableClone(const ColumnPtr & column); ColumnPtr materializeColumn(const Block & block, const String & name); Columns materializeColumns(const Block & block, const Names & names); ColumnRawPtrs materializeColumnsInplace(Block & block, const Names & names); -ColumnRawPtrMap materializeColumnsInplaceMap(Block & block, const Names & names); +ColumnPtrMap materializeColumnsInplaceMap(const Block & block, const Names & names); ColumnRawPtrs getRawPointers(const Columns & columns); void convertToFullColumnsInplace(Block & block); void convertToFullColumnsInplace(Block & block, const Names & names, bool change_type = true); diff --git a/src/Interpreters/JoinedTables.cpp b/src/Interpreters/JoinedTables.cpp index 1d8676cfc57..7c999803b44 100644 --- a/src/Interpreters/JoinedTables.cpp +++ b/src/Interpreters/JoinedTables.cpp @@ -143,8 +143,8 @@ private: match == IdentifierSemantic::ColumnMatch::DBAndTable) { if (rewritten) - throw Exception("Failed to rewrite distributed table names. Ambiguous column '" + identifier.name() + "'", - ErrorCodes::AMBIGUOUS_COLUMN_NAME); + throw Exception(ErrorCodes::AMBIGUOUS_COLUMN_NAME, "Failed to rewrite distributed table names. Ambiguous column '{}'", + identifier.name()); /// Table has an alias. So we set a new name qualified by table alias. IdentifierSemantic::setColumnLongName(identifier, table); rewritten = true; @@ -161,8 +161,8 @@ private: if (identifier.name() == table.table) { if (rewritten) - throw Exception("Failed to rewrite distributed table. Ambiguous column '" + identifier.name() + "'", - ErrorCodes::AMBIGUOUS_COLUMN_NAME); + throw Exception(ErrorCodes::AMBIGUOUS_COLUMN_NAME, "Failed to rewrite distributed table. 
Ambiguous column '{}'", + identifier.name()); identifier.setShortName(table.alias); rewritten = true; } @@ -241,7 +241,7 @@ bool JoinedTables::resolveTables() bool include_materialized_cols = include_all_columns || settings.asterisk_include_materialized_columns; tables_with_columns = getDatabaseAndTablesWithColumns(table_expressions, context, include_alias_cols, include_materialized_cols); if (tables_with_columns.size() != table_expressions.size()) - throw Exception("Unexpected tables count", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected tables count"); if (settings.joined_subquery_requires_alias && tables_with_columns.size() > 1) { @@ -250,9 +250,10 @@ bool JoinedTables::resolveTables() const auto & t = tables_with_columns[i]; if (t.table.table.empty() && t.table.alias.empty()) { - throw Exception("No alias for subquery or table function in JOIN (set joined_subquery_requires_alias=0 to disable restriction). While processing '" - + table_expressions[i]->formatForErrorMessage() + "'", - ErrorCodes::ALIAS_REQUIRED); + throw Exception(ErrorCodes::ALIAS_REQUIRED, + "No alias for subquery or table function " + "in JOIN (set joined_subquery_requires_alias=0 to disable restriction). " + "While processing '{}'", table_expressions[i]->formatForErrorMessage()); } } } diff --git a/src/Interpreters/Lemmatizers.cpp b/src/Interpreters/Lemmatizers.cpp index 5044aae083c..c24679de76e 100644 --- a/src/Interpreters/Lemmatizers.cpp +++ b/src/Interpreters/Lemmatizers.cpp @@ -57,17 +57,17 @@ Lemmatizers::Lemmatizers(const Poco::Util::AbstractConfiguration & config) const auto & lemm_path = config.getString(prefix + "." + key + ".path", ""); if (lemm_name.empty()) - throw Exception("Lemmatizer language in config is not specified here: " + prefix + "." + key + ".lang", - ErrorCodes::INVALID_CONFIG_PARAMETER); + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Lemmatizer language in config is not specified here: " + "{}.{}.lang", prefix, key); if (lemm_path.empty()) - throw Exception("Path to lemmatizer in config is not specified here: " + prefix + "." + key + ".path", - ErrorCodes::INVALID_CONFIG_PARAMETER); + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Path to lemmatizer in config is not specified here: {}.{}.path", + prefix, key); paths[lemm_name] = lemm_path; } else - throw Exception("Unknown element in config: " + prefix + "." 
+ key + ", must be 'lemmatizer'", - ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG, "Unknown element in config: {}.{}, must be 'lemmatizer'", + prefix, key); } } @@ -81,15 +81,13 @@ Lemmatizers::LemmPtr Lemmatizers::getLemmatizer(const String & name) if (paths.find(name) != paths.end()) { if (!std::filesystem::exists(paths[name])) - throw Exception("Incorrect path to lemmatizer: " + paths[name], - ErrorCodes::INVALID_CONFIG_PARAMETER); + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Incorrect path to lemmatizer: {}", paths[name]); lemmatizers[name] = std::make_shared(paths[name]); return lemmatizers[name]; } - throw Exception("Lemmatizer named: '" + name + "' is not found", - ErrorCodes::INVALID_CONFIG_PARAMETER); + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Lemmatizer named: '{}' is not found", name); } } diff --git a/src/Interpreters/LogicalExpressionsOptimizer.cpp b/src/Interpreters/LogicalExpressionsOptimizer.cpp index 67ca987d82b..02594269f08 100644 --- a/src/Interpreters/LogicalExpressionsOptimizer.cpp +++ b/src/Interpreters/LogicalExpressionsOptimizer.cpp @@ -156,8 +156,7 @@ void LogicalExpressionsOptimizer::collectDisjunctiveEqualityChains() { auto res = or_parent_map.insert(std::make_pair(function, ParentNodes{from_node})); if (!res.second) - throw Exception("LogicalExpressionsOptimizer: parent node information is corrupted", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "LogicalExpressionsOptimizer: parent node information is corrupted"); } } else @@ -332,8 +331,7 @@ void LogicalExpressionsOptimizer::cleanupOrExpressions() auto it = garbage_map.find(or_with_expression.or_function); if (it == garbage_map.end()) - throw Exception("LogicalExpressionsOptimizer: garbage map is corrupted", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "LogicalExpressionsOptimizer: garbage map is corrupted"); auto & first_erased = it->second; first_erased = std::remove_if(operands.begin(), first_erased, [&](const ASTPtr & operand) @@ -369,8 +367,7 @@ void LogicalExpressionsOptimizer::fixBrokenOrExpressions() { auto it = or_parent_map.find(or_function); if (it == or_parent_map.end()) - throw Exception("LogicalExpressionsOptimizer: parent node information is corrupted", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "LogicalExpressionsOptimizer: parent node information is corrupted"); auto & parents = it->second; auto it2 = column_to_position.find(or_function); @@ -379,7 +376,7 @@ void LogicalExpressionsOptimizer::fixBrokenOrExpressions() size_t position = it2->second; bool inserted = column_to_position.emplace(operands[0].get(), position).second; if (!inserted) - throw Exception("LogicalExpressionsOptimizer: internal error", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "LogicalExpressionsOptimizer: internal error"); column_to_position.erase(it2); } diff --git a/src/Interpreters/MergeJoin.cpp b/src/Interpreters/MergeJoin.cpp index 2d54accc76a..a5ab6b25d02 100644 --- a/src/Interpreters/MergeJoin.cpp +++ b/src/Interpreters/MergeJoin.cpp @@ -55,7 +55,7 @@ ColumnWithTypeAndName condtitionColumnToJoinable(const Block & block, const Stri if (!src_column_name.empty()) { auto join_mask = JoinCommon::getColumnAsMask(block, src_column_name); - if (!join_mask.isConstant()) + if (join_mask.hasData()) { for (size_t i = 0; i < res_size; ++i) null_map->getData()[i] = join_mask.isRowFiltered(i); @@ -123,7 +123,7 @@ int 
nullableCompareAt(const IColumn & left_column, const IColumn & right_column, Block extractMinMax(const Block & block, const Block & keys) { if (block.rows() == 0) - throw Exception("Unexpected empty block", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected empty block"); Block min_max = keys.cloneEmpty(); MutableColumns columns = min_max.mutateColumns(); @@ -227,7 +227,7 @@ public: { /// SortCursorImpl can work with permutation, but MergeJoinCursor can't. if (impl.permutation) - throw Exception("Logical error: MergeJoinCursor doesn't support permutation", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: MergeJoinCursor doesn't support permutation"); } size_t position() const { return impl.getRow(); } @@ -261,7 +261,7 @@ public: int intersect(const Block & min_max, const Names & key_names) { if (end() == 0 || min_max.rows() != 2) - throw Exception("Unexpected block size", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected block size"); size_t last_position = end() - 1; int first_vs_max = 0; @@ -488,25 +488,25 @@ MergeJoin::MergeJoin(std::shared_ptr table_join_, const Block & right case JoinStrictness::Any: case JoinStrictness::Semi: if (!is_left && !is_inner) - throw Exception("Not supported. MergeJoin supports SEMI and ANY variants only for LEFT and INNER JOINs.", - ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not supported. MergeJoin supports SEMI and ANY variants only for LEFT and INNER JOINs."); break; default: - throw Exception("Not supported. MergeJoin supports ALL, ANY and SEMI JOINs variants.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not supported. MergeJoin supports ALL, ANY and SEMI JOINs variants."); } if (!max_rows_in_right_block) - throw Exception("partial_merge_join_rows_in_right_blocks cannot be zero", ErrorCodes::PARAMETER_OUT_OF_BOUND); + throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, "partial_merge_join_rows_in_right_blocks cannot be zero"); if (max_files_to_merge < 2) - throw Exception("max_files_to_merge cannot be less than 2", ErrorCodes::PARAMETER_OUT_OF_BOUND); + throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, "max_files_to_merge cannot be less than 2"); if (!size_limits.hasLimits()) { size_limits.max_bytes = table_join->defaultMaxBytes(); if (!size_limits.max_bytes) - throw Exception("No limit for MergeJoin (max_rows_in_join, max_bytes_in_join or default_max_bytes_in_join have to be set)", - ErrorCodes::PARAMETER_OUT_OF_BOUND); + throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, + "No limit for MergeJoin (max_rows_in_join, max_bytes_in_join " + "or default_max_bytes_in_join have to be set)"); } if (!table_join->oneDisjunct()) diff --git a/src/Interpreters/MergeJoin.h b/src/Interpreters/MergeJoin.h index 770ca0409bf..8b5d884a0e6 100644 --- a/src/Interpreters/MergeJoin.h +++ b/src/Interpreters/MergeJoin.h @@ -1,7 +1,6 @@ #pragma once -#include - +#include #include #include #include @@ -72,7 +71,7 @@ private: using Cache = CacheBase, BlockByteWeight>; - mutable std::shared_mutex rwlock; + mutable SharedMutex rwlock; std::shared_ptr table_join; SizeLimits size_limits; SortDescription left_sort_description; diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index cec03863c69..c207309a274 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -247,8 +247,7 @@ bool 
isStorageTouchedByMutations( if (!block.rows()) return false; else if (block.rows() != 1) - throw Exception("count() expression returned " + toString(block.rows()) + " rows, not 1", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "count() expression returned {} rows, not 1", block.rows()); Block tmp_block; while (executor.pull(tmp_block)); @@ -276,7 +275,7 @@ ASTPtr getPartitionAndPredicateExpressionForMutationCommand( else if (storage_from_merge_tree_data_part) partition_id = storage_from_merge_tree_data_part->getPartitionIDFromQuery(command.partition, context); else - throw Exception("ALTER UPDATE/DELETE ... IN PARTITION is not supported for non-MergeTree tables", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "ALTER UPDATE/DELETE ... IN PARTITION is not supported for non-MergeTree tables"); partition_predicate_as_ast_func = makeASTFunction("equals", std::make_shared("_partition_id"), @@ -456,14 +455,14 @@ static void validateUpdateColumns( for (const auto & col : metadata_snapshot->getColumns().getMaterialized()) { if (col.name == column_name) - throw Exception("Cannot UPDATE materialized column " + backQuote(column_name), ErrorCodes::CANNOT_UPDATE_COLUMN); + throw Exception(ErrorCodes::CANNOT_UPDATE_COLUMN, "Cannot UPDATE materialized column {}", backQuote(column_name)); } - throw Exception("There is no column " + backQuote(column_name) + " in table", ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no column {} in table", backQuote(column_name)); } if (key_columns.contains(column_name)) - throw Exception("Cannot UPDATE key column " + backQuote(column_name), ErrorCodes::CANNOT_UPDATE_COLUMN); + throw Exception(ErrorCodes::CANNOT_UPDATE_COLUMN, "Cannot UPDATE key column {}", backQuote(column_name)); auto materialized_it = column_to_affected_materialized.find(column_name); if (materialized_it != column_to_affected_materialized.end()) @@ -471,9 +470,9 @@ static void validateUpdateColumns( for (const String & materialized : materialized_it->second) { if (key_columns.contains(materialized)) - throw Exception("Updated column " + backQuote(column_name) + " affects MATERIALIZED column " - + backQuote(materialized) + ", which is a key column. Cannot UPDATE it.", - ErrorCodes::CANNOT_UPDATE_COLUMN); + throw Exception(ErrorCodes::CANNOT_UPDATE_COLUMN, + "Updated column {} affects MATERIALIZED column {}, which is a key column. " + "Cannot UPDATE it.", backQuote(column_name), backQuote(materialized)); } } } @@ -512,10 +511,10 @@ static std::optional> getExpressionsOfUpdatedNestedSubcolumn void MutationsInterpreter::prepare(bool dry_run) { if (is_prepared) - throw Exception("MutationsInterpreter is already prepared. It is a bug.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "MutationsInterpreter is already prepared. 
It is a bug."); if (commands.empty()) - throw Exception("Empty mutation commands list", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty mutation commands list"); const ColumnsDescription & columns_desc = metadata_snapshot->getColumns(); const IndicesDescription & indices_desc = metadata_snapshot->getSecondaryIndices(); @@ -713,7 +712,7 @@ void MutationsInterpreter::prepare(bool dry_run) return index.name == command.index_name; }); if (it == std::cend(indices_desc)) - throw Exception("Unknown index: " + command.index_name, ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown index: {}", command.index_name); auto query = (*it).expression_list_ast->clone(); auto syntax_result = TreeRewriter(context).analyze(query, all_columns); @@ -802,7 +801,7 @@ void MutationsInterpreter::prepare(bool dry_run) read_columns.emplace_back(command.column_name); } else - throw Exception("Unknown mutation command type: " + DB::toString(command.type), ErrorCodes::UNKNOWN_MUTATION_COMMAND); + throw Exception(ErrorCodes::UNKNOWN_MUTATION_COMMAND, "Unknown mutation command type: {}", DB::toString(command.type)); } if (!read_columns.empty()) @@ -1178,7 +1177,7 @@ QueryPipelineBuilder MutationsInterpreter::addStreamsForLaterStages(const std::v { const auto & step = stage.expressions_chain.steps[i]; if (step->actions()->hasArrayJoin()) - throw Exception("arrayJoin is not allowed in mutations", ErrorCodes::UNEXPECTED_EXPRESSION); + throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "arrayJoin is not allowed in mutations"); if (i < stage.filter_column_names.size()) { /// Execute DELETEs. @@ -1239,7 +1238,7 @@ void MutationsInterpreter::validate() QueryPipelineBuilder MutationsInterpreter::execute() { if (!can_execute) - throw Exception("Cannot execute mutations interpreter because can_execute flag set to false", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot execute mutations interpreter because can_execute flag set to false"); QueryPlan plan; initQueryPlan(stages.front(), plan); diff --git a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp index 70773e2fffb..7ba7749e89b 100644 --- a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp +++ b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp @@ -78,7 +78,7 @@ NamesAndTypesList getColumnsList(const ASTExpressionList * columns_definition) const auto & declare_column = declare_column_ast->as(); if (!declare_column || !declare_column->data_type) - throw Exception("Missing type in definition of column.", ErrorCodes::UNKNOWN_TYPE); + throw Exception(ErrorCodes::UNKNOWN_TYPE, "Missing type in definition of column."); bool is_nullable = true; bool is_unsigned = false; @@ -147,7 +147,7 @@ NamesAndTypesList getColumnsList(const ASTExpressionList * columns_definition) static ColumnsDescription createColumnsDescription(const NamesAndTypesList & columns_name_and_type, const ASTExpressionList * columns_definition) { if (columns_name_and_type.size() != columns_definition->children.size()) - throw Exception("Columns of different size provided.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Columns of different size provided."); ColumnsDescription columns_description; @@ -337,7 +337,7 @@ static ASTPtr getPartitionPolicy(const NamesAndTypesList & primary_keys) WhichDataType which(type); if (which.isNullable()) - throw Exception("LOGICAL ERROR: MySQL primary key must be not null, it is a bug.", 
ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "LOGICAL ERROR: MySQL primary key must be not null, it is a bug."); if (which.isDate() || which.isDate32() || which.isDateTime() || which.isDateTime64()) { @@ -440,7 +440,7 @@ void InterpreterCreateImpl::validate(const InterpreterCreateImpl::TQuery & creat missing_columns_definition = false; } if (missing_columns_definition) - throw Exception("Missing definition of columns.", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED); + throw Exception(ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED, "Missing definition of columns."); } } @@ -473,8 +473,8 @@ ASTs InterpreterCreateImpl::getRewrittenQueries( ColumnsDescription columns_description = createColumnsDescription(columns_name_and_type, create_defines->columns); if (primary_keys.empty()) - throw Exception("The " + backQuoteIfNeed(mysql_database) + "." + backQuoteIfNeed(create_query.table) - + " cannot be materialized, because there is no primary keys.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "The {}.{} cannot be materialized, because there is no primary keys.", + backQuoteIfNeed(mysql_database), backQuoteIfNeed(create_query.table)); auto columns = std::make_shared(); @@ -572,7 +572,7 @@ ASTs InterpreterDropImpl::getRewrittenQueries( void InterpreterRenameImpl::validate(const InterpreterRenameImpl::TQuery & rename_query, ContextPtr /*context*/) { if (rename_query.exchange) - throw Exception("Cannot execute exchange for external ddl query.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot execute exchange for external ddl query."); } ASTs InterpreterRenameImpl::getRewrittenQueries( @@ -585,7 +585,7 @@ ASTs InterpreterRenameImpl::getRewrittenQueries( const auto & from_database = resolveDatabase(rename_element.from.database, mysql_database, mapped_to_database, context); if ((from_database == mapped_to_database || to_database == mapped_to_database) && to_database != from_database) - throw Exception("Cannot rename with other database for external ddl query.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot rename with other database for external ddl query."); if (from_database == mapped_to_database) { @@ -718,7 +718,7 @@ ASTs InterpreterAlterImpl::getRewrittenQueries( auto modify_columns = getColumnsList(alter_command->additional_columns); if (modify_columns.size() != 1) - throw Exception("It is a bug", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "It is a bug"); new_column_name = modify_columns.front().name; @@ -751,7 +751,7 @@ ASTs InterpreterAlterImpl::getRewrittenQueries( const auto & to_database = resolveDatabase(alter_command->new_database_name, mysql_database, mapped_to_database, context); if (to_database != mapped_to_database) - throw Exception("Cannot rename with other database for external ddl query.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot rename with other database for external ddl query."); /// For ALTER TABLE table_name RENAME TO new_table_name_1, RENAME TO new_table_name_2; /// We just need to generate RENAME TABLE table_name TO new_table_name_2; diff --git a/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.cpp b/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.cpp index b3c2063c6f6..cbdd656fb8c 100644 --- a/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.cpp +++ b/src/Interpreters/NormalizeSelectWithUnionQueryVisitor.cpp @@ -65,9 +65,8 @@ void 
NormalizeSelectWithUnionQueryMatcher::visit(ASTSelectWithUnionQuery & ast, else if (data.union_default_mode == SetOperationMode::DISTINCT) union_modes[i] = SelectUnionMode::UNION_DISTINCT; else - throw Exception( - "Expected ALL or DISTINCT in SelectWithUnion query, because setting (union_default_mode) is empty", - DB::ErrorCodes::EXPECTED_ALL_OR_DISTINCT); + throw Exception(DB::ErrorCodes::EXPECTED_ALL_OR_DISTINCT, + "Expected ALL or DISTINCT in SelectWithUnion query, because setting (union_default_mode) is empty"); } if (union_modes[i] == SelectUnionMode::UNION_ALL) diff --git a/src/Interpreters/OptimizeIfChains.cpp b/src/Interpreters/OptimizeIfChains.cpp index d4b4d20bb9c..ba4c7bcd95f 100644 --- a/src/Interpreters/OptimizeIfChains.cpp +++ b/src/Interpreters/OptimizeIfChains.cpp @@ -61,13 +61,14 @@ ASTs OptimizeIfChainsVisitor::ifChain(const ASTPtr & child) { const auto * function_node = child->as(); if (!function_node || !function_node->arguments) - throw Exception("Unexpected AST for function 'if'", ErrorCodes::UNEXPECTED_AST_STRUCTURE); + throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "Unexpected AST for function 'if'"); const auto * function_args = function_node->arguments->as(); if (!function_args || function_args->children.size() != 3) - throw Exception("Wrong number of arguments for function 'if' (" + toString(function_args->children.size()) + " instead of 3)", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Wrong number of arguments for function 'if' ({} instead of 3)", + function_args->children.size()); const auto * else_arg = function_args->children[2]->as(); diff --git a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp index 824ad22bb12..13b6311a877 100644 --- a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp +++ b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp @@ -39,7 +39,7 @@ static bool tryExtractConstValueFromCondition(const ASTPtr & condition, bool & v if (const auto * expr_list = function->arguments->as()) { if (expr_list->children.size() != 2) - throw Exception("Function CAST must have exactly two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function CAST must have exactly two arguments"); const ASTPtr & type_ast = expr_list->children.at(1); if (const auto * type_literal = type_ast->as()) @@ -85,12 +85,12 @@ void OptimizeIfWithConstantConditionVisitor::visit(ASTPtr & current_ast) } if (!function_node->arguments) - throw Exception("Wrong number of arguments for function 'if' (0 instead of 3)", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Wrong number of arguments for function 'if' (0 instead of 3)"); if (function_node->arguments->children.size() != 3) - throw Exception( - "Wrong number of arguments for function 'if' (" + toString(function_node->arguments->children.size()) + " instead of 3)", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Wrong number of arguments for function 'if' ({} instead of 3)", + function_node->arguments->children.size()); visit(function_node->arguments); const auto * args = function_node->arguments->as(); diff --git a/src/Interpreters/PredicateExpressionsOptimizer.cpp b/src/Interpreters/PredicateExpressionsOptimizer.cpp index fd77f651ff5..d9ea29fe1d8 100644 --- 
a/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -91,8 +91,9 @@ bool PredicateExpressionsOptimizer::tryRewritePredicatesToTables(ASTs & tables_e bool is_rewrite_tables = false; if (tables_element.size() != tables_predicates.size()) - throw Exception("Unexpected elements count in predicate push down: `set enable_optimize_predicate_expression = 0` to disable", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Unexpected elements count in predicate push down: " + "`set enable_optimize_predicate_expression = 0` to disable"); for (size_t index = tables_element.size(); index > 0; --index) { diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index 60298142d92..9ee6cf1afe8 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -78,7 +78,7 @@ ProcessList::insert(const String & query_, const IAST * ast, ContextMutablePtr q const Settings & settings = query_context->getSettingsRef(); if (client_info.current_query_id.empty()) - throw Exception("Query id cannot be empty", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Query id cannot be empty"); bool is_unlimited_query = isUnlimitedQuery(ast); @@ -92,7 +92,7 @@ ProcessList::insert(const String & query_, const IAST * ast, ContextMutablePtr q if (queue_max_wait_ms) LOG_WARNING(&Poco::Logger::get("ProcessList"), "Too many simultaneous queries, will wait {} ms.", queue_max_wait_ms); if (!queue_max_wait_ms || !have_space.wait_for(lock, std::chrono::milliseconds(queue_max_wait_ms), [&]{ return processes.size() < max_size; })) - throw Exception("Too many simultaneous queries. Maximum: " + toString(max_size), ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES); + throw Exception(ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES, "Too many simultaneous queries. Maximum: {}", max_size); } if (!is_unlimited_query) @@ -130,10 +130,8 @@ ProcessList::insert(const String & query_, const IAST * ast, ContextMutablePtr q if (!is_unlimited_query && settings.max_concurrent_queries_for_all_users && processes.size() >= settings.max_concurrent_queries_for_all_users) - throw Exception( - "Too many simultaneous queries for all users. Current: " + toString(processes.size()) - + ", maximum: " + settings.max_concurrent_queries_for_all_users.toString(), - ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES); + throw Exception(ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES, "Too many simultaneous queries for all users. " + "Current: {}, maximum: {}", processes.size(), settings.max_concurrent_queries_for_all_users.toString()); } /** Why we use current user? @@ -153,10 +151,11 @@ ProcessList::insert(const String & query_, const IAST * ast, ContextMutablePtr q { if (!is_unlimited_query && settings.max_concurrent_queries_for_user && user_process_list->second.queries.size() >= settings.max_concurrent_queries_for_user) - throw Exception("Too many simultaneous queries for user " + client_info.current_user - + ". Current: " + toString(user_process_list->second.queries.size()) - + ", maximum: " + settings.max_concurrent_queries_for_user.toString(), - ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES); + throw Exception(ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES, + "Too many simultaneous queries for user {}. 
" + "Current: {}, maximum: {}", + client_info.current_user, user_process_list->second.queries.size(), + settings.max_concurrent_queries_for_user.toString()); auto running_query = user_process_list->second.queries.find(client_info.current_query_id); @@ -638,7 +637,7 @@ void ProcessList::decreaseQueryKindAmount(const IAST::QueryKind & query_kind) if (found == query_kind_amounts.end()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong query kind amount: decrease before increase on '{}'", query_kind); else if (found->second == 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong query kind amount: decrease to negative on '{}'", query_kind, found->second); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong query kind amount: decrease to negative on '{}', {}", query_kind, found->second); else found->second -= 1; } diff --git a/src/Interpreters/ProcessList.h b/src/Interpreters/ProcessList.h index 34edfc5a2e2..eae8b15c695 100644 --- a/src/Interpreters/ProcessList.h +++ b/src/Interpreters/ProcessList.h @@ -25,7 +25,6 @@ #include #include #include -#include #include #include diff --git a/src/Interpreters/ProfileEventsExt.cpp b/src/Interpreters/ProfileEventsExt.cpp index 1a8176624d2..0f6b52b2611 100644 --- a/src/Interpreters/ProfileEventsExt.cpp +++ b/src/Interpreters/ProfileEventsExt.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include diff --git a/src/Interpreters/QueryAliasesVisitor.cpp b/src/Interpreters/QueryAliasesVisitor.cpp index 1b6f37ac67a..18fb4b75365 100644 --- a/src/Interpreters/QueryAliasesVisitor.cpp +++ b/src/Interpreters/QueryAliasesVisitor.cpp @@ -127,7 +127,7 @@ void QueryAliasesMatcher::visitOther(const ASTPtr & ast, Data & data) if (!alias.empty()) { if (aliases.contains(alias) && ast->getTreeHash() != aliases[alias]->getTreeHash()) - throw Exception(wrongAliasMessage(ast, aliases[alias], alias), ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS); + throw Exception::createDeprecated(wrongAliasMessage(ast, aliases[alias], alias), ErrorCodes::MULTIPLE_EXPRESSIONS_FOR_ALIAS); aliases[alias] = ast; } diff --git a/src/Interpreters/QueryNormalizer.cpp b/src/Interpreters/QueryNormalizer.cpp index 921d004af94..4db61501d3d 100644 --- a/src/Interpreters/QueryNormalizer.cpp +++ b/src/Interpreters/QueryNormalizer.cpp @@ -33,7 +33,7 @@ public: : data(data_) { if (data.level > data.settings.max_ast_depth) - throw Exception("Normalized AST is too deep. Maximum: " + toString(data.settings.max_ast_depth), ErrorCodes::TOO_DEEP_AST); + throw Exception(ErrorCodes::TOO_DEEP_AST, "Normalized AST is too deep. Maximum: {}", data.settings.max_ast_depth); ++data.level; } @@ -83,7 +83,8 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) /// If it is an alias, but not a parent alias (for constructs like "SELECT column + 1 AS column"). auto it_alias = data.aliases.find(node.name()); if (!data.allow_self_aliases && current_alias == node.name()) - throw Exception(ErrorCodes::CYCLIC_ALIASES, "Self referencing of {} to {}. Cyclic alias", backQuote(current_alias), backQuote(node.name())); + throw Exception(ErrorCodes::CYCLIC_ALIASES, "Self referencing of {} to {}. 
Cyclic alias", + backQuote(current_alias), backQuote(node.name())); if (it_alias != data.aliases.end() && current_alias != node.name()) { @@ -101,7 +102,7 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) if (current_asts.contains(alias_node.get()) /// We have loop of multiple aliases || (node.name() == our_alias_or_name && our_name && node_alias == *our_name)) /// Our alias points to node.name, direct loop - throw Exception("Cyclic aliases", ErrorCodes::CYCLIC_ALIASES); + throw Exception(ErrorCodes::CYCLIC_ALIASES, "Cyclic aliases"); /// Let's replace it with the corresponding tree node. if (!node_alias.empty() && node_alias != our_alias_or_name) @@ -178,7 +179,7 @@ void QueryNormalizer::visitChildren(IAST * node, Data & data) if (func_node->tryGetQueryArgument()) { if (func_node->name != "view") - throw Exception("Query argument can only be used in the `view` TableFunction", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Query argument can only be used in the `view` TableFunction"); /// Don't go into query argument. return; } @@ -268,7 +269,7 @@ void QueryNormalizer::visit(ASTPtr & ast, Data & data) else if (auto * node_param = ast->as()) { if (!data.is_create_parameterized_view) - throw Exception("Query parameter " + backQuote(node_param->name) + " was not set", ErrorCodes::UNKNOWN_QUERY_PARAMETER); + throw Exception(ErrorCodes::UNKNOWN_QUERY_PARAMETER, "Query parameter {} was not set", backQuote(node_param->name)); } else if (auto * node_function = ast->as()) if (node_function->parameters) diff --git a/src/Interpreters/ReplaceQueryParameterVisitor.cpp b/src/Interpreters/ReplaceQueryParameterVisitor.cpp index 98e35d69ab7..6f7a0b83128 100644 --- a/src/Interpreters/ReplaceQueryParameterVisitor.cpp +++ b/src/Interpreters/ReplaceQueryParameterVisitor.cpp @@ -55,7 +55,7 @@ const String & ReplaceQueryParameterVisitor::getParamValue(const String & name) if (search != query_parameters.end()) return search->second; else - throw Exception("Substitution " + backQuote(name) + " is not set", ErrorCodes::UNKNOWN_QUERY_PARAMETER); + throw Exception(ErrorCodes::UNKNOWN_QUERY_PARAMETER, "Substitution {} is not set", backQuote(name)); } void ReplaceQueryParameterVisitor::visitQueryParameter(ASTPtr & ast) diff --git a/src/Interpreters/RequiredSourceColumnsVisitor.cpp b/src/Interpreters/RequiredSourceColumnsVisitor.cpp index 18cbfaee63f..1bcec02f0c0 100644 --- a/src/Interpreters/RequiredSourceColumnsVisitor.cpp +++ b/src/Interpreters/RequiredSourceColumnsVisitor.cpp @@ -21,19 +21,19 @@ namespace ErrorCodes std::vector RequiredSourceColumnsMatcher::extractNamesFromLambda(const ASTFunction & node) { if (node.arguments->children.size() != 2) - throw Exception("lambda requires two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "lambda requires two arguments"); const auto * lambda_args_tuple = node.arguments->children[0]->as(); if (!lambda_args_tuple || lambda_args_tuple->name != "tuple") - throw Exception("First argument of lambda must be a tuple", ErrorCodes::TYPE_MISMATCH); + throw Exception(ErrorCodes::TYPE_MISMATCH, "First argument of lambda must be a tuple"); std::vector names; for (auto & child : lambda_args_tuple->arguments->children) { const auto * identifier = child->as(); if (!identifier) - throw Exception("lambda argument declarations must be identifiers", ErrorCodes::TYPE_MISMATCH); + throw Exception(ErrorCodes::TYPE_MISMATCH, "lambda argument declarations must be 
identifiers"); names.push_back(identifier->name()); } @@ -171,7 +171,7 @@ void RequiredSourceColumnsMatcher::visit(const ASTIdentifier & node, const ASTPt { // FIXME(ilezhankin): shouldn't ever encounter if (node.name().empty()) - throw Exception("Expected not empty name", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Expected not empty name"); if (!data.private_aliases.contains(node.name())) data.addColumnIdentifier(node); @@ -211,7 +211,7 @@ void RequiredSourceColumnsMatcher::visit(const ASTArrayJoin & node, const ASTPtr { ASTPtr expression_list = node.expression_list; if (!expression_list || expression_list->children.empty()) - throw Exception("Expected not empty expression_list", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Expected not empty expression_list"); std::vector out; diff --git a/src/Interpreters/RowRefs.cpp b/src/Interpreters/RowRefs.cpp index 0e553ef145e..2d4f807ad46 100644 --- a/src/Interpreters/RowRefs.cpp +++ b/src/Interpreters/RowRefs.cpp @@ -238,7 +238,7 @@ AsofRowRefs createAsofRowRef(TypeIndex type, ASOFJoinInequality inequality) result = std::make_unique>(); break; default: - throw Exception("Invalid ASOF Join order", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid ASOF Join order"); } }; @@ -265,7 +265,7 @@ std::optional SortedLookupVectorBase::getTypeSize(const IColumn & aso DISPATCH(DateTime64) #undef DISPATCH - throw Exception("ASOF join not supported for type: " + std::string(asof_column.getFamilyName()), ErrorCodes::BAD_TYPE_OF_FIELD); + throw Exception(ErrorCodes::BAD_TYPE_OF_FIELD, "ASOF join not supported for type: {}", std::string(asof_column.getFamilyName())); } } diff --git a/src/Interpreters/SelectIntersectExceptQueryVisitor.cpp b/src/Interpreters/SelectIntersectExceptQueryVisitor.cpp index 756a8a48e25..c72961493f0 100644 --- a/src/Interpreters/SelectIntersectExceptQueryVisitor.cpp +++ b/src/Interpreters/SelectIntersectExceptQueryVisitor.cpp @@ -59,9 +59,8 @@ void SelectIntersectExceptQueryMatcher::visit(ASTSelectWithUnionQuery & ast, Dat else if (data.except_default_mode == SetOperationMode::DISTINCT) mode = SelectUnionMode::EXCEPT_DISTINCT; else - throw Exception( - "Expected ALL or DISTINCT in EXCEPT query, because setting (except_default_mode) is empty", - DB::ErrorCodes::EXPECTED_ALL_OR_DISTINCT); + throw Exception(DB::ErrorCodes::EXPECTED_ALL_OR_DISTINCT, + "Expected ALL or DISTINCT in EXCEPT query, because setting (except_default_mode) is empty"); } else if (mode == SelectUnionMode::INTERSECT_DEFAULT) { @@ -70,9 +69,8 @@ void SelectIntersectExceptQueryMatcher::visit(ASTSelectWithUnionQuery & ast, Dat else if (data.intersect_default_mode == SetOperationMode::DISTINCT) mode = SelectUnionMode::INTERSECT_DISTINCT; else - throw Exception( - "Expected ALL or DISTINCT in INTERSECT query, because setting (intersect_default_mode) is empty", - DB::ErrorCodes::EXPECTED_ALL_OR_DISTINCT); + throw Exception(DB::ErrorCodes::EXPECTED_ALL_OR_DISTINCT, + "Expected ALL or DISTINCT in INTERSECT query, because setting (intersect_default_mode) is empty"); } switch (mode) diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index b6f120edc6c..5c72e24c577 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -107,7 +107,7 @@ public: if (it == sessions.end()) { if (throw_if_not_found) - throw Exception("Session not found.", ErrorCodes::SESSION_NOT_FOUND); + 
throw Exception(ErrorCodes::SESSION_NOT_FOUND, "Session not found."); /// Create a new session from current context. auto context = Context::createCopy(global_context); @@ -129,7 +129,7 @@ public: LOG_TEST(log, "Reuse session from storage with session_id: {}, user_id: {}", key.second, key.first); if (!session.unique()) - throw Exception("Session is locked by a concurrent client.", ErrorCodes::SESSION_IS_LOCKED); + throw Exception(ErrorCodes::SESSION_IS_LOCKED, "Session is locked by a concurrent client."); return {session, false}; } } @@ -311,7 +311,7 @@ void Session::authenticate(const String & user_name, const String & password, co void Session::authenticate(const Credentials & credentials_, const Poco::Net::SocketAddress & address_) { if (session_context) - throw Exception("If there is a session context it must be created after authentication", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "If there is a session context it must be created after authentication"); auto address = address_; if ((address == Poco::Net::SocketAddress{}) && (prepared_client_info->interface == ClientInfo::Interface::LOCAL)) @@ -362,11 +362,11 @@ const ClientInfo & Session::getClientInfo() const ContextMutablePtr Session::makeSessionContext() { if (session_context) - throw Exception("Session context already exists", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Session context already exists"); if (query_context_created) - throw Exception("Session context must be created before any query context", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Session context must be created before any query context"); if (!user_id) - throw Exception("Session context must be created after authentication", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Session context must be created after authentication"); LOG_DEBUG(log, "{} Creating session context with user_id: {}", toString(auth_id), toString(*user_id)); @@ -394,11 +394,11 @@ ContextMutablePtr Session::makeSessionContext() ContextMutablePtr Session::makeSessionContext(const String & session_name_, std::chrono::steady_clock::duration timeout_, bool session_check_) { if (session_context) - throw Exception("Session context already exists", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Session context already exists"); if (query_context_created) - throw Exception("Session context must be created before any query context", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Session context must be created before any query context"); if (!user_id) - throw Exception("Session context must be created after authentication", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Session context must be created after authentication"); LOG_DEBUG(log, "{} Creating named session context with name: {}, user_id: {}", toString(auth_id), session_name_, toString(*user_id)); @@ -453,7 +453,7 @@ std::shared_ptr Session::getSessionLog() const ContextMutablePtr Session::makeQueryContextImpl(const ClientInfo * client_info_to_copy, ClientInfo * client_info_to_move) const { if (!user_id && getClientInfo().interface != ClientInfo::Interface::TCP_INTERSERVER) - throw Exception("Session context must be created after authentication", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Session context must be created after authentication"); /// We can create a query context either from a session context or from a 
global context. bool from_session_context = static_cast(session_context); diff --git a/src/Interpreters/Set.cpp b/src/Interpreters/Set.cpp index e75232aa0f5..75bb05f8346 100644 --- a/src/Interpreters/Set.cpp +++ b/src/Interpreters/Set.cpp @@ -176,10 +176,10 @@ bool Set::insertFromBlock(const ColumnsWithTypeAndName & columns) bool Set::insertFromBlock(const Columns & columns) { - std::lock_guard lock(rwlock); + std::lock_guard lock(rwlock); if (data.empty()) - throw Exception("Method Set::setHeader must be called before Set::insertFromBlock", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Method Set::setHeader must be called before Set::insertFromBlock"); ColumnRawPtrs key_columns; key_columns.reserve(keys_size); @@ -242,7 +242,7 @@ ColumnPtr Set::execute(const ColumnsWithTypeAndName & columns, bool negative) co size_t num_key_columns = columns.size(); if (0 == num_key_columns) - throw Exception("Logical error: no columns passed to Set::execute method.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: no columns passed to Set::execute method."); auto res = ColumnUInt8::create(); ColumnUInt8::Container & vec_res = res->getData(); @@ -416,9 +416,9 @@ bool Set::areTypesEqual(size_t set_type_idx, const DataTypePtr & other_type) con void Set::checkTypesEqual(size_t set_type_idx, const DataTypePtr & other_type) const { if (!this->areTypesEqual(set_type_idx, other_type)) - throw Exception("Types of column " + toString(set_type_idx + 1) + " in section IN don't match: " - + other_type->getName() + " on the left, " - + data_types[set_type_idx]->getName() + " on the right", ErrorCodes::TYPE_MISMATCH); + throw Exception(ErrorCodes::TYPE_MISMATCH, "Types of column {} in section IN don't match: " + "{} on the left, {} on the right", toString(set_type_idx + 1), + other_type->getName(), data_types[set_type_idx]->getName()); } MergeTreeSetIndex::MergeTreeSetIndex(const Columns & set_elements, std::vector && indexes_mapping_) diff --git a/src/Interpreters/Set.h b/src/Interpreters/Set.h index bafb0dcea7a..00eff614c7c 100644 --- a/src/Interpreters/Set.h +++ b/src/Interpreters/Set.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include #include @@ -8,6 +7,7 @@ #include #include +#include #include @@ -131,7 +131,7 @@ private: /** Protects work with the set in the functions `insertFromBlock` and `execute`. * These functions can be called simultaneously from different threads only when using StorageSet, */ - mutable std::shared_mutex rwlock; + mutable SharedMutex rwlock; template void insertFromBlockImpl( diff --git a/src/Interpreters/SetVariants.cpp b/src/Interpreters/SetVariants.cpp index f1fdc6c4095..cd9148a01cf 100644 --- a/src/Interpreters/SetVariants.cpp +++ b/src/Interpreters/SetVariants.cpp @@ -119,7 +119,7 @@ typename SetVariantsTemplate::Type SetVariantsTemplate::choose /// Pack if possible all the keys along with information about which key values are nulls /// into a fixed 16- or 32-byte blob. 
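The SetVariants hunk above keeps the comment about packing all fixed-size keys, together with a bitmap of which key values are NULL, into a single 16- or 32-byte blob before picking the nullable_keys128 or nullable_keys256 layout. The standalone sketch below illustrates only that size calculation, assuming a one-bit-per-key null bitmap; SetKeysLayout, chooseLayout and the bitmap sizing are illustrative stand-ins, not the actual KeysNullMap/SetVariantsTemplate types.

    #include <cstddef>
    #include <iostream>
    #include <limits>

    // Hypothetical stand-in for the fixed-size layouts discussed above.
    enum class SetKeysLayout { NullableKeys128, NullableKeys256, Fallback };

    // Decide whether the concatenated keys plus a null bitmap fit into a 16- or 32-byte blob.
    SetKeysLayout chooseLayout(size_t num_keys, size_t keys_bytes)
    {
        const size_t bitmap_bytes = (num_keys + 7) / 8;   // one bit per key, rounded up

        // Guard against overflow before adding the bitmap size, as the original check does.
        if (keys_bytes > std::numeric_limits<size_t>::max() - bitmap_bytes)
            return SetKeysLayout::Fallback;

        if (bitmap_bytes + keys_bytes <= 16)
            return SetKeysLayout::NullableKeys128;
        if (bitmap_bytes + keys_bytes <= 32)
            return SetKeysLayout::NullableKeys256;
        return SetKeysLayout::Fallback;
    }

    int main()
    {
        // Three nullable 4-byte keys: 12 key bytes + 1 bitmap byte -> 128-bit blob (prints 0).
        std::cout << static_cast<int>(chooseLayout(3, 12)) << '\n';
        // Three nullable 8-byte keys: 24 key bytes + 1 bitmap byte -> 256-bit blob (prints 1).
        std::cout << static_cast<int>(chooseLayout(3, 24)) << '\n';
    }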
if (keys_bytes > (std::numeric_limits::max() - std::tuple_size>::value)) - throw Exception{"Aggregator: keys sizes overflow", ErrorCodes::LOGICAL_ERROR}; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Aggregator: keys sizes overflow"); if ((std::tuple_size>::value + keys_bytes) <= 16) return Type::nullable_keys128; if ((std::tuple_size>::value + keys_bytes) <= 32) @@ -146,7 +146,7 @@ typename SetVariantsTemplate::Type SetVariantsTemplate::choose return Type::keys128; if (size_of_field == 32) return Type::keys256; - throw Exception("Logical error: numeric column has sizeOfField not in 1, 2, 4, 8, 16, 32.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: numeric column has sizeOfField not in 1, 2, 4, 8, 16, 32."); } /// If the keys fit in N bits, we will use a hash table for N-bit-packed keys diff --git a/src/Interpreters/SetVariants.h b/src/Interpreters/SetVariants.h index d6be996effb..71187c9f109 100644 --- a/src/Interpreters/SetVariants.h +++ b/src/Interpreters/SetVariants.h @@ -136,20 +136,17 @@ class BaseStateKeysFixed protected: void init(const ColumnRawPtrs &) { - throw Exception{"Internal error: calling init() for non-nullable" - " keys is forbidden", ErrorCodes::LOGICAL_ERROR}; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Internal error: calling init() for non-nullable keys is forbidden"); } const ColumnRawPtrs & getActualColumns() const { - throw Exception{"Internal error: calling getActualColumns() for non-nullable" - " keys is forbidden", ErrorCodes::LOGICAL_ERROR}; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Internal error: calling getActualColumns() for non-nullable keys is forbidden"); } KeysNullMap createBitmap(size_t) const { - throw Exception{"Internal error: calling createBitmap() for non-nullable keys" - " is forbidden", ErrorCodes::LOGICAL_ERROR}; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Internal error: calling createBitmap() for non-nullable keys is forbidden"); } }; diff --git a/src/Interpreters/SquashingTransform.cpp b/src/Interpreters/SquashingTransform.cpp index 593b141e550..4ed0dddc191 100644 --- a/src/Interpreters/SquashingTransform.cpp +++ b/src/Interpreters/SquashingTransform.cpp @@ -110,7 +110,7 @@ bool SquashingTransform::isEnoughSize(const Block & block) if (!rows) rows = column->size(); else if (rows != column->size()) - throw Exception("Sizes of columns doesn't match", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); + throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Sizes of columns doesn't match"); bytes += column->byteSize(); } diff --git a/src/Interpreters/StorageID.cpp b/src/Interpreters/StorageID.cpp index 70dea02ccc5..b3a504d7ef4 100644 --- a/src/Interpreters/StorageID.cpp +++ b/src/Interpreters/StorageID.cpp @@ -40,7 +40,7 @@ StorageID::StorageID(const ASTPtr & node) else if (const auto * simple_query = dynamic_cast(node.get())) *this = StorageID(*simple_query); else - throw Exception("Unexpected AST", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected AST"); } String StorageID::getTableName() const @@ -53,7 +53,7 @@ String StorageID::getDatabaseName() const { assertNotEmpty(); if (database_name.empty()) - throw Exception("Database name is empty", ErrorCodes::UNKNOWN_DATABASE); + throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database name is empty"); return database_name; } diff --git a/src/Interpreters/StorageID.h b/src/Interpreters/StorageID.h index 68c83f753b5..147d50b4e4f 100644 --- a/src/Interpreters/StorageID.h +++ b/src/Interpreters/StorageID.h @@ -79,9 
+79,9 @@ struct StorageID { // Can be triggered by user input, e.g. SELECT joinGetOrNull('', 'num', 500) if (empty()) - throw Exception("Both table name and UUID are empty", ErrorCodes::UNKNOWN_TABLE); + throw Exception(ErrorCodes::UNKNOWN_TABLE, "Both table name and UUID are empty"); if (table_name.empty() && !database_name.empty()) - throw Exception("Table name is empty, but database name is not", ErrorCodes::UNKNOWN_TABLE); + throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table name is empty, but database name is not"); } /// Avoid implicit construction of empty StorageID. However, it's needed for deferred initialization. diff --git a/src/Interpreters/SubstituteColumnOptimizer.cpp b/src/Interpreters/SubstituteColumnOptimizer.cpp index da738d3db1e..d98491aaf9e 100644 --- a/src/Interpreters/SubstituteColumnOptimizer.cpp +++ b/src/Interpreters/SubstituteColumnOptimizer.cpp @@ -242,7 +242,7 @@ void SubstituteColumnOptimizer::perform() { auto * list = select_query->refSelect()->as(); if (!list) - throw Exception("List of selected columns must be ASTExpressionList", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "List of selected columns must be ASTExpressionList"); for (ASTPtr & ast : list->children) ast->setAlias(ast->getAliasOrColumnName()); diff --git a/src/Interpreters/SynonymsExtensions.cpp b/src/Interpreters/SynonymsExtensions.cpp index 7979c849975..8715b321a43 100644 --- a/src/Interpreters/SynonymsExtensions.cpp +++ b/src/Interpreters/SynonymsExtensions.cpp @@ -35,8 +35,7 @@ public: { std::ifstream file(path); if (!file.is_open()) - throw Exception("Cannot find synonyms extension at: " + path, - ErrorCodes::INVALID_CONFIG_PARAMETER); + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Cannot find synonyms extension at: {}", path); String line; while (std::getline(file, line)) @@ -104,24 +103,24 @@ SynonymsExtensions::SynonymsExtensions(const Poco::Util::AbstractConfiguration & const auto & ext_type = config.getString(prefix + "." + key + ".type", ""); if (ext_name.empty()) - throw Exception("Extension name in config is not specified here: " + prefix + "." + key + ".name", - ErrorCodes::INVALID_CONFIG_PARAMETER); + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Extension name in config is not specified here: {}.{}.name", + prefix, key); if (ext_path.empty()) - throw Exception("Extension path in config is not specified here: " + prefix + "." + key + ".path", - ErrorCodes::INVALID_CONFIG_PARAMETER); + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Extension path in config is not specified here: {}.{}.path", + prefix, key); if (ext_type.empty()) - throw Exception("Extension type in config is not specified here: " + prefix + "." + key + ".type", - ErrorCodes::INVALID_CONFIG_PARAMETER); + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Extension type in config is not specified here: {}.{}.type", + prefix, key); if (ext_type != "plain" && ext_type != "wordnet") - throw Exception("Unknown extension type in config: " + prefix + "." + key + ".type, must be 'plain' or 'wordnet'", - ErrorCodes::INVALID_CONFIG_PARAMETER); + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Unknown extension type in config: " + "{}.{}.type, must be 'plain' or 'wordnet'", prefix, key); info[ext_name].path = ext_path; info[ext_name].type = ext_type; } else - throw Exception("Unknown element in config: " + prefix + "." 
+ key + ", must be 'extension'", - ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG, "Unknown element in config: {}.{}, must be 'extension'", + prefix, key); } } @@ -141,13 +140,12 @@ SynonymsExtensions::ExtPtr SynonymsExtensions::getExtension(const String & name) else if (ext_info.type == "wordnet") extensions[name] = std::make_shared(ext_info.path); else - throw Exception("Unknown extension type: " + ext_info.type, ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown extension type: {}", ext_info.type); return extensions[name]; } - throw Exception("Extension named: '" + name + "' is not found", - ErrorCodes::INVALID_CONFIG_PARAMETER); + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Extension named: '{}' is not found", name); } } diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index b74f550f697..f9343b7889d 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -136,17 +136,15 @@ std::shared_ptr createSystemLog( if (config.has(config_prefix + ".engine")) { if (config.has(config_prefix + ".partition_by")) - throw Exception("If 'engine' is specified for system table, " - "PARTITION BY parameters should be specified directly inside 'engine' and 'partition_by' setting doesn't make sense", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "If 'engine' is specified for system table, PARTITION BY parameters should " + "be specified directly inside 'engine' and 'partition_by' setting doesn't make sense"); if (config.has(config_prefix + ".ttl")) - throw Exception("If 'engine' is specified for system table, " - "TTL parameters should be specified directly inside 'engine' and 'ttl' setting doesn't make sense", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "If 'engine' is specified for system table, " + "TTL parameters should be specified directly inside 'engine' and 'ttl' setting doesn't make sense"); if (config.has(config_prefix + ".storage_policy")) - throw Exception("If 'engine' is specified for system table, SETTINGS storage_policy = '...' " - "should be specified directly inside 'engine' and 'storage_policy' setting doesn't make sense", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "If 'engine' is specified for system table, SETTINGS storage_policy = '...' 
" + "should be specified directly inside 'engine' and 'storage_policy' setting doesn't make sense"); engine = config.getString(config_prefix + ".engine"); } else diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index 78218ac59a5..7ea7a265263 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -141,7 +141,7 @@ void TableJoin::addDisjunct() clauses.emplace_back(); if (getStorageJoin() && clauses.size() > 1) - throw Exception("StorageJoin with ORs is not supported", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "StorageJoin with ORs is not supported"); } void TableJoin::addOnKeys(ASTPtr & left_table_ast, ASTPtr & right_table_ast) @@ -490,11 +490,11 @@ void TableJoin::inferJoinKeyCommonType(const LeftNamesAndTypes & left, const Rig if (strictness() == JoinStrictness::Asof) { if (clauses.size() != 1) - throw DB::Exception("ASOF join over multiple keys is not supported", ErrorCodes::NOT_IMPLEMENTED); + throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "ASOF join over multiple keys is not supported"); auto asof_key_type = right_types.find(clauses.back().key_names_right.back()); if (asof_key_type != right_types.end() && asof_key_type->second->isNullable()) - throw DB::Exception("ASOF join over right table Nullable column is not implemented", ErrorCodes::NOT_IMPLEMENTED); + throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "ASOF join over right table Nullable column is not implemented"); } forAllKeys(clauses, [&](const auto & left_key_name, const auto & right_key_name) diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h index 9d03c9bd57b..84390adc0df 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -198,7 +198,7 @@ public: : size_limits(limits) , default_max_bytes(0) , join_use_nulls(use_nulls) - , join_algorithm(JoinAlgorithm::HASH) + , join_algorithm(JoinAlgorithm::DEFAULT) { clauses.emplace_back().key_names_right = key_names_right; table_join.kind = kind; diff --git a/src/Interpreters/TableOverrideUtils.cpp b/src/Interpreters/TableOverrideUtils.cpp index 58e885380bf..afff5ac0111 100644 --- a/src/Interpreters/TableOverrideUtils.cpp +++ b/src/Interpreters/TableOverrideUtils.cpp @@ -106,7 +106,9 @@ void TableOverrideAnalyzer::analyze(const StorageInMemoryMetadata & metadata, Re if (auto col_default = metadata.columns.getDefault(found->name)) existing_default_kind = col_default->kind; if (existing_default_kind != override_default_kind) - throw Exception(ErrorCodes::INVALID_TABLE_OVERRIDE, "column {}: modifying default specifier is not allowed", backQuote(override_column->name)); + throw Exception(ErrorCodes::INVALID_TABLE_OVERRIDE, + "column {}: modifying default specifier is not allowed", + backQuote(override_column->name)); result.modified_columns.push_back({found->name, override_type}); /// TODO: validate that the original type can be converted to the overridden type } diff --git a/src/Interpreters/TablesStatus.cpp b/src/Interpreters/TablesStatus.cpp index 5d94624be85..005a4515c3a 100644 --- a/src/Interpreters/TablesStatus.cpp +++ b/src/Interpreters/TablesStatus.cpp @@ -35,9 +35,7 @@ void TableStatus::read(ReadBuffer & in) void TablesStatusRequest::write(WriteBuffer & out, UInt64 server_protocol_revision) const { if (server_protocol_revision < DBMS_MIN_REVISION_WITH_TABLES_STATUS) - throw Exception( - "Logical error: method TablesStatusRequest::write is called for unsupported server revision", - ErrorCodes::LOGICAL_ERROR); + throw 
Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: method TablesStatusRequest::write is called for unsupported server revision"); writeVarUInt(tables.size(), out); for (const auto & table_name : tables) @@ -50,15 +48,13 @@ void TablesStatusRequest::write(WriteBuffer & out, UInt64 server_protocol_revisi void TablesStatusRequest::read(ReadBuffer & in, UInt64 client_protocol_revision) { if (client_protocol_revision < DBMS_MIN_REVISION_WITH_TABLES_STATUS) - throw Exception( - "method TablesStatusRequest::read is called for unsupported client revision", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "method TablesStatusRequest::read is called for unsupported client revision"); size_t size = 0; readVarUInt(size, in); if (size > DEFAULT_MAX_STRING_SIZE) - throw Exception("Too large collection size.", ErrorCodes::TOO_LARGE_ARRAY_SIZE); + throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large collection size."); for (size_t i = 0; i < size; ++i) { @@ -72,9 +68,7 @@ void TablesStatusRequest::read(ReadBuffer & in, UInt64 client_protocol_revision) void TablesStatusResponse::write(WriteBuffer & out, UInt64 client_protocol_revision) const { if (client_protocol_revision < DBMS_MIN_REVISION_WITH_TABLES_STATUS) - throw Exception( - "method TablesStatusResponse::write is called for unsupported client revision", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "method TablesStatusResponse::write is called for unsupported client revision"); writeVarUInt(table_states_by_id.size(), out); for (const auto & kv: table_states_by_id) @@ -91,15 +85,13 @@ void TablesStatusResponse::write(WriteBuffer & out, UInt64 client_protocol_revis void TablesStatusResponse::read(ReadBuffer & in, UInt64 server_protocol_revision) { if (server_protocol_revision < DBMS_MIN_REVISION_WITH_TABLES_STATUS) - throw Exception( - "method TablesStatusResponse::read is called for unsupported server revision", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "method TablesStatusResponse::read is called for unsupported server revision"); size_t size = 0; readVarUInt(size, in); if (size > DEFAULT_MAX_STRING_SIZE) - throw Exception("Too large collection size.", ErrorCodes::TOO_LARGE_ARRAY_SIZE); + throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large collection size."); for (size_t i = 0; i < size; ++i) { diff --git a/src/Interpreters/TemporaryDataOnDisk.cpp b/src/Interpreters/TemporaryDataOnDisk.cpp index 9e9389451b7..e183124cadf 100644 --- a/src/Interpreters/TemporaryDataOnDisk.cpp +++ b/src/Interpreters/TemporaryDataOnDisk.cpp @@ -52,13 +52,13 @@ TemporaryFileStream & TemporaryDataOnDisk::createStream(const Block & header, si else if (volume) return createStreamToRegularFile(header, max_file_size); - throw Exception("TemporaryDataOnDiskScope has no cache and no volume", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "TemporaryDataOnDiskScope has no cache and no volume"); } TemporaryFileStream & TemporaryDataOnDisk::createStreamToCacheFile(const Block & header, size_t max_file_size) { if (!file_cache) - throw Exception("TemporaryDataOnDiskScope has no cache", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "TemporaryDataOnDiskScope has no cache"); auto holder = file_cache->set(FileSegment::Key::random(), 0, std::max(10_MiB, max_file_size), CreateFileSegmentSettings(FileSegmentKind::Temporary, /* unbounded */ true)); @@ -70,14 +70,14 @@ TemporaryFileStream & 
TemporaryDataOnDisk::createStreamToCacheFile(const Block & TemporaryFileStream & TemporaryDataOnDisk::createStreamToRegularFile(const Block & header, size_t max_file_size) { if (!volume) - throw Exception("TemporaryDataOnDiskScope has no volume", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "TemporaryDataOnDiskScope has no volume"); DiskPtr disk; if (max_file_size > 0) { auto reservation = volume->reserve(max_file_size); if (!reservation) - throw Exception("Not enough space on temporary disk", ErrorCodes::NOT_ENOUGH_SPACE); + throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Not enough space on temporary disk"); disk = reservation->getDisk(); } else @@ -131,7 +131,7 @@ struct TemporaryFileStream::OutputWriter size_t write(const Block & block) { if (finalized) - throw Exception("Cannot write to finalized stream", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot write to finalized stream"); size_t written_bytes = out_writer.write(block); num_rows += block.rows(); return written_bytes; @@ -140,7 +140,7 @@ struct TemporaryFileStream::OutputWriter void flush() { if (finalized) - throw Exception("Cannot flush finalized stream", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot flush finalized stream"); out_compressed_buf.next(); out_buf->next(); @@ -233,7 +233,7 @@ TemporaryFileStream::TemporaryFileStream(FileSegmentsHolder && segments_, const size_t TemporaryFileStream::write(const Block & block) { if (!out_writer) - throw Exception("Writing has been finished", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Writing has been finished"); updateAllocAndCheck(); size_t bytes_written = out_writer->write(block); @@ -243,7 +243,7 @@ size_t TemporaryFileStream::write(const Block & block) void TemporaryFileStream::flush() { if (!out_writer) - throw Exception("Writing has been finished", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Writing has been finished"); out_writer->flush(); } diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 4b757e0be7e..816b03f3a0e 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -151,12 +151,12 @@ void ThreadStatus::attachQuery(const ThreadGroupStatusPtr & thread_group_, bool if (thread_state == ThreadState::AttachedToQuery) { if (check_detached) - throw Exception("Can't attach query to the thread, it is already attached", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't attach query to the thread, it is already attached"); return; } if (!thread_group_) - throw Exception("Attempt to attach to nullptr thread group", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to attach to nullptr thread group"); setupState(thread_group_); } diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 36691885459..aeb912ddfbb 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -125,8 +125,7 @@ void TranslateQualifiedNamesMatcher::visit(ASTIdentifier & identifier, ASTPtr &, if (data.unknownColumn(table_pos, identifier)) { String table_name = data.tables[table_pos].table.getQualifiedNamePrefix(false); - throw Exception("There's no column '" + identifier.name() + "' in table '" + table_name + "'", - ErrorCodes::UNKNOWN_IDENTIFIER); + throw 
Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "There's no column '{}' in table '{}'", identifier.name(), table_name); } IdentifierSemantic::setMembership(identifier, table_pos); @@ -159,7 +158,7 @@ void TranslateQualifiedNamesMatcher::visit(ASTFunction & node, const ASTPtr &, D void TranslateQualifiedNamesMatcher::visit(const ASTQualifiedAsterisk & node, const ASTPtr &, Data & data) { if (!node.qualifier) - throw Exception("Logical error: qualified asterisk must have a qualifier", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: qualified asterisk must have a qualifier"); /// @note it could contain table alias as table name. DatabaseAndTableWithAlias db_and_table(node.qualifier); @@ -168,7 +167,7 @@ void TranslateQualifiedNamesMatcher::visit(const ASTQualifiedAsterisk & node, co if (db_and_table.satisfies(known_table.table, true)) return; - throw Exception("Unknown qualified identifier: " + node.qualifier->getAliasOrColumnName(), ErrorCodes::UNKNOWN_IDENTIFIER); + throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown qualified identifier: {}", node.qualifier->getAliasOrColumnName()); } void TranslateQualifiedNamesMatcher::visit(ASTTableJoin & join, const ASTPtr & , Data & data) @@ -218,7 +217,7 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt if (child->as() || child->as() || child->as()) { if (tables_with_columns.empty()) - throw Exception("An asterisk cannot be replaced with empty columns.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "An asterisk cannot be replaced with empty columns."); has_asterisk = true; } else if (const auto * qa = child->as()) @@ -349,8 +348,8 @@ void TranslateQualifiedNamesMatcher::extractJoinUsingColumns(ASTPtr ast, Data & { String alias = key->tryGetAlias(); if (alias.empty()) - throw Exception("Wrong key in USING. Expected identifier or alias, got: " + key->getID(), - ErrorCodes::UNSUPPORTED_JOIN_KEYS); + throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Wrong key in USING. Expected identifier or alias, got: {}", + key->getID()); data.join_using_columns.insert(alias); } } diff --git a/src/Interpreters/TreeCNFConverter.cpp b/src/Interpreters/TreeCNFConverter.cpp index 8812e90a5f0..d036c6728fe 100644 --- a/src/Interpreters/TreeCNFConverter.cpp +++ b/src/Interpreters/TreeCNFConverter.cpp @@ -49,7 +49,7 @@ void splitMultiLogic(ASTPtr & node) if (func && (func->name == "and" || func->name == "or")) { if (func->arguments->children.size() < 2) - throw Exception("Bad AND or OR function. Expected at least 2 arguments", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Bad AND or OR function. Expected at least 2 arguments"); if (func->arguments->children.size() > 2) { @@ -82,7 +82,7 @@ void traversePushNot(ASTPtr & node, bool add_negation) if (add_negation) { if (func->arguments->children.size() != 2) - throw Exception("Bad AND or OR function. Expected at least 2 arguments", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Bad AND or OR function. Expected at least 2 arguments"); /// apply De Morgan's Law node = makeASTFunction( @@ -98,7 +98,7 @@ void traversePushNot(ASTPtr & node, bool add_negation) else if (func && func->name == "not") { if (func->arguments->children.size() != 1) - throw Exception("Bad NOT function. Expected 1 argument", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Bad NOT function. 
Expected 1 argument"); /// delete NOT node = func->arguments->children[0]->clone(); @@ -189,7 +189,7 @@ void traverseCNF(const ASTPtr & node, CNFQuery::AndGroup & and_group, CNFQuery:: else if (func && func->name == "not") { if (func->arguments->children.size() != 1) - throw Exception("Bad NOT function. Expected 1 argument", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Bad NOT function. Expected 1 argument"); or_group.insert(CNFQuery::AtomicFormula{true, func->arguments->children.front()}); } else diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index 6a8c9dc7dbd..a63d3349e08 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -434,7 +434,7 @@ void optimizeMonotonousFunctionsInOrderBy(ASTSelectQuery * select_query, Context auto * order_by_element = child->as(); if (!order_by_element || order_by_element->children.empty()) - throw Exception("Bad ORDER BY expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE); + throw Exception(ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE, "Bad ORDER BY expression AST"); if (order_by_element->with_fill) return; @@ -513,7 +513,7 @@ void optimizeRedundantFunctionsInOrderBy(const ASTSelectQuery * select_query, Co auto * order_by_element = child->as(); if (!order_by_element || order_by_element->children.empty()) - throw Exception("Bad ORDER BY expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE); + throw Exception(ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE, "Bad ORDER BY expression AST"); if (order_by_element->with_fill) return; @@ -747,7 +747,7 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result, auto * select_query = query->as(); if (!select_query) - throw Exception("Select analyze for not select asts.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Select analyze for not select asts."); if (settings.optimize_functions_to_subcolumns && result.storage_snapshot && result.storage->supportsSubcolumns()) optimizeFunctionsToSubcolumns(query, result.storage_snapshot->metadata); diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 349855987a0..f88094ffbc6 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -208,73 +208,8 @@ struct CustomizeAggregateFunctionsMoveSuffixData } }; -struct FuseSumCountAggregates -{ - std::vector sums {}; - std::vector counts {}; - std::vector avgs {}; - - void addFuncNode(ASTFunction * func) - { - if (func->name == "sum") - sums.push_back(func); - else if (func->name == "count") - counts.push_back(func); - else - { - assert(func->name == "avg"); - avgs.push_back(func); - } - } - - bool canBeFused() const - { - // Need at least two different kinds of functions to fuse. - if (sums.empty() && counts.empty()) - return false; - if (sums.empty() && avgs.empty()) - return false; - if (counts.empty() && avgs.empty()) - return false; - return true; - } -}; - -struct FuseSumCountAggregatesVisitorData -{ - using TypeToVisit = ASTFunction; - - std::unordered_map fuse_map; - - void visit(ASTFunction & func, ASTPtr &) - { - if (func.name == "sum" || func.name == "avg" || func.name == "count") - { - if (func.arguments->children.empty()) - return; - - // Probably we can extend it to match count() for non-nullable argument - // to sum/avg with any other argument. Now we require strict match. 
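The TreeCNFConverter hunks above touch traversePushNot, which removes double negation and applies De Morgan's laws while pushing NOT towards the leaves. The sketch below performs the same transformation on a toy expression tree; Expr, pushNot and the string-based operators are illustrative stand-ins for the real AST classes, and the argument-count checks that the original code enforces with exceptions are omitted here.

    #include <iostream>
    #include <memory>
    #include <string>
    #include <utility>
    #include <vector>

    // Minimal expression tree standing in for the AST.
    struct Expr
    {
        std::string op;                                 // "and", "or", "not", or a leaf name
        std::vector<std::shared_ptr<Expr>> children;
    };
    using ExprPtr = std::shared_ptr<Expr>;

    ExprPtr leaf(std::string name) { return std::make_shared<Expr>(Expr{std::move(name), {}}); }

    // Push NOT down to the leaves, applying De Morgan's laws on the way:
    // NOT (a AND b) -> (NOT a) OR (NOT b), NOT (a OR b) -> (NOT a) AND (NOT b).
    ExprPtr pushNot(const ExprPtr & node, bool negate)
    {
        if (node->op == "not")
            return pushNot(node->children.at(0), !negate);   // drop the NOT, flip the flag

        if (node->op == "and" || node->op == "or")
        {
            auto result = std::make_shared<Expr>();
            result->op = negate ? (node->op == "and" ? "or" : "and") : node->op;
            for (const auto & child : node->children)
                result->children.push_back(pushNot(child, negate));
            return result;
        }

        // Leaf: materialize any accumulated negation as an explicit NOT.
        return negate ? std::make_shared<Expr>(Expr{"not", {node}}) : node;
    }

    void print(const ExprPtr & node)
    {
        if (node->children.empty()) { std::cout << node->op; return; }
        std::cout << node->op << '(';
        for (size_t i = 0; i < node->children.size(); ++i)
        {
            if (i) std::cout << ", ";
            print(node->children[i]);
        }
        std::cout << ')';
    }

    int main()
    {
        // not(and(a, b))  ->  or(not(a), not(b))
        auto e = std::make_shared<Expr>(Expr{"not",
            {std::make_shared<Expr>(Expr{"and", {leaf("a"), leaf("b")}})}});
        print(pushNot(e, false));
        std::cout << '\n';
    }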
- const auto argument = func.arguments->children.at(0)->getColumnName(); - auto it = fuse_map.find(argument); - if (it != fuse_map.end()) - { - it->second.addFuncNode(&func); - } - else - { - FuseSumCountAggregates funcs{}; - funcs.addFuncNode(&func); - fuse_map[argument] = funcs; - } - } - } -}; - using CustomizeAggregateFunctionsOrNullVisitor = InDepthNodeVisitor, true>; using CustomizeAggregateFunctionsMoveOrNullVisitor = InDepthNodeVisitor, true>; -using FuseSumCountAggregatesVisitor = InDepthNodeVisitor, true>; - struct ExistsExpressionData { @@ -373,53 +308,7 @@ void translateQualifiedNames(ASTPtr & query, const ASTSelectQuery & select_query /// This may happen after expansion of COLUMNS('regexp'). if (select_query.select()->children.empty()) - throw Exception("Empty list of columns in SELECT query", ErrorCodes::EMPTY_LIST_OF_COLUMNS_QUERIED); -} - -// Replaces one avg/sum/count function with an appropriate expression with -// sumCount(). -void replaceWithSumCount(String column_name, ASTFunction & func) -{ - auto func_base = makeASTFunction("sumCount", std::make_shared(column_name)); - auto exp_list = std::make_shared(); - if (func.name == "sum" || func.name == "count") - { - /// Rewrite "sum" to sumCount().1, rewrite "count" to sumCount().2 - UInt8 idx = (func.name == "sum" ? 1 : 2); - func.name = "tupleElement"; - exp_list->children.push_back(func_base); - exp_list->children.push_back(std::make_shared(idx)); - } - else - { - /// Rewrite "avg" to sumCount().1 / sumCount().2 - auto new_arg1 = makeASTFunction("tupleElement", func_base, std::make_shared(UInt8(1))); - auto new_arg2 = makeASTFunction("CAST", - makeASTFunction("tupleElement", func_base, std::make_shared(static_cast(2))), - std::make_shared("Float64")); - - func.name = "divide"; - exp_list->children.push_back(new_arg1); - exp_list->children.push_back(new_arg2); - } - func.arguments = exp_list; - func.children.push_back(func.arguments); -} - -void fuseSumCountAggregates(std::unordered_map & fuse_map) -{ - for (auto & it : fuse_map) - { - if (it.second.canBeFused()) - { - for (auto & func: it.second.sums) - replaceWithSumCount(it.first, *func); - for (auto & func: it.second.avgs) - replaceWithSumCount(it.first, *func); - for (auto & func: it.second.counts) - replaceWithSumCount(it.first, *func); - } - } + throw Exception(ErrorCodes::EMPTY_LIST_OF_COLUMNS_QUERIED, "Empty list of columns in SELECT query"); } bool hasArrayJoin(const ASTPtr & ast) @@ -616,7 +505,7 @@ void getArrayJoinedColumns(ASTPtr & query, TreeRewriterResult & result, const AS if (result.array_join_result_to_source.empty()) { if (select_query->arrayJoinExpressionList().first->children.empty()) - throw DB::Exception("ARRAY JOIN requires an argument", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw DB::Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "ARRAY JOIN requires an argument"); ASTPtr expr = select_query->arrayJoinExpressionList().first->children.at(0); String source_name = expr->getColumnName(); @@ -641,7 +530,7 @@ void getArrayJoinedColumns(ASTPtr & query, TreeRewriterResult & result, const AS } } if (!found) - throw Exception("No columns in nested table " + source_name, ErrorCodes::EMPTY_NESTED_TABLE); + throw Exception(ErrorCodes::EMPTY_NESTED_TABLE, "No columns in nested table {}", source_name); } } } @@ -662,8 +551,8 @@ void setJoinStrictness(ASTSelectQuery & select_query, JoinStrictness join_defaul else if (join_default_strictness == JoinStrictness::All) table_join.strictness = JoinStrictness::All; else - throw Exception("Expected 
ANY or ALL in JOIN section, because setting (join_default_strictness) is empty", - DB::ErrorCodes::EXPECTED_ALL_OR_ANY); + throw Exception(DB::ErrorCodes::EXPECTED_ALL_OR_ANY, + "Expected ANY or ALL in JOIN section, because setting (join_default_strictness) is empty"); } if (old_any) @@ -681,7 +570,7 @@ void setJoinStrictness(ASTSelectQuery & select_query, JoinStrictness join_defaul else { if (table_join.strictness == JoinStrictness::Any && table_join.kind == JoinKind::Full) - throw Exception("ANY FULL JOINs are not implemented", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "ANY FULL JOINs are not implemented"); } analyzed_join->getTableJoin() = table_join; @@ -1220,6 +1109,7 @@ void TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select if (!unknown_required_source_columns.empty()) { + constexpr auto format_string = "Missing columns: {} while processing query: '{}', required columns:{}{}"; WriteBufferFromOwnString ss; ss << "Missing columns:"; for (const auto & name : unknown_required_source_columns) @@ -1276,7 +1166,7 @@ void TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select ss << " '" << name << "'"; } - throw Exception(ss.str(), ErrorCodes::UNKNOWN_IDENTIFIER); + throw Exception(PreformattedMessage{ss.str(), format_string}, ErrorCodes::UNKNOWN_IDENTIFIER); } required_source_columns.swap(source_columns); @@ -1307,7 +1197,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( { auto * select_query = query->as(); if (!select_query) - throw Exception("Select analyze for not select asts.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Select analyze for not select asts."); size_t subquery_depth = select_options.subquery_depth; bool remove_duplicates = select_options.remove_duplicates; @@ -1471,7 +1361,7 @@ TreeRewriterResultPtr TreeRewriter::analyze( bool is_create_parameterized_view) const { if (query->as()) - throw Exception("Not select analyze for select asts.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Not select analyze for select asts."); const auto & settings = getContext()->getSettingsRef(); @@ -1544,17 +1434,6 @@ void TreeRewriter::normalize( CustomizeGlobalNotInVisitor(data_global_not_null_in).visit(query); } - // Try to fuse sum/avg/count with identical arguments to one sumCount call, - // if we have at least two different functions. E.g. we will replace sum(x) - // and count(x) with sumCount(x).1 and sumCount(x).2, and sumCount() will - // be calculated only once because of CSE. 
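The block removed from TreeRewriter::normalize above implemented the optimize_fuse_sum_count_avg rewrite: sum(x) and count(x) become sumCount(x).1 and sumCount(x).2, and avg(x) becomes their ratio, so common subexpression elimination evaluates sumCount only once. The minimal sketch below only illustrates why the fused form is attractive, computing both aggregates in a single pass over a column; it is not the removed visitor, and the names are illustrative.

    #include <cstddef>
    #include <iostream>
    #include <utility>
    #include <vector>

    // One pass produces both aggregates; sum(x), count(x) and avg(x) can all be
    // expressed in terms of this pair, so the column is scanned only once.
    std::pair<double, size_t> sumCount(const std::vector<double> & column)
    {
        double sum = 0.0;
        size_t count = 0;
        for (double value : column)
        {
            sum += value;
            ++count;
        }
        return {sum, count};
    }

    int main()
    {
        std::vector<double> x = {1.0, 2.0, 4.0};
        auto [sum, count] = sumCount(x);                      // plays the role of sumCount(x)
        double avg = sum / static_cast<double>(count);        // avg(x) = sumCount(x).1 / sumCount(x).2
        std::cout << sum << ' ' << count << ' ' << avg << '\n';   // 7 3 2.33333
    }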
- if (settings.optimize_fuse_sum_count_avg && settings.optimize_syntax_fuse_functions) - { - FuseSumCountAggregatesVisitor::Data data; - FuseSumCountAggregatesVisitor(data).visit(query); - fuseSumCountAggregates(data.fuse_map); - } - /// Rewrite all aggregate functions to add -OrNull suffix to them if (settings.aggregate_functions_null_for_empty) { diff --git a/src/Interpreters/addTypeConversionToAST.cpp b/src/Interpreters/addTypeConversionToAST.cpp index 2f766880253..65feff30f4a 100644 --- a/src/Interpreters/addTypeConversionToAST.cpp +++ b/src/Interpreters/addTypeConversionToAST.cpp @@ -41,7 +41,7 @@ ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name, const Nam for (const auto & action : actions->getActions()) if (action.node->type == ActionsDAG::ActionType::ARRAY_JOIN) - throw Exception("Unsupported default value that requires ARRAY JOIN action", ErrorCodes::THERE_IS_NO_DEFAULT_VALUE); + throw Exception(ErrorCodes::THERE_IS_NO_DEFAULT_VALUE, "Unsupported default value that requires ARRAY JOIN action"); auto block = actions->getSampleBlock(); diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index 00b01781007..9e4f543db43 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -86,7 +86,7 @@ Field convertIntToDecimalType(const Field & from, const DataTypeDecimal & typ { From value = from.get(); if (!type.canStoreWhole(value)) - throw Exception("Number is too big to place in " + type.getName(), ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Number is too big to place in {}", type.getName()); T scaled_value = type.getScaleMultiplier() * T(static_cast(value)); return DecimalField(scaled_value, type.getScale()); @@ -114,7 +114,7 @@ Field convertFloatToDecimalType(const Field & from, const DataTypeDecimal & t { From value = from.get(); if (!type.canStoreWhole(value)) - throw Exception("Number is too big to place in " + type.getName(), ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Number is too big to place in {}", type.getName()); //String sValue = convertFieldToString(from); //int fromScale = sValue.length()- sValue.find('.') - 1; @@ -321,8 +321,8 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID size_t dst_tuple_size = type_tuple->getElements().size(); if (dst_tuple_size != src_tuple_size) - throw Exception("Bad size of tuple in IN or VALUES section. Expected size: " - + toString(dst_tuple_size) + ", actual size: " + toString(src_tuple_size), ErrorCodes::TYPE_MISMATCH); + throw Exception(ErrorCodes::TYPE_MISMATCH, "Bad size of tuple in IN or VALUES section. 
" + "Expected size: {}, actual size: {}", dst_tuple_size, src_tuple_size); Tuple res(dst_tuple_size); bool have_unconvertible_element = false; @@ -401,7 +401,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID const auto & name = src.get().name; if (agg_func_type->getName() != name) - throw Exception("Cannot convert " + name + " to " + agg_func_type->getName(), ErrorCodes::TYPE_MISMATCH); + throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert {} to {}", name, agg_func_type->getName()); return src; } diff --git a/src/Interpreters/evaluateConstantExpression.cpp b/src/Interpreters/evaluateConstantExpression.cpp index a5cdbf78070..6aa89426916 100644 --- a/src/Interpreters/evaluateConstantExpression.cpp +++ b/src/Interpreters/evaluateConstantExpression.cpp @@ -86,12 +86,14 @@ std::pair> evaluateConstantExpression(co expr_for_constant_folding->execute(block_with_constants); if (!block_with_constants || block_with_constants.rows() == 0) - throw Exception("Logical error: empty block after evaluation of constant expression for IN, VALUES or LIMIT or aggregate function parameter", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Logical error: empty block after evaluation " + "of constant expression for IN, VALUES or LIMIT or aggregate function parameter"); if (!block_with_constants.has(name)) throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Element of set in IN, VALUES or LIMIT or aggregate function parameter is not a constant expression (result column not found): {}", name); + "Element of set in IN, VALUES or LIMIT or aggregate function parameter " + "is not a constant expression (result column not found): {}", name); const ColumnWithTypeAndName & result = block_with_constants.getByName(name); const IColumn & result_column = *result.column; @@ -99,7 +101,8 @@ std::pair> evaluateConstantExpression(co /// Expressions like rand() or now() are not constant if (!isColumnConst(result_column)) throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Element of set in IN, VALUES or LIMIT or aggregate function parameter is not a constant expression (result column is not const): {}", name); + "Element of set in IN, VALUES or LIMIT or aggregate function parameter " + "is not a constant expression (result column is not const): {}", name); return std::make_pair(result_column[0], result.type); } diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp index 454474dde2b..60d16eda6ba 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.cpp +++ b/src/Interpreters/executeDDLQueryOnCluster.cpp @@ -68,18 +68,18 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, ContextPtr context, auto * query = dynamic_cast(query_ptr.get()); if (!query) { - throw Exception("Distributed execution is not supported for such DDL queries", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Distributed execution is not supported for such DDL queries"); } if (!context->getSettingsRef().allow_distributed_ddl) - throw Exception("Distributed DDL queries are prohibited for the user", ErrorCodes::QUERY_IS_PROHIBITED); + throw Exception(ErrorCodes::QUERY_IS_PROHIBITED, "Distributed DDL queries are prohibited for the user"); if (const auto * query_alter = query_ptr->as()) { for (const auto & command : query_alter->command_list->children) { if (!isSupportedAlterType(command->as().type)) - throw Exception("Unsupported type of ALTER query", ErrorCodes::NOT_IMPLEMENTED); + throw 
Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported type of ALTER query"); } } @@ -100,7 +100,7 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, ContextPtr context, /// Enumerate hosts which will be used to send query. auto addresses = cluster->filterAddressesByShardOrReplica(params.only_shard_num, params.only_replica_num); if (addresses.empty()) - throw Exception("No hosts defined to execute distributed DDL query", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "No hosts defined to execute distributed DDL query"); std::vector hosts; hosts.reserve(addresses.size()); @@ -133,7 +133,7 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, ContextPtr context, assert(use_local_default_database || !host_default_databases.empty()); if (use_local_default_database && !host_default_databases.empty()) - throw Exception("Mixed local default DB and shard default DB in DDL query", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Mixed local default DB and shard default DB in DDL query"); if (use_local_default_database) { @@ -392,15 +392,14 @@ Chunk DDLQueryStatusSource::generate() size_t num_unfinished_hosts = waiting_hosts.size() - num_hosts_finished; size_t num_active_hosts = current_active_hosts.size(); - constexpr const char * msg_format = "Watching task {} is executing longer than distributed_ddl_task_timeout (={}) seconds. " + constexpr auto msg_format = "Watching task {} is executing longer than distributed_ddl_task_timeout (={}) seconds. " "There are {} unfinished hosts ({} of them are currently active), " "they are going to execute the query in background"; if (throw_on_timeout) { if (!first_exception) - first_exception = std::make_unique( - fmt::format(msg_format, node_path, timeout_seconds, num_unfinished_hosts, num_active_hosts), - ErrorCodes::TIMEOUT_EXCEEDED); + first_exception = std::make_unique(Exception(ErrorCodes::TIMEOUT_EXCEEDED, + msg_format, node_path, timeout_seconds, num_unfinished_hosts, num_active_hosts)); /// For Replicated database print a list of unfinished hosts as well. Will return empty block on next iteration. if (is_replicated_database) @@ -423,12 +422,10 @@ Chunk DDLQueryStatusSource::generate() /// Paradoxically, this exception will be throw even in case of "never_throw" mode. if (!first_exception) - first_exception = std::make_unique( - fmt::format( + first_exception = std::make_unique(Exception(ErrorCodes::UNFINISHED, "Cannot provide query execution status. 
The query's node {} has been deleted by the cleaner" " since it was finished (or its lifetime is expired)", - node_path), - ErrorCodes::UNFINISHED); + node_path)); return {}; } @@ -464,8 +461,8 @@ Chunk DDLQueryStatusSource::generate() throw Exception(ErrorCodes::LOGICAL_ERROR, "There was an error on {}: {} (probably it's a bug)", host_id, status.message); auto [host, port] = parseHostAndPort(host_id); - first_exception = std::make_unique( - fmt::format("There was an error on [{}:{}]: {}", host, port, status.message), status.code); + first_exception = std::make_unique(Exception(status.code, + "There was an error on [{}:{}]: {}", host, port, status.message)); } ++num_hosts_finished; diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index a5fd2a121b0..f46adf91ee0 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -187,12 +188,12 @@ static void logException(ContextPtr context, QueryLogElement & elem) message.format_string = elem.exception_format_string; if (elem.stack_trace.empty()) - message.message = fmt::format("{} (from {}){} (in query: {})", elem.exception, + message.text = fmt::format("{} (from {}){} (in query: {})", elem.exception, context->getClientInfo().current_address.toString(), comment, toOneLineQuery(elem.query)); else - message.message = fmt::format( + message.text = fmt::format( "{} (from {}){} (in query: {}), Stack trace (when copying this message, always include the lines below):\n\n{}", elem.exception, context->getClientInfo().current_address.toString(), @@ -246,7 +247,7 @@ static void onExceptionBeforeStart( elem.exception_code = getCurrentExceptionCode(); auto exception_message = getCurrentExceptionMessageAndPattern(/* with_stacktrace */ false); - elem.exception = std::move(exception_message.message); + elem.exception = std::move(exception_message.text); elem.exception_format_string = exception_message.format_string; elem.client_info = context->getClientInfo(); @@ -707,10 +708,59 @@ static std::tuple executeQueryImpl( if (OpenTelemetry::CurrentContext().isTraceEnabled()) { auto * raw_interpreter_ptr = interpreter.get(); - std::string class_name(demangle(typeid(*raw_interpreter_ptr).name())); + String class_name(demangle(typeid(*raw_interpreter_ptr).name())); span = std::make_unique(class_name + "::execute()"); } + res = interpreter->execute(); + + /// If + /// - it is a SELECT query, + /// - passive (read) use of the query cache is enabled, and + /// - the query cache knows the query result + /// then replace the pipeline by a new pipeline with a single source that is populated from the query cache + auto query_cache = context->getQueryCache(); + bool read_result_from_query_cache = false; /// a query must not read from *and* write to the query cache at the same time + if (query_cache != nullptr + && (settings.allow_experimental_query_cache && settings.use_query_cache && settings.enable_reads_from_query_cache) + && res.pipeline.pulling()) + { + QueryCache::Key key( + ast, res.pipeline.getHeader(), + std::make_optional(context->getUserName()), + std::chrono::system_clock::now() + std::chrono::seconds(settings.query_cache_ttl)); + QueryCache::Reader reader = query_cache->createReader(key); + if (reader.hasCacheEntryForKey()) + { + res.pipeline = QueryPipeline(reader.getPipe()); + read_result_from_query_cache = true; + } + } + + /// If + /// - it is a SELECT query, and + /// - active (write) use of the query cache is enabled + /// 
then add a processor on top of the pipeline which stores the result in the query cache. + if (!read_result_from_query_cache + && query_cache != nullptr + && settings.allow_experimental_query_cache && settings.use_query_cache && settings.enable_writes_to_query_cache + && res.pipeline.pulling() + && (!astContainsNonDeterministicFunctions(ast, context) || settings.query_cache_store_results_of_queries_with_nondeterministic_functions)) + { + QueryCache::Key key( + ast, res.pipeline.getHeader(), + settings.query_cache_share_between_users ? std::nullopt : std::make_optional(context->getUserName()), + std::chrono::system_clock::now() + std::chrono::seconds(settings.query_cache_ttl)); + + const size_t num_query_runs = query_cache->recordQueryRun(key); + if (num_query_runs > settings.query_cache_min_query_runs) + { + auto stream_in_query_cache_transform = std::make_shared(res.pipeline.getHeader(), query_cache, key, + std::chrono::milliseconds(context->getSettings().query_cache_min_query_duration.totalMilliseconds())); + res.pipeline.streamIntoQueryCache(stream_in_query_cache_transform); + } + } + } } @@ -858,6 +908,10 @@ static std::tuple executeQueryImpl( auto finish_callback = [elem, context, ast, + allow_experimental_query_cache = settings.allow_experimental_query_cache, + use_query_cache = settings.use_query_cache, + enable_writes_to_query_cache = settings.enable_writes_to_query_cache, + query_cache_store_results_of_queries_with_nondeterministic_functions = settings.query_cache_store_results_of_queries_with_nondeterministic_functions, log_queries, log_queries_min_type = settings.log_queries_min_type, log_queries_min_query_duration_ms = settings.log_queries_min_query_duration_ms.totalMilliseconds(), @@ -867,6 +921,17 @@ static std::tuple executeQueryImpl( pulling_pipeline = pipeline.pulling(), query_span](QueryPipeline & query_pipeline) mutable { + /// If active (write) use of the query cache is enabled and the query is eligible for result caching, then store the query + /// result buffered in the special-purpose cache processor (added on top of the pipeline) into the cache. 
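+        /// (Illustrative note, not part of the original hunk) e.g. for a SELECT that ran with use_query_cache = 1 and
+        /// enable_writes_to_query_cache = 1, the chunks buffered by the cache transform are committed to the query cache
+        /// here, once the query has finished without an exception.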
+ auto query_cache = context->getQueryCache(); + if (query_cache != nullptr + && pulling_pipeline + && allow_experimental_query_cache && use_query_cache && enable_writes_to_query_cache + && (!astContainsNonDeterministicFunctions(ast, context) || query_cache_store_results_of_queries_with_nondeterministic_functions)) + { + query_pipeline.finalizeWriteInQueryCache(); + } + QueryStatusPtr process_list_elem = context->getProcessListElement(); if (process_list_elem) @@ -1024,7 +1089,7 @@ static std::tuple executeQueryImpl( elem.type = QueryLogElementType::EXCEPTION_WHILE_PROCESSING; elem.exception_code = getCurrentExceptionCode(); auto exception_message = getCurrentExceptionMessageAndPattern(/* with_stacktrace */ false); - elem.exception = std::move(exception_message.message); + elem.exception = std::move(exception_message.text); elem.exception_format_string = exception_message.format_string; QueryStatusPtr process_list_elem = context->getProcessListElement(); @@ -1199,7 +1264,7 @@ void executeQuery( if (ast_query_with_output && ast_query_with_output->out_file) { if (!allow_into_outfile) - throw Exception("INTO OUTFILE is not allowed", ErrorCodes::INTO_OUTFILE_NOT_ALLOWED); + throw Exception(ErrorCodes::INTO_OUTFILE_NOT_ALLOWED, "INTO OUTFILE is not allowed"); const auto & out_file = typeid_cast(*ast_query_with_output->out_file).value.safeGet(); diff --git a/src/Interpreters/getClusterName.cpp b/src/Interpreters/getClusterName.cpp index dc3e9b41628..ea053d356d2 100644 --- a/src/Interpreters/getClusterName.cpp +++ b/src/Interpreters/getClusterName.cpp @@ -21,7 +21,7 @@ std::string getClusterName(const IAST & node) { auto name = tryGetClusterName(node); if (!name) - throw Exception("Illegal expression instead of cluster name.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal expression instead of cluster name."); return std::move(name).value(); } diff --git a/src/Interpreters/getHeaderForProcessingStage.cpp b/src/Interpreters/getHeaderForProcessingStage.cpp index 48acfb5512a..d40cbd9366e 100644 --- a/src/Interpreters/getHeaderForProcessingStage.cpp +++ b/src/Interpreters/getHeaderForProcessingStage.cpp @@ -132,7 +132,7 @@ Block getHeaderForProcessingStage( return InterpreterSelectQuery(query, context, std::move(pipe), SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); } } - throw Exception("Logical Error: unknown processed stage.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical Error: unknown processed stage."); } } diff --git a/src/Interpreters/interpretSubquery.cpp b/src/Interpreters/interpretSubquery.cpp index cc38ea76d5d..5f00be07fa5 100644 --- a/src/Interpreters/interpretSubquery.cpp +++ b/src/Interpreters/interpretSubquery.cpp @@ -50,7 +50,7 @@ std::shared_ptr interpretSubquery( const auto * table = table_expression->as(); if (!subquery && !table && !function) - throw Exception("Table expression is undefined, Method: ExpressionAnalyzer::interpretSubquery." , ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Table expression is undefined, Method: ExpressionAnalyzer::interpretSubquery."); /** The subquery in the IN / JOIN section does not have any restrictions on the maximum size of the result. * Because the result of this query is not the result of the entire query. 
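The executeQuery.cpp hunks above gate the new query result cache behind several settings: reads happen only when allow_experimental_query_cache, use_query_cache and enable_reads_from_query_cache are on and an entry exists, and writes happen only when enable_writes_to_query_cache is on, the query is deterministic (or query_cache_store_results_of_queries_with_nondeterministic_functions is set), and the query has been seen more than query_cache_min_query_runs times. Below is a minimal, self-contained C++ sketch of that gating pattern; every name in it (QueryResultCache, Settings, executeWithCache, and so on) is illustrative and is not the actual ClickHouse API.

    // Hypothetical sketch of the query-cache gating logic: read the cached result only when reads
    // are enabled, store a new result only when writes are enabled, the query is deterministic
    // (or the user opted in), and the query has already run more than `min_query_runs` times.
    #include <chrono>
    #include <cstddef>
    #include <iostream>
    #include <map>
    #include <optional>
    #include <string>

    struct Settings
    {
        bool use_query_cache = true;
        bool enable_reads_from_query_cache = true;
        bool enable_writes_to_query_cache = true;
        bool store_results_of_queries_with_nondeterministic_functions = false;
        size_t min_query_runs = 2;
        std::chrono::seconds ttl{60};
    };

    class QueryResultCache
    {
    public:
        std::optional<std::string> tryRead(const std::string & key) const
        {
            auto it = entries.find(key);
            if (it == entries.end() || std::chrono::steady_clock::now() > it->second.expires_at)
                return std::nullopt;
            return it->second.result;
        }

        size_t recordQueryRun(const std::string & key) { return ++run_counts[key]; }

        void write(const std::string & key, std::string result, std::chrono::seconds ttl)
        {
            entries[key] = {std::move(result), std::chrono::steady_clock::now() + ttl};
        }

    private:
        struct Entry { std::string result; std::chrono::steady_clock::time_point expires_at; };
        std::map<std::string, Entry> entries;
        std::map<std::string, size_t> run_counts;
    };

    template <typename ExecuteFn>
    std::string executeWithCache(const std::string & query, bool is_deterministic,
                                 const Settings & settings, QueryResultCache & cache, ExecuteFn execute)
    {
        /// Read path: serve from the cache when passive use is enabled and a live entry exists.
        if (settings.use_query_cache && settings.enable_reads_from_query_cache)
            if (auto cached = cache.tryRead(query))
                return *cached;

        std::string result = execute(query);

        /// Write path: only cache sufficiently "hot", deterministic queries (or explicit opt-in).
        if (settings.use_query_cache && settings.enable_writes_to_query_cache
            && (is_deterministic || settings.store_results_of_queries_with_nondeterministic_functions)
            && cache.recordQueryRun(query) > settings.min_query_runs)
            cache.write(query, result, settings.ttl);

        return result;
    }

    int main()
    {
        Settings settings;
        QueryResultCache cache;
        auto run = [](const std::string &) { return std::string("42 rows"); };

        /// Runs 1 and 2 miss and are not cached yet; run 3 stores the result; run 4 is served from the cache.
        for (int i = 0; i < 4; ++i)
            std::cout << executeWithCache("SELECT sum(x) FROM t", /*is_deterministic=*/ true, settings, cache, run) << '\n';
    }

As in the hunk above, a query that was answered from the cache never reaches the write path, so it neither records a run nor re-stores its own result.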
diff --git a/src/Interpreters/misc.h b/src/Interpreters/misc.h index cae2691ca1f..c009808de3f 100644 --- a/src/Interpreters/misc.h +++ b/src/Interpreters/misc.h @@ -42,8 +42,9 @@ inline bool checkFunctionIsInOrGlobalInOperator(const ASTFunction & func) { size_t num_arguments = func.arguments->children.size(); if (num_arguments != 2) - throw Exception("Wrong number of arguments passed to function in. Expected: 2, passed: " + std::to_string(num_arguments), - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Wrong number of arguments passed to function in. Expected: 2, passed: {}", + num_arguments); return true; } diff --git a/src/Interpreters/parseColumnsListForTableFunction.cpp b/src/Interpreters/parseColumnsListForTableFunction.cpp index c44259a3ccc..9e6326b431a 100644 --- a/src/Interpreters/parseColumnsListForTableFunction.cpp +++ b/src/Interpreters/parseColumnsListForTableFunction.cpp @@ -83,7 +83,7 @@ ColumnsDescription parseColumnsListFromString(const std::string & structure, con auto * columns_list = dynamic_cast(columns_list_raw.get()); if (!columns_list) - throw Exception("Could not cast AST to ASTExpressionList", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not cast AST to ASTExpressionList"); auto columns = InterpreterCreateQuery::getColumnsDescription(*columns_list, context, false); auto validation_settings = DataTypeValidationSettings(context->getSettingsRef()); diff --git a/src/Interpreters/sortBlock.cpp b/src/Interpreters/sortBlock.cpp index 4343e8c7fc6..2ae5edc43b9 100644 --- a/src/Interpreters/sortBlock.cpp +++ b/src/Interpreters/sortBlock.cpp @@ -103,10 +103,8 @@ ColumnsWithSortDescriptions getColumnsWithSortDescription(const Block & block, c if (isCollationRequired(sort_column_description)) { if (!column->isCollationSupported()) - throw Exception( - "Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, " - "containing them.", - ErrorCodes::BAD_COLLATION); + throw Exception(ErrorCodes::BAD_COLLATION, "Collations could be specified only for String, LowCardinality(String), " + "Nullable(String) or for Array or Tuple, containing them."); } result.emplace_back(ColumnWithSortDescription{column, sort_column_description, isColumnConst(*column)}); diff --git a/src/Interpreters/threadPoolCallbackRunner.h b/src/Interpreters/threadPoolCallbackRunner.h index c488a54c1ec..7990c538c03 100644 --- a/src/Interpreters/threadPoolCallbackRunner.h +++ b/src/Interpreters/threadPoolCallbackRunner.h @@ -10,16 +10,16 @@ namespace DB { /// High-order function to run callbacks (functions with 'void()' signature) somewhere asynchronously. -template -using ThreadPoolCallbackRunner = std::function(std::function &&, size_t priority)>; +template > +using ThreadPoolCallbackRunner = std::function(Callback &&, int64_t priority)>; /// Creates CallbackRunner that runs every callback with 'pool->scheduleOrThrow()'. 
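/// Example usage with the new signature (illustrative, not taken from this header):
///     auto runner = threadPoolCallbackRunner<void>(pool, "MyPool");
///     std::future<void> f = runner([] { /* work */ }, /* priority */ 0);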
-template -ThreadPoolCallbackRunner threadPoolCallbackRunner(ThreadPool & pool, const std::string & thread_name) +template > +ThreadPoolCallbackRunner threadPoolCallbackRunner(ThreadPool & pool, const std::string & thread_name) { - return [pool = &pool, thread_group = CurrentThread::getGroup(), thread_name](std::function && callback, size_t priority) mutable -> std::future + return [pool = &pool, thread_group = CurrentThread::getGroup(), thread_name](Callback && callback, int64_t priority) mutable -> std::future { - auto task = std::make_shared>([thread_group, thread_name, callback = std::move(callback)]() -> Result + auto task = std::make_shared>([thread_group, thread_name, callback = std::move(callback)]() mutable -> Result { if (thread_group) CurrentThread::attachTo(thread_group); @@ -43,4 +43,11 @@ ThreadPoolCallbackRunner threadPoolCallbackRunner(ThreadPool & pool, con }; } +template +std::future scheduleFromThreadPool(T && task, ThreadPool & pool, const std::string & thread_name, int64_t priority = 0) +{ + auto schedule = threadPoolCallbackRunner(pool, thread_name); + return schedule(std::move(task), priority); +} + } diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp index 80801278963..5d347446d37 100644 --- a/src/Parsers/ASTAlterQuery.cpp +++ b/src/Parsers/ASTAlterQuery.cpp @@ -487,7 +487,7 @@ void ASTAlterCommand::formatImpl(const FormatSettings & settings, FormatState & rename_to->formatImpl(settings, state, frame); } else - throw Exception("Unexpected type of ALTER", ErrorCodes::UNEXPECTED_AST_STRUCTURE); + throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "Unexpected type of ALTER"); } bool ASTAlterQuery::isOneCommandTypeOnly(const ASTAlterCommand::Type & type) const diff --git a/src/Parsers/ASTColumnsMatcher.cpp b/src/Parsers/ASTColumnsMatcher.cpp index d301394cc54..940030577d6 100644 --- a/src/Parsers/ASTColumnsMatcher.cpp +++ b/src/Parsers/ASTColumnsMatcher.cpp @@ -69,9 +69,8 @@ void ASTColumnsRegexpMatcher::setPattern(String pattern) original_pattern = std::move(pattern); column_matcher = std::make_shared(original_pattern, RE2::Quiet); if (!column_matcher->ok()) - throw DB::Exception( - "COLUMNS pattern " + original_pattern + " cannot be compiled: " + column_matcher->error(), - DB::ErrorCodes::CANNOT_COMPILE_REGEXP); + throw DB::Exception(DB::ErrorCodes::CANNOT_COMPILE_REGEXP, + "COLUMNS pattern {} cannot be compiled: {}", original_pattern, column_matcher->error()); } const String & ASTColumnsRegexpMatcher::getPattern() const @@ -177,9 +176,8 @@ void ASTQualifiedColumnsRegexpMatcher::setPattern(String pattern, bool set_match column_matcher = std::make_shared(original_pattern, RE2::Quiet); if (!column_matcher->ok()) - throw DB::Exception( - "COLUMNS pattern " + original_pattern + " cannot be compiled: " + column_matcher->error(), - DB::ErrorCodes::CANNOT_COMPILE_REGEXP); + throw DB::Exception(DB::ErrorCodes::CANNOT_COMPILE_REGEXP, + "COLUMNS pattern {} cannot be compiled: {}", original_pattern, column_matcher->error()); } void ASTQualifiedColumnsRegexpMatcher::setMatcher(std::shared_ptr matcher) diff --git a/src/Parsers/ASTColumnsTransformers.cpp b/src/Parsers/ASTColumnsTransformers.cpp index f3bbeb6167b..769503fb7fe 100644 --- a/src/Parsers/ASTColumnsTransformers.cpp +++ b/src/Parsers/ASTColumnsTransformers.cpp @@ -263,9 +263,8 @@ void ASTColumnsExceptTransformer::transform(ASTs & nodes) const std::for_each(expected_columns.begin(), expected_columns.end(), [&](String x) { expected_columns_str += (" " + x) ; }); - throw Exception( - "Columns 
transformer EXCEPT expects following column(s) :" + expected_columns_str, - ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "Columns transformer EXCEPT expects following column(s) :{}", + expected_columns_str); } } @@ -274,9 +273,8 @@ void ASTColumnsExceptTransformer::setPattern(String pattern) original_pattern = std::move(pattern); column_matcher = std::make_shared(original_pattern, RE2::Quiet); if (!column_matcher->ok()) - throw DB::Exception( - "COLUMNS pattern " + original_pattern + " cannot be compiled: " + column_matcher->error(), - DB::ErrorCodes::CANNOT_COMPILE_REGEXP); + throw DB::Exception(DB::ErrorCodes::CANNOT_COMPILE_REGEXP, "COLUMNS pattern {} cannot be compiled: {}", + original_pattern, column_matcher->error()); } const std::shared_ptr & ASTColumnsExceptTransformer::getMatcher() const @@ -377,9 +375,8 @@ void ASTColumnsReplaceTransformer::transform(ASTs & nodes) const { auto & replacement = replace_child->as(); if (replace_map.find(replacement.name) != replace_map.end()) - throw Exception( - "Expressions in columns transformer REPLACE should not contain the same replacement more than once", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Expressions in columns transformer REPLACE should not contain the same replacement more than once"); replace_map.emplace(replacement.name, replacement.expr); } @@ -419,9 +416,8 @@ void ASTColumnsReplaceTransformer::transform(ASTs & nodes) const expected_columns += ", "; expected_columns += elem.first; } - throw Exception( - "Columns transformer REPLACE expects following column(s) : " + expected_columns, - ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "Columns transformer REPLACE expects following column(s) : {}", + expected_columns); } } diff --git a/src/Parsers/ASTDeleteQuery.cpp b/src/Parsers/ASTDeleteQuery.cpp index 08b40f65121..09dc4b936ae 100644 --- a/src/Parsers/ASTDeleteQuery.cpp +++ b/src/Parsers/ASTDeleteQuery.cpp @@ -41,6 +41,8 @@ void ASTDeleteQuery::formatQueryImpl(const FormatSettings & settings, FormatStat } settings.ostr << backQuoteIfNeed(getTable()); + formatOnCluster(settings); + settings.ostr << (settings.hilite ? hilite_keyword : "") << " WHERE " << (settings.hilite ? hilite_none : ""); predicate->formatImpl(settings, state, frame); } diff --git a/src/Parsers/ASTDeleteQuery.h b/src/Parsers/ASTDeleteQuery.h index bcb97639b64..1dab684ffc9 100644 --- a/src/Parsers/ASTDeleteQuery.h +++ b/src/Parsers/ASTDeleteQuery.h @@ -2,15 +2,20 @@ #include #include +#include namespace DB { /// DELETE FROM [db.]name WHERE ... 
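/// With the ON CLUSTER support added below, the form DELETE FROM [db.]name ON CLUSTER cluster WHERE ...
/// (illustrative syntax) is formatted and rewritten per host as well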
-class ASTDeleteQuery : public ASTQueryWithTableAndOutput +class ASTDeleteQuery : public ASTQueryWithTableAndOutput, public ASTQueryWithOnCluster { public: String getID(char delim) const final; ASTPtr clone() const final; + ASTPtr getRewrittenASTWithoutOnCluster(const WithoutOnClusterASTRewriteParams & params) const override + { + return removeOnCluster(clone(), params.default_database); + } ASTPtr predicate; diff --git a/src/Parsers/ASTDropQuery.cpp b/src/Parsers/ASTDropQuery.cpp index 11c1dd4c47a..93a4b547025 100644 --- a/src/Parsers/ASTDropQuery.cpp +++ b/src/Parsers/ASTDropQuery.cpp @@ -21,7 +21,7 @@ String ASTDropQuery::getID(char delim) const else if (kind == ASTDropQuery::Kind::Truncate) return "TruncateQuery" + (delim + getDatabase()) + delim + getTable(); else - throw Exception("Not supported kind of drop query.", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Not supported kind of drop query."); } ASTPtr ASTDropQuery::clone() const @@ -42,7 +42,7 @@ void ASTDropQuery::formatQueryImpl(const FormatSettings & settings, FormatState else if (kind == ASTDropQuery::Kind::Truncate) settings.ostr << "TRUNCATE "; else - throw Exception("Not supported kind of drop query.", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Not supported kind of drop query."); if (temporary) settings.ostr << "TEMPORARY "; diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index 4ac4bb6144e..7a19cba0f75 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -37,81 +37,118 @@ namespace { /// Finds arguments of a specified function which should not be displayed for most users for security reasons. /// That involves passwords and secret keys. - /// The member function getRange() returns a pair of numbers [first, last) specifying arguments - /// which must be hidden. If the function returns {-1, -1} that means no arguments must be hidden. class FunctionSecretArgumentsFinder { public: explicit FunctionSecretArgumentsFinder(const ASTFunction & function_) : function(function_) { - if (function.arguments) - { - if (const auto * expr_list = function.arguments->as()) - arguments = &expr_list->children; - } - } + if (!function.arguments) + return; - std::pair getRange() const - { - if (!arguments) - return npos; + const auto * expr_list = function.arguments->as(); + if (!expr_list) + return; + arguments = &expr_list->children; switch (function.kind) { - case ASTFunction::Kind::ORDINARY_FUNCTION: return findOrdinaryFunctionSecretArguments(); - case ASTFunction::Kind::WINDOW_FUNCTION: return npos; - case ASTFunction::Kind::LAMBDA_FUNCTION: return npos; - case ASTFunction::Kind::TABLE_ENGINE: return findTableEngineSecretArguments(); - case ASTFunction::Kind::DATABASE_ENGINE: return findDatabaseEngineSecretArguments(); - case ASTFunction::Kind::BACKUP_NAME: return findBackupNameSecretArguments(); + case ASTFunction::Kind::ORDINARY_FUNCTION: findOrdinaryFunctionSecretArguments(); break; + case ASTFunction::Kind::WINDOW_FUNCTION: break; + case ASTFunction::Kind::LAMBDA_FUNCTION: break; + case ASTFunction::Kind::TABLE_ENGINE: findTableEngineSecretArguments(); break; + case ASTFunction::Kind::DATABASE_ENGINE: findDatabaseEngineSecretArguments(); break; + case ASTFunction::Kind::BACKUP_NAME: findBackupNameSecretArguments(); break; } } - static const constexpr std::pair npos{static_cast(-1), static_cast(-1)}; + struct Result + { + /// Result constructed by default means no arguments will be hidden. 
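+        /// For example (illustrative), for mysql('host:port', 'database', 'table', 'user', 'password')
+        /// the finder yields {start = 4, count = 1, are_named = false}.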
+ size_t start = static_cast(-1); + size_t count = 0; /// Mostly it's either 0 or 1. There are only a few cases where `count` can be greater than 1 (e.g. see `encrypt`). + /// In all known cases secret arguments are consecutive + bool are_named = false; /// Arguments like `password = 'password'` are considered as named arguments. + }; + + Result getResult() const { return result; } private: - std::pair findOrdinaryFunctionSecretArguments() const + const ASTFunction & function; + const ASTs * arguments = nullptr; + Result result; + + void markSecretArgument(size_t index, bool argument_is_named = false) + { + if (!result.count) + { + result.start = index; + result.are_named = argument_is_named; + } + chassert(index >= result.start); /// We always check arguments consecutively + result.count = index + 1 - result.start; + if (!argument_is_named) + result.are_named = false; + } + + void findOrdinaryFunctionSecretArguments() { if ((function.name == "mysql") || (function.name == "postgresql") || (function.name == "mongodb")) { /// mysql('host:port', 'database', 'table', 'user', 'password', ...) /// postgresql('host:port', 'database', 'table', 'user', 'password', ...) /// mongodb('host:port', 'database', 'collection', 'user', 'password', ...) - return {4, 5}; + findMySQLFunctionSecretArguments(); } else if ((function.name == "s3") || (function.name == "cosn") || (function.name == "oss")) { /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...) - return findS3FunctionSecretArguments(/* is_cluster_function= */ false); + findS3FunctionSecretArguments(/* is_cluster_function= */ false); } else if (function.name == "s3Cluster") { /// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', ...) - return findS3FunctionSecretArguments(/* is_cluster_function= */ true); + findS3FunctionSecretArguments(/* is_cluster_function= */ true); } else if ((function.name == "remote") || (function.name == "remoteSecure")) { /// remote('addresses_expr', 'db', 'table', 'user', 'password', ...) - return findRemoteFunctionSecretArguments(); + findRemoteFunctionSecretArguments(); } else if ((function.name == "encrypt") || (function.name == "decrypt") || (function.name == "aes_encrypt_mysql") || (function.name == "aes_decrypt_mysql") || (function.name == "tryDecrypt")) { /// encrypt('mode', 'plaintext', 'key' [, iv, aad]) - return findEncryptionFunctionSecretArguments(); - } - else - { - return npos; + findEncryptionFunctionSecretArguments(); } } - std::pair findS3FunctionSecretArguments(bool is_cluster_function) const + void findMySQLFunctionSecretArguments() + { + if (isNamedCollectionName(0)) + { + /// mysql(named_collection, ..., password = 'password', ...) + findSecretNamedArgument("password", 1); + } + else + { + /// mysql('host:port', 'database', 'table', 'user', 'password', ...) + markSecretArgument(4); + } + } + + void findS3FunctionSecretArguments(bool is_cluster_function) { /// s3Cluster('cluster_name', 'url', ...) has 'url' as its second argument. size_t url_arg_idx = is_cluster_function ? 1 : 0; + if (!is_cluster_function && isNamedCollectionName(0)) + { + /// s3(named_collection, ..., secret_access_key = 'secret_access_key', ...) + findSecretNamedArgument("secret_access_key", 1); + return; + } + /// We're going to replace 'aws_secret_access_key' with '[HIDDEN'] for the following signatures: /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...) 
/// s3Cluster('cluster_name', 'url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression') @@ -119,12 +156,12 @@ namespace /// But we should check the number of arguments first because we don't need to do any replacements in case of /// s3('url' [, 'format']) or s3Cluster('cluster_name', 'url' [, 'format']) if (arguments->size() < url_arg_idx + 3) - return npos; + return; if (arguments->size() >= url_arg_idx + 5) { /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'structure', ...) - return {url_arg_idx + 2, url_arg_idx + 3}; + markSecretArgument(url_arg_idx + 2); } else { @@ -136,15 +173,16 @@ namespace { /// We couldn't evaluate the argument after 'url' so we don't know whether it is a format or `aws_access_key_id`. /// So it's safer to wipe the next argument just in case. - return {url_arg_idx + 2, url_arg_idx + 3}; /// Wipe either `aws_secret_access_key` or `structure`. + markSecretArgument(url_arg_idx + 2); /// Wipe either `aws_secret_access_key` or `structure`. + return; } if (KnownFormatNames::instance().exists(format)) - return npos; /// The argument after 'url' is a format: s3('url', 'format', ...) + return; /// The argument after 'url' is a format: s3('url', 'format', ...) /// The argument after 'url' is not a format so we do our replacement: /// s3('url', 'aws_access_key_id', 'aws_secret_access_key', ...) -> s3('url', 'aws_access_key_id', '[HIDDEN]', ...) - return {url_arg_idx + 2, url_arg_idx + 3}; + markSecretArgument(url_arg_idx + 2); } } @@ -153,8 +191,12 @@ namespace if (arg_idx >= arguments->size()) return false; - ASTPtr argument = (*arguments)[arg_idx]; - if (const auto * literal = argument->as()) + return tryGetStringFromArgument(*(*arguments)[arg_idx], res, allow_identifier); + } + + static bool tryGetStringFromArgument(const IAST & argument, String * res, bool allow_identifier = true) + { + if (const auto * literal = argument.as()) { if (literal->value.getType() != Field::Types::String) return false; @@ -165,7 +207,7 @@ namespace if (allow_identifier) { - if (const auto * id = argument->as()) + if (const auto * id = argument.as()) { if (res) *res = id->name(); @@ -176,8 +218,15 @@ namespace return false; } - std::pair findRemoteFunctionSecretArguments() const + void findRemoteFunctionSecretArguments() { + if (isNamedCollectionName(0)) + { + /// remote(named_collection, ..., password = 'password', ...) + findSecretNamedArgument("password", 1); + return; + } + /// We're going to replace 'password' with '[HIDDEN'] for the following signatures: /// remote('addresses_expr', db.table, 'user' [, 'password'] [, sharding_key]) /// remote('addresses_expr', 'db', 'table', 'user' [, 'password'] [, sharding_key]) @@ -186,7 +235,7 @@ namespace /// But we should check the number of arguments first because we don't need to do any replacements in case of /// remote('addresses_expr', db.table) if (arguments->size() < 3) - return npos; + return; size_t arg_num = 1; @@ -207,20 +256,17 @@ namespace /// before the argument 'password'. So it's safer to wipe two arguments just in case. /// The last argument can be also a `sharding_key`, so we need to check that argument is a literal string /// before wiping it (because the `password` argument is always a literal string). - auto res = npos; if (tryGetStringFromArgument(arg_num + 2, nullptr, /* allow_identifier= */ false)) { /// Wipe either `password` or `user`. 
- res = {arg_num + 2, arg_num + 3}; + markSecretArgument(arg_num + 2); } if (tryGetStringFromArgument(arg_num + 3, nullptr, /* allow_identifier= */ false)) { /// Wipe either `password` or `sharding_key`. - if (res == npos) - res.first = arg_num + 3; - res.second = arg_num + 4; + markSecretArgument(arg_num + 3); } - return res; + return; } /// Skip the current argument (which is either a database name or a qualified table name). @@ -241,9 +287,7 @@ namespace /// before wiping it (because the `password` argument is always a literal string). bool can_be_password = tryGetStringFromArgument(arg_num, nullptr, /* allow_identifier= */ false); if (can_be_password) - return {arg_num, arg_num + 1}; - - return npos; + markSecretArgument(arg_num); } /// Tries to get either a database name or a qualified table name from an argument. @@ -278,20 +322,24 @@ namespace return true; } - std::pair findEncryptionFunctionSecretArguments() const + void findEncryptionFunctionSecretArguments() { + if (arguments->empty()) + return; + /// We replace all arguments after 'mode' with '[HIDDEN]': /// encrypt('mode', 'plaintext', 'key' [, iv, aad]) -> encrypt('mode', '[HIDDEN]') - return {1, arguments->size()}; + result.start = 1; + result.count = arguments->size() - 1; } - std::pair findTableEngineSecretArguments() const + void findTableEngineSecretArguments() { const String & engine_name = function.name; if (engine_name == "ExternalDistributed") { /// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password') - return {5, 6}; + findExternalDistributedTableEngineSecretArguments(); } else if ((engine_name == "MySQL") || (engine_name == "PostgreSQL") || (engine_name == "MaterializedPostgreSQL") || (engine_name == "MongoDB")) @@ -300,21 +348,38 @@ namespace /// PostgreSQL('host:port', 'database', 'table', 'user', 'password', ...) /// MaterializedPostgreSQL('host:port', 'database', 'table', 'user', 'password', ...) /// MongoDB('host:port', 'database', 'collection', 'user', 'password', ...) - return {4, 5}; + findMySQLFunctionSecretArguments(); } else if ((engine_name == "S3") || (engine_name == "COSN") || (engine_name == "OSS")) { /// S3('url', ['aws_access_key_id', 'aws_secret_access_key',] ...) - return findS3TableEngineSecretArguments(); - } - else - { - return npos; + findS3TableEngineSecretArguments(); } } - std::pair findS3TableEngineSecretArguments() const + void findExternalDistributedTableEngineSecretArguments() { + if (isNamedCollectionName(1)) + { + /// ExternalDistributed('engine', named_collection, ..., password = 'password', ...) 
+ findSecretNamedArgument("password", 2); + } + else + { + /// ExternalDistributed('engine', 'host:port', 'database', 'table', 'user', 'password') + markSecretArgument(5); + } + } + + void findS3TableEngineSecretArguments() + { + if (isNamedCollectionName(0)) + { + /// S3(named_collection, ..., secret_access_key = 'secret_access_key') + findSecretNamedArgument("secret_access_key", 1); + return; + } + /// We replace 'aws_secret_access_key' with '[HIDDEN'] for the following signatures: /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format') /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression') @@ -322,12 +387,12 @@ namespace /// But we should check the number of arguments first because we don't need to do that replacements in case of /// S3('url' [, 'format' [, 'compression']]) if (arguments->size() < 4) - return npos; + return; - return {2, 3}; + markSecretArgument(2); } - std::pair findDatabaseEngineSecretArguments() const + void findDatabaseEngineSecretArguments() { const String & engine_name = function.name; if ((engine_name == "MySQL") || (engine_name == "MaterializeMySQL") || @@ -335,31 +400,71 @@ namespace (engine_name == "MaterializedPostgreSQL")) { /// MySQL('host:port', 'database', 'user', 'password') - /// PostgreSQL('host:port', 'database', 'user', 'password', ...) - return {3, 4}; - } - else - { - return npos; + /// PostgreSQL('host:port', 'database', 'user', 'password') + findMySQLDatabaseSecretArguments(); } } - std::pair findBackupNameSecretArguments() const + void findMySQLDatabaseSecretArguments() + { + if (isNamedCollectionName(0)) + { + /// MySQL(named_collection, ..., password = 'password', ...) + findSecretNamedArgument("password", 1); + } + else + { + /// MySQL('host:port', 'database', 'user', 'password') + markSecretArgument(3); + } + } + + void findBackupNameSecretArguments() { const String & engine_name = function.name; if (engine_name == "S3") { /// BACKUP ... TO S3(url, [aws_access_key_id, aws_secret_access_key]) - return {2, 3}; - } - else - { - return npos; + markSecretArgument(2); } } - const ASTFunction & function; - const ASTs * arguments = nullptr; + /// Whether a specified argument can be the name of a named collection? + bool isNamedCollectionName(size_t arg_idx) const + { + if (arguments->size() <= arg_idx) + return false; + + const auto * identifier = (*arguments)[arg_idx]->as(); + return identifier != nullptr; + } + + /// Looks for a secret argument with a specified name. This function looks for arguments in format `key=value` where the key is specified. 
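+    /// For example (illustrative), in remote(my_named_collection, password = 'secret') the equals-expression
+    /// at index 1 is reported as {start = 1, count = 1, are_named = true}.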
+ void findSecretNamedArgument(const std::string_view & key, size_t start = 0) + { + for (size_t i = start; i < arguments->size(); ++i) + { + const auto & argument = (*arguments)[i]; + const auto * equals_func = argument->as(); + if (!equals_func || (equals_func->name != "equals")) + continue; + + const auto * expr_list = equals_func->arguments->as(); + if (!expr_list) + continue; + + const auto & equal_args = expr_list->children; + if (equal_args.size() != 2) + continue; + + String found_key; + if (!tryGetStringFromArgument(*equal_args[0], &found_key)) + continue; + + if (found_key == key) + markSecretArgument(i, /* argument_is_named= */ true); + } + } }; } @@ -367,7 +472,7 @@ namespace void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr) const { if (name == "view") - throw Exception("Table function view cannot be used as an expression", ErrorCodes::UNEXPECTED_EXPRESSION); + throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "Table function view cannot be used as an expression"); /// If function can be converted to literal it will be parsed as literal after formatting. /// In distributed query it may lead to mismathed column names. @@ -966,32 +1071,39 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format && (name == "match" || name == "extract" || name == "extractAll" || name == "replaceRegexpOne" || name == "replaceRegexpAll"); - auto secret_arguments = std::make_pair(static_cast(-1), static_cast(-1)); + FunctionSecretArgumentsFinder::Result secret_arguments; if (!settings.show_secrets) - secret_arguments = FunctionSecretArgumentsFinder(*this).getRange(); + secret_arguments = FunctionSecretArgumentsFinder{*this}.getResult(); for (size_t i = 0, size = arguments->children.size(); i < size; ++i) { if (i != 0) settings.ostr << ", "; - if (arguments->children[i]->as()) + + const auto & argument = arguments->children[i]; + if (argument->as()) settings.ostr << "SETTINGS "; - if (!settings.show_secrets && (secret_arguments.first <= i) && (i < secret_arguments.second)) + if (!settings.show_secrets && (secret_arguments.start <= i) && (i < secret_arguments.start + secret_arguments.count)) { + if (secret_arguments.are_named) + { + assert_cast(argument.get())->arguments->children[0]->formatImpl(settings, state, nested_dont_need_parens); + settings.ostr << (settings.hilite ? hilite_operator : "") << " = " << (settings.hilite ? hilite_none : ""); + } settings.ostr << "'[HIDDEN]'"; - if (size - 1 < secret_arguments.second) + if (size <= secret_arguments.start + secret_arguments.count && !secret_arguments.are_named) break; /// All other arguments should also be hidden. 
continue; } if ((i == 1) && special_hilite_regexp - && highlightStringLiteralWithMetacharacters(arguments->children[i], settings, "|()^$.[]?*+{:-")) + && highlightStringLiteralWithMetacharacters(argument, settings, "|()^$.[]?*+{:-")) { continue; } - arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens); + argument->formatImpl(settings, state, nested_dont_need_parens); } } @@ -1005,14 +1117,7 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format bool ASTFunction::hasSecretParts() const { - if (arguments) - { - size_t num_arguments = arguments->children.size(); - auto secret_arguments = FunctionSecretArgumentsFinder(*this).getRange(); - if ((secret_arguments.first < num_arguments) && (secret_arguments.first < secret_arguments.second)) - return true; - } - return childrenHaveSecretParts(); + return (FunctionSecretArgumentsFinder{*this}.getResult().count > 0) || childrenHaveSecretParts(); } String getFunctionName(const IAST * ast) @@ -1020,7 +1125,9 @@ String getFunctionName(const IAST * ast) String res; if (tryGetFunctionNameInto(ast, res)) return res; - throw Exception(ast ? queryToString(*ast) + " is not an function" : "AST node is nullptr", ErrorCodes::UNEXPECTED_AST_STRUCTURE); + if (ast) + throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "{} is not an function", queryToString(*ast)); + throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "AST node is nullptr"); } std::optional tryGetFunctionName(const IAST * ast) diff --git a/src/Parsers/ASTIdentifier.cpp b/src/Parsers/ASTIdentifier.cpp index 8651a52f2c1..042b4d9085d 100644 --- a/src/Parsers/ASTIdentifier.cpp +++ b/src/Parsers/ASTIdentifier.cpp @@ -255,7 +255,9 @@ String getIdentifierName(const IAST * ast) String res; if (tryGetIdentifierNameInto(ast, res)) return res; - throw Exception(ast ? 
queryToString(*ast) + " is not an identifier" : "AST node is nullptr", ErrorCodes::UNEXPECTED_AST_STRUCTURE); + if (ast) + throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "{} is not an identifier", queryToString(*ast)); + throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "AST node is nullptr"); } std::optional tryGetIdentifierName(const IAST * ast) diff --git a/src/Parsers/ASTInsertQuery.cpp b/src/Parsers/ASTInsertQuery.cpp index dceec83e763..ecb2d4e331b 100644 --- a/src/Parsers/ASTInsertQuery.cpp +++ b/src/Parsers/ASTInsertQuery.cpp @@ -160,7 +160,7 @@ static void tryFindInputFunctionImpl(const ASTPtr & ast, ASTPtr & input_function if (table_function_ast->name == "input") { if (input_function) - throw Exception("You can use 'input()' function only once per request.", ErrorCodes::INVALID_USAGE_OF_INPUT); + throw Exception(ErrorCodes::INVALID_USAGE_OF_INPUT, "You can use 'input()' function only once per request."); input_function = ast; } } diff --git a/src/Parsers/ASTProjectionSelectQuery.cpp b/src/Parsers/ASTProjectionSelectQuery.cpp index 9b85fcb2dac..da3d9286f0a 100644 --- a/src/Parsers/ASTProjectionSelectQuery.cpp +++ b/src/Parsers/ASTProjectionSelectQuery.cpp @@ -115,7 +115,7 @@ void ASTProjectionSelectQuery::setExpression(Expression expr, ASTPtr && ast) ASTPtr & ASTProjectionSelectQuery::getExpression(Expression expr) { if (!positions.contains(expr)) - throw Exception("Get expression before set", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Get expression before set"); return children[positions[expr]]; } diff --git a/src/Parsers/ASTQualifiedAsterisk.h b/src/Parsers/ASTQualifiedAsterisk.h index e67b4cd82dd..079b83ae171 100644 --- a/src/Parsers/ASTQualifiedAsterisk.h +++ b/src/Parsers/ASTQualifiedAsterisk.h @@ -17,8 +17,13 @@ public: ASTPtr clone() const override { auto clone = std::make_shared(*this); + clone->children.clear(); - if (transformers) { clone->transformers = transformers->clone(); clone->children.push_back(clone->transformers); } + if (transformers) + { + clone->transformers = transformers->clone(); + clone->children.push_back(clone->transformers); + } clone->qualifier = qualifier->clone(); clone->children.push_back(clone->qualifier); diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index 838b2664eb3..fe8ebacec15 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -258,7 +258,7 @@ static const ASTArrayJoin * getFirstArrayJoin(const ASTSelectQuery & select) if (!array_join) array_join = tables_element.array_join->as(); else - throw Exception("Support for more than one ARRAY JOIN in query is not implemented", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Support for more than one ARRAY JOIN in query is not implemented"); } } @@ -283,7 +283,7 @@ static const ASTTablesInSelectQueryElement * getFirstTableJoin(const ASTSelectQu if (!joined_table) joined_table = &tables_element; else - throw Exception("Multiple JOIN does not support the query.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Multiple JOIN does not support the query."); } } @@ -460,7 +460,7 @@ void ASTSelectQuery::setExpression(Expression expr, ASTPtr && ast) ASTPtr & ASTSelectQuery::getExpression(Expression expr) { if (!positions.contains(expr)) - throw Exception("Get expression before set", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Get expression before set"); return children[positions[expr]]; } diff --git 
a/src/Parsers/ASTShowEngineQuery.h b/src/Parsers/ASTShowEngineQuery.h new file mode 100644 index 00000000000..7a447a4f24b --- /dev/null +++ b/src/Parsers/ASTShowEngineQuery.h @@ -0,0 +1,17 @@ +#pragma once + +#include + + +namespace DB +{ + +struct ASTShowEngineAndQueryNames +{ + static constexpr auto ID = "ShowEngineQuery"; + static constexpr auto Query = "SHOW ENGINES"; +}; + +using ASTShowEnginesQuery = ASTQueryWithOutputImpl; + +} diff --git a/src/Parsers/ASTSystemQuery.cpp b/src/Parsers/ASTSystemQuery.cpp index bfc7c5e6a45..ffb018f23fe 100644 --- a/src/Parsers/ASTSystemQuery.cpp +++ b/src/Parsers/ASTSystemQuery.cpp @@ -206,6 +206,10 @@ void ASTSystemQuery::formatImpl(const FormatSettings & settings, FormatState &, { settings.ostr << (settings.hilite ? hilite_identifier : "") << backQuoteIfNeed(backup_name); } + else if (type == Type::SYNC_FILE_CACHE) + { + settings.ostr << (settings.hilite ? hilite_none : ""); + } } diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index ae08fe464ad..02ddbc7dcd2 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -25,6 +25,7 @@ public: DROP_INDEX_MARK_CACHE, DROP_INDEX_UNCOMPRESSED_CACHE, DROP_MMAP_CACHE, + DROP_QUERY_CACHE, #if USE_EMBEDDED_COMPILER DROP_COMPILED_EXPRESSION_CACHE, #endif @@ -41,6 +42,7 @@ public: SYNC_REPLICA, SYNC_DATABASE_REPLICA, SYNC_TRANSACTION_LOG, + SYNC_FILE_CACHE, RELOAD_DICTIONARY, RELOAD_DICTIONARIES, RELOAD_MODEL, diff --git a/src/Parsers/Access/ASTCreateUserQuery.cpp b/src/Parsers/Access/ASTCreateUserQuery.cpp index a59b5dd472c..b0d4aef38b8 100644 --- a/src/Parsers/Access/ASTCreateUserQuery.cpp +++ b/src/Parsers/Access/ASTCreateUserQuery.cpp @@ -90,7 +90,7 @@ namespace case AuthenticationType::NO_PASSWORD: [[fallthrough]]; case AuthenticationType::MAX: - throw Exception("AST: Unexpected authentication type " + toString(auth_type), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "AST: Unexpected authentication type {}", toString(auth_type)); } if (password && !settings.show_secrets) diff --git a/src/Parsers/Access/ASTGrantQuery.cpp b/src/Parsers/Access/ASTGrantQuery.cpp index 1d15fc272cf..f92541ec672 100644 --- a/src/Parsers/Access/ASTGrantQuery.cpp +++ b/src/Parsers/Access/ASTGrantQuery.cpp @@ -116,9 +116,9 @@ void ASTGrantQuery::formatImpl(const FormatSettings & settings, FormatState &, F << (settings.hilite ? 
IAST::hilite_none : ""); if (!access_rights_elements.sameOptions()) - throw Exception("Elements of an ASTGrantQuery are expected to have the same options", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Elements of an ASTGrantQuery are expected to have the same options"); if (!access_rights_elements.empty() && access_rights_elements[0].is_partial_revoke && !is_revoke) - throw Exception("A partial revoke should be revoked, not granted", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "A partial revoke should be revoked, not granted"); bool grant_option = !access_rights_elements.empty() && access_rights_elements[0].grant_option; formatOnCluster(settings); @@ -136,7 +136,9 @@ void ASTGrantQuery::formatImpl(const FormatSettings & settings, FormatState &, F { roles->format(settings); if (!access_rights_elements.empty()) - throw Exception("ASTGrantQuery can contain either roles or access rights elements to grant or revoke, not both of them", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "ASTGrantQuery can contain either roles or access rights elements " + "to grant or revoke, not both of them"); } else formatElementsWithoutOptions(access_rights_elements, settings); diff --git a/src/Parsers/Access/ASTRowPolicyName.cpp b/src/Parsers/Access/ASTRowPolicyName.cpp index 280713fe9d9..4edfa61f10e 100644 --- a/src/Parsers/Access/ASTRowPolicyName.cpp +++ b/src/Parsers/Access/ASTRowPolicyName.cpp @@ -34,7 +34,7 @@ void ASTRowPolicyName::replaceEmptyDatabase(const String & current_database) void ASTRowPolicyNames::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const { if (full_names.empty()) - throw Exception("No names of row policies in AST", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "No names of row policies in AST"); bool same_short_name = true; if (full_names.size() > 1) diff --git a/src/Parsers/Access/ParserCreateQuotaQuery.cpp b/src/Parsers/Access/ParserCreateQuotaQuery.cpp index b86f0a6a572..a67051be398 100644 --- a/src/Parsers/Access/ParserCreateQuotaQuery.cpp +++ b/src/Parsers/Access/ParserCreateQuotaQuery.cpp @@ -71,8 +71,7 @@ namespace String all_types_str; for (auto kt : collections::range(QuotaKeyType::MAX)) all_types_str += String(all_types_str.empty() ? "" : ", ") + "'" + QuotaKeyTypeInfo::get(kt).name + "'"; - String msg = "Quota cannot be keyed by '" + name + "'. Expected one of the following identifiers: " + all_types_str; - throw Exception(msg, ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Quota cannot be keyed by '{}'. 
Expected one of the following identifiers: {}", name, all_types_str); }); } diff --git a/src/Parsers/Access/ParserGrantQuery.cpp b/src/Parsers/Access/ParserGrantQuery.cpp index 43e1cedd34d..2211969c61e 100644 --- a/src/Parsers/Access/ParserGrantQuery.cpp +++ b/src/Parsers/Access/ParserGrantQuery.cpp @@ -168,13 +168,13 @@ namespace return false; if (!element.any_column) - throw Exception(old_flags.toString() + " cannot be granted on the column level", ErrorCodes::INVALID_GRANT); + throw Exception(ErrorCodes::INVALID_GRANT, "{} cannot be granted on the column level", old_flags.toString()); else if (!element.any_table) - throw Exception(old_flags.toString() + " cannot be granted on the table level", ErrorCodes::INVALID_GRANT); + throw Exception(ErrorCodes::INVALID_GRANT, "{} cannot be granted on the table level", old_flags.toString()); else if (!element.any_database) - throw Exception(old_flags.toString() + " cannot be granted on the database level", ErrorCodes::INVALID_GRANT); + throw Exception(ErrorCodes::INVALID_GRANT, "{} cannot be granted on the database level", old_flags.toString()); else - throw Exception(old_flags.toString() + " cannot be granted", ErrorCodes::INVALID_GRANT); + throw Exception(ErrorCodes::INVALID_GRANT, "{} cannot be granted", old_flags.toString()); }); } @@ -281,9 +281,9 @@ bool ParserGrantQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) parseOnCluster(pos, expected, cluster); if (grant_option && roles) - throw Exception("GRANT OPTION should be specified for access types", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "GRANT OPTION should be specified for access types"); if (admin_option && !elements.empty()) - throw Exception("ADMIN OPTION should be specified for roles", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "ADMIN OPTION should be specified for roles"); if (grant_option) { diff --git a/src/Parsers/Access/ParserShowCreateAccessEntityQuery.cpp b/src/Parsers/Access/ParserShowCreateAccessEntityQuery.cpp index aaa65450d20..15622062961 100644 --- a/src/Parsers/Access/ParserShowCreateAccessEntityQuery.cpp +++ b/src/Parsers/Access/ParserShowCreateAccessEntityQuery.cpp @@ -150,7 +150,7 @@ bool ParserShowCreateAccessEntityQuery::parseImpl(Pos & pos, ASTPtr & node, Expe break; } case AccessEntityType::MAX: - throw Exception("Type " + toString(type) + " is not implemented in SHOW CREATE query", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Type {} is not implemented in SHOW CREATE query", toString(type)); } auto query = std::make_shared(); diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 231897605e0..855e452e3c7 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -121,7 +121,7 @@ bool ParserSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) const auto & explain_query = explain_node->as(); if (explain_query.getTableFunction() || explain_query.getTableOverride()) - throw Exception("EXPLAIN in a subquery cannot have a table function or table override", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "EXPLAIN in a subquery cannot have a table function or table override"); /// Replace subquery `(EXPLAIN SELECT ...)` /// with `(SELECT * FROM viewExplain("", "", SELECT ...))` @@ -132,7 +132,7 @@ bool ParserSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (ASTPtr settings_ast = explain_query.getSettings()) { if 
(!settings_ast->as()) - throw Exception("EXPLAIN settings must be a SET query", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "EXPLAIN settings must be a SET query"); settings_str = queryToString(settings_ast); } @@ -868,7 +868,9 @@ bool ParserNumber::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (str_end == buf.c_str() + buf.size() && errno != ERANGE) { if (float_value < 0) - throw Exception("Logical error: token number cannot begin with minus, but parsed float number is less than zero.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Logical error: token number cannot begin with minus, " + "but parsed float number is less than zero."); if (negative) float_value = -float_value; diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 2e20a68f9b1..054a22a0c3a 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -933,8 +933,8 @@ public: && contents_begin[9] >= '0' && contents_begin[9] <= '9') { std::string contents_str(contents_begin, contents_end - contents_begin); - throw Exception("Argument of function toDate is unquoted: toDate(" + contents_str + "), must be: toDate('" + contents_str + "')" - , ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Argument of function toDate is unquoted: " + "toDate({}), must be: toDate('{}')" , contents_str, contents_str); } if (allow_function_parameters && !parameters && ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected)) diff --git a/src/Parsers/FunctionParameterValuesVisitor.cpp b/src/Parsers/FunctionParameterValuesVisitor.cpp index 31ba7ac4f86..3692a4c73e5 100644 --- a/src/Parsers/FunctionParameterValuesVisitor.cpp +++ b/src/Parsers/FunctionParameterValuesVisitor.cpp @@ -58,7 +58,7 @@ private: { const auto * cast_expression = assert_cast(function->arguments.get()); if (cast_expression->children.size() != 2) - throw Exception("Function CAST must have exactly two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function CAST must have exactly two arguments"); if (const auto * cast_literal = cast_expression->children[0]->as()) { parameter_values[identifier->name()] = convertFieldToString(cast_literal->value); diff --git a/src/Parsers/IAST.cpp b/src/Parsers/IAST.cpp index 064bcc9a59e..869c0969dd6 100644 --- a/src/Parsers/IAST.cpp +++ b/src/Parsers/IAST.cpp @@ -108,7 +108,7 @@ size_t IAST::checkSize(size_t max_size) const res += child->checkSize(max_size); if (res > max_size) - throw Exception("AST is too big. Maximum: " + toString(max_size), ErrorCodes::TOO_BIG_AST); + throw Exception(ErrorCodes::TOO_BIG_AST, "AST is too big. Maximum: {}", max_size); return res; } @@ -156,7 +156,7 @@ size_t IAST::checkDepthImpl(size_t max_depth) const stack.pop_back(); if (top.second >= max_depth) - throw Exception("AST is too deep. Maximum: " + toString(max_depth), ErrorCodes::TOO_DEEP_AST); + throw Exception(ErrorCodes::TOO_DEEP_AST, "AST is too deep. 
Maximum: {}", max_depth); res = std::max(res, top.second); @@ -218,8 +218,9 @@ void IAST::FormatSettings::writeIdentifier(const String & name) const case IdentifierQuotingStyle::None: { if (always_quote_identifiers) - throw Exception("Incompatible arguments: always_quote_identifiers = true && identifier_quoting_style == IdentifierQuotingStyle::None", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Incompatible arguments: always_quote_identifiers = true && " + "identifier_quoting_style == IdentifierQuotingStyle::None"); writeString(name, ostr); break; } @@ -258,7 +259,7 @@ void IAST::dumpTree(WriteBuffer & ostr, size_t indent) const writeChar('\n', ostr); for (const auto & child : children) { - if (!child) throw Exception("Can't dump nullptr child", ErrorCodes::UNKNOWN_ELEMENT_IN_AST); + if (!child) throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_AST, "Can't dump nullptr child"); child->dumpTree(ostr, indent + 1); } } diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h index fd987d4b48e..c1520a6fca7 100644 --- a/src/Parsers/IAST.h +++ b/src/Parsers/IAST.h @@ -48,12 +48,12 @@ public: virtual void appendColumnName(WriteBuffer &) const { - throw Exception("Trying to get name of not a column: " + getID(), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to get name of not a column: {}", getID()); } virtual void appendColumnNameWithoutAlias(WriteBuffer &) const { - throw Exception("Trying to get name of not a column: " + getID(), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to get name of not a column: {}", getID()); } /** Get the alias, if any, or the canonical name of the column, if it is not. */ @@ -65,7 +65,7 @@ public: /** Set the alias. */ virtual void setAlias(const String & /*to*/) { - throw Exception("Can't set alias of " + getColumnName(), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't set alias of {}", getColumnName()); } /** Get the text that identifies this element. 
*/ @@ -119,7 +119,7 @@ public: T * casted = dynamic_cast<T *>(child.get()); if (!casted) - throw Exception("Could not cast AST subtree", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not cast AST subtree"); children.push_back(child); field = casted; @@ -129,11 +129,11 @@ public: void replace(T * & field, const ASTPtr & child) { if (!child) - throw Exception("Trying to replace AST subtree with nullptr", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to replace AST subtree with nullptr"); T * casted = dynamic_cast<T *>(child.get()); if (!casted) - throw Exception("Could not cast AST subtree", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not cast AST subtree"); for (ASTPtr & current_child : children) { @@ -145,7 +145,7 @@ public: } } - throw Exception("AST subtree not found in children", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "AST subtree not found in children"); } template <typename T> @@ -169,7 +169,7 @@ public: }); if (child == children.end()) - throw Exception("AST subtree not found in children", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "AST subtree not found in children"); children.erase(child); field = nullptr; @@ -237,7 +237,7 @@ public: virtual void formatImpl(const FormatSettings & /*settings*/, FormatState & /*state*/, FormatStateStacked /*frame*/) const { - throw Exception("Unknown element in AST: " + getID(), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown element in AST: {}", getID()); } // A simple way to add some user-readable context to an error message. diff --git a/src/Parsers/IParser.h b/src/Parsers/IParser.h index 466cdf7a4b1..d5fdf6b7eaa 100644 --- a/src/Parsers/IParser.h +++ b/src/Parsers/IParser.h @@ -70,15 +70,14 @@ public: { ++depth; if (unlikely(max_depth > 0 && depth > max_depth)) - throw Exception( - "Maximum parse depth (" + std::to_string(max_depth) + ") exceeded. Consider rising max_parser_depth parameter.", - ErrorCodes::TOO_DEEP_RECURSION); + throw Exception(ErrorCodes::TOO_DEEP_RECURSION, "Maximum parse depth ({}) exceeded. 
" + "Consider rising max_parser_depth parameter.", max_depth); } ALWAYS_INLINE void decreaseDepth() { if (unlikely(depth == 0)) - throw Exception("Logical error in parser: incorrect calculation of parse depth", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error in parser: incorrect calculation of parse depth"); --depth; } }; diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index f8e4f9eaab0..1575cffcc39 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -20,7 +20,7 @@ String KQLOperators::genHasAnyAllOpExpr(std::vector &tokens, IParser::Po ++token_pos; if (!s_lparen.ignore(token_pos, expected)) - throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); auto haystack = tokens.back(); @@ -55,7 +55,7 @@ String KQLOperators::genInOpExpr(IParser::Pos &token_pos, String kql_op, String ++token_pos; if (!s_lparen.ignore(token_pos, expected)) - throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); --token_pos; --token_pos; @@ -115,7 +115,7 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos new_expr = ch_op +"(" + tokens.back() +", concat('" + left_wildcards + left_space + "', " + tmp_arg +", '"+ right_space + right_wildcards + "'))"; } else - throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); tokens.pop_back(); return new_expr; } @@ -135,7 +135,7 @@ bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) { ++pos; if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) - throw Exception("Invalid negative operator", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Invalid negative operator"); op ="!"+String(pos->begin,pos->end); } else if (token == "matches") diff --git a/src/Parsers/MySQL/ASTAlterCommand.h b/src/Parsers/MySQL/ASTAlterCommand.h index 933a9700c70..f097ed71219 100644 --- a/src/Parsers/MySQL/ASTAlterCommand.h +++ b/src/Parsers/MySQL/ASTAlterCommand.h @@ -78,7 +78,7 @@ public: protected: void formatImpl(const FormatSettings & /*settings*/, FormatState & /*state*/, FormatStateStacked /*frame*/) const override { - throw Exception("Method formatImpl is not supported by MySQLParser::ASTAlterCommand.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method formatImpl is not supported by MySQLParser::ASTAlterCommand."); } }; diff --git a/src/Parsers/MySQL/ASTAlterQuery.h b/src/Parsers/MySQL/ASTAlterQuery.h index a6987acb327..161e5e40086 100644 --- a/src/Parsers/MySQL/ASTAlterQuery.h +++ b/src/Parsers/MySQL/ASTAlterQuery.h @@ -30,7 +30,7 @@ public: protected: void formatImpl(const FormatSettings & /*settings*/, FormatState & /*state*/, FormatStateStacked /*frame*/) const override { - throw Exception("Method formatImpl is not supported by MySQLParser::ASTAlterQuery.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method formatImpl is not supported by MySQLParser::ASTAlterQuery."); } }; diff --git a/src/Parsers/MySQL/ASTCreateDefines.h b/src/Parsers/MySQL/ASTCreateDefines.h index 95fb7716f3a..3d2a79568ab 100644 --- a/src/Parsers/MySQL/ASTCreateDefines.h +++ b/src/Parsers/MySQL/ASTCreateDefines.h @@ -29,7 +29,7 @@ public: 
protected: void formatImpl(const FormatSettings & /*settings*/, FormatState & /*state*/, FormatStateStacked /*frame*/) const override { - throw Exception("Method formatImpl is not supported by MySQLParser::ASTCreateDefines.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method formatImpl is not supported by MySQLParser::ASTCreateDefines."); } }; diff --git a/src/Parsers/MySQL/ASTCreateQuery.h b/src/Parsers/MySQL/ASTCreateQuery.h index ceacdd2cd41..d01bed6b7d6 100644 --- a/src/Parsers/MySQL/ASTCreateQuery.h +++ b/src/Parsers/MySQL/ASTCreateQuery.h @@ -34,7 +34,7 @@ public: protected: void formatImpl(const FormatSettings & /*settings*/, FormatState & /*state*/, FormatStateStacked /*frame*/) const override { - throw Exception("Method formatImpl is not supported by MySQLParser::ASTCreateQuery.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method formatImpl is not supported by MySQLParser::ASTCreateQuery."); } }; diff --git a/src/Parsers/MySQL/ASTDeclareColumn.h b/src/Parsers/MySQL/ASTDeclareColumn.h index 6f9f50225a2..30e16b7b84c 100644 --- a/src/Parsers/MySQL/ASTDeclareColumn.h +++ b/src/Parsers/MySQL/ASTDeclareColumn.h @@ -28,7 +28,7 @@ public: protected: void formatImpl(const FormatSettings & /*settings*/, FormatState & /*state*/, FormatStateStacked /*frame*/) const override { - throw Exception("Method formatImpl is not supported by MySQLParser::ASTDeclareColumn.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method formatImpl is not supported by MySQLParser::ASTDeclareColumn."); } }; diff --git a/src/Parsers/MySQL/ASTDeclareConstraint.h b/src/Parsers/MySQL/ASTDeclareConstraint.h index 8b0153bcd53..82f2597bc4d 100644 --- a/src/Parsers/MySQL/ASTDeclareConstraint.h +++ b/src/Parsers/MySQL/ASTDeclareConstraint.h @@ -28,7 +28,7 @@ public: protected: void formatImpl(const FormatSettings & /*settings*/, FormatState & /*state*/, FormatStateStacked /*frame*/) const override { - throw Exception("Method formatImpl is not supported by MySQLParser::ASTDeclareConstraint.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method formatImpl is not supported by MySQLParser::ASTDeclareConstraint."); } }; diff --git a/src/Parsers/MySQL/ASTDeclareIndex.h b/src/Parsers/MySQL/ASTDeclareIndex.h index faa8f1378dd..5399c394137 100644 --- a/src/Parsers/MySQL/ASTDeclareIndex.h +++ b/src/Parsers/MySQL/ASTDeclareIndex.h @@ -32,7 +32,7 @@ public: protected: void formatImpl(const FormatSettings & /*settings*/, FormatState & /*state*/, FormatStateStacked /*frame*/) const override { - throw Exception("Method formatImpl is not supported by MySQLParser::ASTDeclareIndex.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method formatImpl is not supported by MySQLParser::ASTDeclareIndex."); } }; diff --git a/src/Parsers/MySQL/ASTDeclareOption.h b/src/Parsers/MySQL/ASTDeclareOption.h index c493c49c61b..6e248b647c9 100644 --- a/src/Parsers/MySQL/ASTDeclareOption.h +++ b/src/Parsers/MySQL/ASTDeclareOption.h @@ -40,7 +40,7 @@ public: protected: void formatImpl(const FormatSettings & /*settings*/, FormatState & /*state*/, FormatStateStacked /*frame*/) const override { - throw Exception("Method formatImpl is not supported by MySQLParser::ASTDeclareOptions.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method formatImpl is not supported by MySQLParser::ASTDeclareOptions."); } }; diff --git a/src/Parsers/MySQL/ASTDeclarePartition.h 
b/src/Parsers/MySQL/ASTDeclarePartition.h index 232fea57561..f2c4103a1ba 100644 --- a/src/Parsers/MySQL/ASTDeclarePartition.h +++ b/src/Parsers/MySQL/ASTDeclarePartition.h @@ -30,7 +30,7 @@ public: protected: void formatImpl(const FormatSettings & /*settings*/, FormatState & /*state*/, FormatStateStacked /*frame*/) const override { - throw Exception("Method formatImpl is not supported by MySQLParser::ASTDeclarePartition.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method formatImpl is not supported by MySQLParser::ASTDeclarePartition."); } }; diff --git a/src/Parsers/MySQL/ASTDeclarePartitionOptions.h b/src/Parsers/MySQL/ASTDeclarePartitionOptions.h index 9e29a5cbbae..cee2d449291 100644 --- a/src/Parsers/MySQL/ASTDeclarePartitionOptions.h +++ b/src/Parsers/MySQL/ASTDeclarePartitionOptions.h @@ -32,7 +32,7 @@ public: protected: void formatImpl(const FormatSettings & /*settings*/, FormatState & /*state*/, FormatStateStacked /*frame*/) const override { - throw Exception("Method formatImpl is not supported by MySQLParser::ASTDeclarePartitionOptions.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method formatImpl is not supported by MySQLParser::ASTDeclarePartitionOptions."); } }; diff --git a/src/Parsers/MySQL/ASTDeclareReference.h b/src/Parsers/MySQL/ASTDeclareReference.h index c003bd7a16c..b04bf52e51c 100644 --- a/src/Parsers/MySQL/ASTDeclareReference.h +++ b/src/Parsers/MySQL/ASTDeclareReference.h @@ -46,7 +46,7 @@ public: protected: void formatImpl(const FormatSettings & /*settings*/, FormatState & /*state*/, FormatStateStacked /*frame*/) const override { - throw Exception("Method formatImpl is not supported by MySQLParser::ASTDeclareReference.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method formatImpl is not supported by MySQLParser::ASTDeclareReference."); } }; diff --git a/src/Parsers/MySQL/ASTDeclareSubPartition.h b/src/Parsers/MySQL/ASTDeclareSubPartition.h index 4f00a39c99a..82a1a5a3b0b 100644 --- a/src/Parsers/MySQL/ASTDeclareSubPartition.h +++ b/src/Parsers/MySQL/ASTDeclareSubPartition.h @@ -27,7 +27,7 @@ public: protected: void formatImpl(const FormatSettings & /*settings*/, FormatState & /*state*/, FormatStateStacked /*frame*/) const override { - throw Exception("Method formatImpl is not supported by MySQLParser::ASTDeclareSubPartition.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method formatImpl is not supported by MySQLParser::ASTDeclareSubPartition."); } }; diff --git a/src/Parsers/MySQL/ASTDropQuery.h b/src/Parsers/MySQL/ASTDropQuery.h index ff95277ae5e..742cf6ba421 100644 --- a/src/Parsers/MySQL/ASTDropQuery.h +++ b/src/Parsers/MySQL/ASTDropQuery.h @@ -47,7 +47,7 @@ public: protected: void formatImpl(const FormatSettings & /*settings*/, FormatState & /*state*/, FormatStateStacked /*frame*/) const override { - throw Exception("Method formatImpl is not supported by MySQLParser::ASTDropQuery.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method formatImpl is not supported by MySQLParser::ASTDropQuery."); } }; diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 0d522b192e4..208737b5bda 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -684,7 +684,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe if (!query->storage) query->set(query->storage, std::make_shared()); else if 
(query->storage->primary_key) - throw Exception("Multiple primary keys are not allowed.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Multiple primary keys are not allowed."); query->storage->primary_key = query->columns_list->primary_key; } diff --git a/src/Parsers/ParserDeleteQuery.cpp b/src/Parsers/ParserDeleteQuery.cpp index 7b8057d227e..7b27651d82d 100644 --- a/src/Parsers/ParserDeleteQuery.cpp +++ b/src/Parsers/ParserDeleteQuery.cpp @@ -18,6 +18,7 @@ bool ParserDeleteQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserKeyword s_where("WHERE"); ParserExpression parser_exp_elem; ParserKeyword s_settings("SETTINGS"); + ParserKeyword s_on{"ON"}; if (s_delete.ignore(pos, expected)) { @@ -27,6 +28,14 @@ bool ParserDeleteQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!parseDatabaseAndTableAsAST(pos, expected, query->database, query->table)) return false; + if (s_on.ignore(pos, expected)) + { + String cluster_str; + if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) + return false; + query->cluster = cluster_str; + } + if (!s_where.ignore(pos, expected)) return false; diff --git a/src/Parsers/ParserExternalDDLQuery.cpp b/src/Parsers/ParserExternalDDLQuery.cpp index 5d6874f524d..01e8fff4f3c 100644 --- a/src/Parsers/ParserExternalDDLQuery.cpp +++ b/src/Parsers/ParserExternalDDLQuery.cpp @@ -62,16 +62,16 @@ bool ParserExternalDDLQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expect /// Syntax error is ignored, so we need to convert the error code for parsing failure if (ParserKeyword("ALTER TABLE").ignore(pos)) - throw Exception("Cannot parse MySQL alter query.", ErrorCodes::MYSQL_SYNTAX_ERROR); + throw Exception(ErrorCodes::MYSQL_SYNTAX_ERROR, "Cannot parse MySQL alter query."); if (ParserKeyword("RENAME TABLE").ignore(pos)) - throw Exception("Cannot parse MySQL rename query.", ErrorCodes::MYSQL_SYNTAX_ERROR); + throw Exception(ErrorCodes::MYSQL_SYNTAX_ERROR, "Cannot parse MySQL rename query."); if (ParserKeyword("DROP TABLE").ignore(pos) || ParserKeyword("TRUNCATE").ignore(pos)) - throw Exception("Cannot parse MySQL drop query.", ErrorCodes::MYSQL_SYNTAX_ERROR); + throw Exception(ErrorCodes::MYSQL_SYNTAX_ERROR, "Cannot parse MySQL drop query."); if (ParserKeyword("CREATE TABLE").ignore(pos) || ParserKeyword("CREATE TEMPORARY TABLE").ignore(pos)) - throw Exception("Cannot parse MySQL create query.", ErrorCodes::MYSQL_SYNTAX_ERROR); + throw Exception(ErrorCodes::MYSQL_SYNTAX_ERROR, "Cannot parse MySQL create query."); } #endif } diff --git a/src/Parsers/ParserInsertQuery.cpp b/src/Parsers/ParserInsertQuery.cpp index 7f8a8d59fd0..8601e12ebcb 100644 --- a/src/Parsers/ParserInsertQuery.cpp +++ b/src/Parsers/ParserInsertQuery.cpp @@ -198,11 +198,9 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (allow_settings_after_format_in_insert && s_settings.ignore(pos, expected)) { if (settings_ast) - throw Exception("You have SETTINGS before and after FORMAT, " - "this is not allowed. " - "Consider switching to SETTINGS before FORMAT " - "and disable allow_settings_after_format_in_insert.", - ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, + "You have SETTINGS before and after FORMAT, this is not allowed. 
" + "Consider switching to SETTINGS before FORMAT and disable allow_settings_after_format_in_insert."); /// Settings are written like SET query, so parse them with ParserSetQuery ParserSetQuery parser_settings(true); @@ -230,14 +228,14 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) /// If format name is followed by ';' (end of query symbol) there is no data to insert. if (data < end && *data == ';') - throw Exception("You have excessive ';' symbol before data for INSERT.\n" + throw Exception(ErrorCodes::SYNTAX_ERROR, "You have excessive ';' symbol before data for INSERT.\n" "Example:\n\n" "INSERT INTO t (x, y) FORMAT TabSeparated\n" ";\tHello\n" "2\tWorld\n" "\n" "Note that there is no ';' just after format name, " - "you need to put at least one whitespace symbol before the data.", ErrorCodes::SYNTAX_ERROR); + "you need to put at least one whitespace symbol before the data."); while (data < end && (*data == ' ' || *data == '\t' || *data == '\f')) ++data; @@ -258,14 +256,21 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (infile) { query->infile = infile; + query->compression = compression; + + query->children.push_back(infile); if (compression) - query->compression = compression; + query->children.push_back(compression); } if (table_function) { query->table_function = table_function; query->partition_by = partition_by_expr; + + query->children.push_back(table_function); + if (partition_by_expr) + query->children.push_back(partition_by_expr); } else { diff --git a/src/Parsers/ParserQueryWithOutput.cpp b/src/Parsers/ParserQueryWithOutput.cpp index 163e71e3201..7024d8cbe11 100644 --- a/src/Parsers/ParserQueryWithOutput.cpp +++ b/src/Parsers/ParserQueryWithOutput.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -33,6 +34,7 @@ namespace DB bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ParserShowTablesQuery show_tables_p; + ParserShowEnginesQuery show_engine_p; ParserSelectWithUnionQuery select_p; ParserTablePropertiesQuery table_p; ParserDescribeTableQuery describe_table_p; @@ -60,6 +62,7 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec || select_p.parse(pos, query, expected) || show_create_access_entity_p.parse(pos, query, expected) /// should be before `show_tables_p` || show_tables_p.parse(pos, query, expected) + || show_engine_p.parse(pos, query, expected) || table_p.parse(pos, query, expected) || describe_cache_p.parse(pos, query, expected) || describe_table_p.parse(pos, query, expected) diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp index 107db51f869..17b082a2ddb 100644 --- a/src/Parsers/ParserSelectQuery.cpp +++ b/src/Parsers/ParserSelectQuery.cpp @@ -324,7 +324,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } if (limit_with_ties_occured && distinct_on_expression_list) - throw Exception("Can not use WITH TIES alongside LIMIT BY/DISTINCT ON", ErrorCodes::LIMIT_BY_WITH_TIES_IS_NOT_SUPPORTED); + throw Exception(ErrorCodes::LIMIT_BY_WITH_TIES_IS_NOT_SUPPORTED, "Can not use WITH TIES alongside LIMIT BY/DISTINCT ON"); if (s_by.ignore(pos, expected)) { @@ -332,10 +332,10 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) /// But there are other kind of queries like LIMIT n BY smth LIMIT m WITH TIES which are allowed. /// So we have to ignore WITH TIES exactly in LIMIT BY state. 
if (limit_with_ties_occured) - throw Exception("Can not use WITH TIES alongside LIMIT BY/DISTINCT ON", ErrorCodes::LIMIT_BY_WITH_TIES_IS_NOT_SUPPORTED); + throw Exception(ErrorCodes::LIMIT_BY_WITH_TIES_IS_NOT_SUPPORTED, "Can not use WITH TIES alongside LIMIT BY/DISTINCT ON"); if (distinct_on_expression_list) - throw Exception("Can not use DISTINCT ON alongside LIMIT BY", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Can not use DISTINCT ON alongside LIMIT BY"); limit_by_length = limit_length; limit_by_offset = limit_offset; @@ -347,7 +347,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } if (top_length && limit_length) - throw Exception("Can not use TOP and LIMIT together", ErrorCodes::TOP_AND_LIMIT_TOGETHER); + throw Exception(ErrorCodes::TOP_AND_LIMIT_TOGETHER, "Can not use TOP and LIMIT together"); } else if (s_offset.ignore(pos, expected)) { @@ -360,7 +360,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (s_row.ignore(pos, expected)) { if (s_rows.ignore(pos, expected)) - throw Exception("Can not use ROW and ROWS together", ErrorCodes::ROW_AND_ROWS_TOGETHER); + throw Exception(ErrorCodes::ROW_AND_ROWS_TOGETHER, "Can not use ROW and ROWS together"); offset_with_fetch_maybe = true; } else if (s_rows.ignore(pos, expected)) @@ -372,12 +372,12 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { /// OFFSET FETCH clause must exists with "ORDER BY" if (!order_expression_list) - throw Exception("Can not use OFFSET FETCH clause without ORDER BY", ErrorCodes::OFFSET_FETCH_WITHOUT_ORDER_BY); + throw Exception(ErrorCodes::OFFSET_FETCH_WITHOUT_ORDER_BY, "Can not use OFFSET FETCH clause without ORDER BY"); if (s_first.ignore(pos, expected)) { if (s_next.ignore(pos, expected)) - throw Exception("Can not use FIRST and NEXT together", ErrorCodes::FIRST_AND_NEXT_TOGETHER); + throw Exception(ErrorCodes::FIRST_AND_NEXT_TOGETHER, "Can not use FIRST and NEXT together"); } else if (!s_next.ignore(pos, expected)) return false; @@ -388,7 +388,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (s_row.ignore(pos, expected)) { if (s_rows.ignore(pos, expected)) - throw Exception("Can not use ROW and ROWS together", ErrorCodes::ROW_AND_ROWS_TOGETHER); + throw Exception(ErrorCodes::ROW_AND_ROWS_TOGETHER, "Can not use ROW and ROWS together"); } else if (!s_rows.ignore(pos, expected)) return false; @@ -452,7 +452,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) /// WITH TIES was used without ORDER BY if (!order_expression_list && select_query->limit_with_ties) - throw Exception("Can not use WITH TIES without ORDER BY", ErrorCodes::WITH_TIES_WITHOUT_ORDER_BY); + throw Exception(ErrorCodes::WITH_TIES_WITHOUT_ORDER_BY, "Can not use WITH TIES without ORDER BY"); /// SETTINGS key1 = value1, key2 = value2, ... 
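Every hunk above applies the same mechanical transformation: the legacy `throw Exception("text " + value, ErrorCodes::X)` call, which assembles the message by string concatenation, becomes the code-first, fmt-style `throw Exception(ErrorCodes::X, "text {}", value)`. The sketch below is a minimal, self-contained illustration of that calling convention only; the `Exception` class and `ErrorCode` enum here are simplified stand-ins invented for the example, not ClickHouse's real `DB::Exception`, and the formatting is done with `std::format` rather than the formatting machinery the server actually uses.

```cpp
#include <cstddef>
#include <format>
#include <iostream>
#include <stdexcept>
#include <string>
#include <utility>

// Simplified stand-ins for illustration only; ClickHouse's real DB::ErrorCodes
// and DB::Exception are richer than this.
enum ErrorCode
{
    TOO_DEEP_AST = 1,
    SYNTAX_ERROR = 2,
};

// Minimal fmt-style exception: the error code comes first, followed by a format
// string with {} placeholders and the values to substitute, mirroring the new
// call sites in this diff.
class Exception : public std::runtime_error
{
public:
    template <typename... Args>
    Exception(ErrorCode code_, std::format_string<Args...> fmt, Args &&... args)
        : std::runtime_error(std::format(fmt, std::forward<Args>(args)...))
        , code(code_)
    {
    }

    ErrorCode getCode() const { return code; }

private:
    ErrorCode code;
};

int main()
{
    const std::size_t max_depth = 1000;
    try
    {
        /// Old style: throw Exception("AST is too deep. Maximum: " + std::to_string(max_depth), TOO_DEEP_AST);
        /// New style, as used throughout the converted parser code:
        throw Exception(TOO_DEEP_AST, "AST is too deep. Maximum: {}", max_depth);
    }
    catch (const Exception & e)
    {
        std::cout << "code " << e.getCode() << ": " << e.what() << '\n';
    }
    return 0;
}
```

The practical point of the convention is that the error code is always the first argument and the message stays a single literal with `{}` placeholders, which is what makes a repository-wide, largely mechanical rewrite like the one in these hunks feasible.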
if (s_settings.ignore(pos, expected)) diff --git a/src/Parsers/ParserShowEngineQuery.h b/src/Parsers/ParserShowEngineQuery.h new file mode 100644 index 00000000000..e06326436f1 --- /dev/null +++ b/src/Parsers/ParserShowEngineQuery.h @@ -0,0 +1,32 @@ +#pragma once + +#include +#include +#include +#include + + +namespace DB +{ + +/** Query SHOW ENGINES + */ +class ParserShowEnginesQuery : public IParserBase +{ +protected: + const char * getName() const override { return "SHOW ENGINES query"; } + + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override + { + auto query = std::make_shared(); + + if (!ParserKeyword("SHOW ENGINES").ignore(pos, expected)) + return false; + + node = query; + + return true; + } +}; + +} diff --git a/src/Parsers/ParserTablesInSelectQuery.cpp b/src/Parsers/ParserTablesInSelectQuery.cpp index 2247167c66e..617ab7816d4 100644 --- a/src/Parsers/ParserTablesInSelectQuery.cpp +++ b/src/Parsers/ParserTablesInSelectQuery.cpp @@ -190,11 +190,11 @@ bool ParserTablesInSelectQueryElement::parseImpl(Pos & pos, ASTPtr & node, Expec if (table_join->strictness != JoinStrictness::Unspecified && table_join->kind == JoinKind::Cross) - throw Exception("You must not specify ANY or ALL for CROSS JOIN.", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "You must not specify ANY or ALL for CROSS JOIN."); if ((table_join->strictness == JoinStrictness::Semi || table_join->strictness == JoinStrictness::Anti) && (table_join->kind != JoinKind::Left && table_join->kind != JoinKind::Right)) - throw Exception("SEMI|ANTI JOIN should be LEFT or RIGHT.", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "SEMI|ANTI JOIN should be LEFT or RIGHT."); if (!ParserKeyword("JOIN").ignore(pos, expected)) return false; diff --git a/src/Parsers/obfuscateQueries.cpp b/src/Parsers/obfuscateQueries.cpp index f8697b862c7..909c86b0bf0 100644 --- a/src/Parsers/obfuscateQueries.cpp +++ b/src/Parsers/obfuscateQueries.cpp @@ -26,44 +26,448 @@ namespace const std::unordered_set keywords { - "CREATE", "DATABASE", "IF", "NOT", "EXISTS", "TEMPORARY", "TABLE", "ON", "CLUSTER", "DEFAULT", - "MATERIALIZED", "EPHEMERAL", "ALIAS", "ENGINE", "AS", "VIEW", "POPULATE", "SETTINGS", "ATTACH", "DETACH", - "DROP", "RENAME", "TO", "ALTER", "ADD", "MODIFY", "CLEAR", "COLUMN", "AFTER", "COPY", - "PROJECT", "PRIMARY", "KEY", "CHECK", "PARTITION", "PART", "FREEZE", "FETCH", "FROM", "SHOW", - "INTO", "OUTFILE", "FORMAT", "TABLES", "DATABASES", "LIKE", "PROCESSLIST", "CASE", "WHEN", "THEN", - "ELSE", "END", "DESCRIBE", "DESC", "USE", "SET", "OPTIMIZE", "FINAL", "DEDUPLICATE", "INSERT", - "VALUES", "SELECT", "DISTINCT", "SAMPLE", "ARRAY", "JOIN", "GLOBAL", "LOCAL", "ANY", "ALL", - "INNER", "LEFT", "RIGHT", "FULL", "OUTER", "CROSS", "USING", "PREWHERE", "WHERE", "GROUP", - "BY", "WITH", "TOTALS", "HAVING", "ORDER", "COLLATE", "LIMIT", "UNION", "AND", "OR", - "ASC", "IN", "KILL", "QUERY", "SYNC", "ASYNC", "TEST", "BETWEEN", "TRUNCATE", "USER", - "ROLE", "PROFILE", "QUOTA", "POLICY", "ROW", "GRANT", "REVOKE", "OPTION", "ADMIN", "EXCEPT", - "REPLACE", "IDENTIFIED", "HOST", "NAME", "READONLY", "WRITABLE", "PERMISSIVE", "FOR", "RESTRICTIVE", "RANDOMIZED", - "INTERVAL", "LIMITS", "ONLY", "TRACKING", "IP", "REGEXP", "ILIKE", "DICTIONARY", "OFFSET", "TRIM", - "LTRIM", "RTRIM", "BOTH", "LEADING", "TRAILING" + "!=", + "", + "%", + "*", + "+", + "-", + "->", + ".", + "/", + ":", + "::", + "<", + "<=", + "<>", + "=", + "==", + ">", + ">=", + "?", + "[", + "]+", + "]+|[", + "^[", + "||", + "]+$", + 
"ACCESS", + "ACTION", + "ADD", + "ADMIN", + "AFTER", + "ALGORITHM", + "ALIAS", + "ALL", + "ALLOWED_LATENESS", + "ALTER", + "AND", + "ANTI", + "ANY", + "APPLY", + "ARRAY", + "AS", + "ASC", + "ASCENDING", + "ASOF", + "ASSUME", + "AST", + "ASYNC", + "ATTACH", + "AUTO_INCREMENT", + "BACKUP", + "BASE_BACKUP", + "BEGIN", + "BETWEEN", + "BIDIRECTIONAL", + "BOTH", + "BY", + "CACHE", + "CACHES", + "CASCADE", + "CASE", + "CASEWITHEXPRESSION", + "CAST", + "CHANGE", + "CHANGEABLE_IN_READONLY", + "CHANGED", + "CHAR", + "CHARACTER", + "CHECK", + "CLEAR", + "CLUSTER", + "CLUSTER_HOST_IDS", + "CLUSTERS", + "CN", + "CODEC", + "COLLATE", + "COLLECTION", + "COLUMN", + "COLUMNS", + "COMMENT", + "COMMIT", + "COMPRESSION", + "CONCAT", + "CONSTRAINT", + "CREATE", + "CROSS", + "CUBE", + "CURRENT", + "CURRENT_USER", + "DATABASE", + "DATABASES", + "DATE", + "DATE_ADD", + "DATEADD", + "DATE_DIFF", + "DATEDIFF", + "DATE_SUB", + "DATESUB", + "DAY", + "DD", + "DDL", + "DEDUPLICATE", + "DEFAULT", + "DELAY", + "DELETE", + "DESC", + "DESCENDING", + "DESCRIBE", + "DETACH", + "DETACHED", + "DICTIONARIES", + "DICTIONARY", + "DISK", + "DISTINCT", + "DIV", + "DOUBLE_SHA1_HASH", + "DROP", + "ELSE", + "EMPTY", + "ENABLED", + "END", + "ENFORCED", + "ENGINE", + "EPHEMERAL", + "EQUALS", + "ESTIMATE", + "EVENT", + "EVENTS", + "EXCEPT", + "EXCHANGE", + "EXISTS", + "EXPLAIN", + "EXPRESSION", + "EXTERNAL", + "EXTRACT", + "FALSE", + "FETCH", + "FILE", + "FILESYSTEM", + "FILL", + "FILTER", + "FINAL", + "FIRST", + "FOLLOWING", + "FOR", + "FOREIGN", + "FORMAT", + "FREEZE", + "FROM", + "FULL", + "FULLTEXT", + "FUNCTION", + "GLOBAL", + "GRANT", + "GRANTEES", + "GRANTS", + "GRANULARITY", + "GREATER", + "GREATEROREQUALS", + "GROUP", + "GROUPING", + "GROUPS", + "HASH", + "HAVING", + "HDFS", + "HH", + "HIERARCHICAL", + "HOST", + "HOUR", + "ID", + "IDENTIFIED", + "IF", + "ILIKE", + "IN", + "INDEX", + "INFILE", + "INHERIT", + "INJECTIVE", + "INNER", + "INSERT", + "INTERPOLATE", + "INTERSECT", + "INTERVAL", + "INTO", + "INVISIBLE", + "IP", + "IS", + "IS_OBJECT_ID", + "JOIN", + "KEY", + "KEYED", + "KILL", + "LAMBDA", + "LARGE", + "LAST", + "LAYOUT", + "LEADING", + "LEFT", + "LESS", + "LESSOREQUALS", + "LEVEL", + "LIFETIME", + "LIKE", + "LIMIT", + "LIMITS", + "LINEAR", + "LIST", + "LITERAL", + "LIVE", + "LOCAL", + "LTRIM", + "MATCH", + "MATERIALIZE", + "MATERIALIZED", + "MAX", + "MCS", + "MEMORY", + "MI", + "MICROSECOND", + "MILLISECOND", + "MIN", + "MINUS", + "MINUTE", + "MM", + "MOD", + "MODIFY", + "MONTH", + "MOVE", + "MS", + "MULTIIF", + "MUTATION", + "NAME", + "NAMED", + "NANOSECOND", + "NEXT", + "NO", + "NONE", + "NOT", + "NOTEQUALS", + "NOTIN", + "NS", + "NULL", + "NULLS", + "OBJECT", + "OFFSET", + "ON", + "ONLY", + "OPTIMIZE", + "OPTION", + "OR", + "ORDER", + "OUTER", + "OUTFILE", + "OVER", + "OVERRIDE", + "PART", + "PARTIAL", + "PARTITION", + "PARTITIONS", + "PART_MOVE_TO_SHARD", + "PERMANENTLY", + "PERMISSIVE", + "PIPELINE", + "PLAN", + "PLUS", + "POLICY", + "POPULATE", + "POSITION", + "PRECEDING", + "PRECISION", + "PREWHERE", + "PRIMARY", + "PRIVILEGES", + "PROCESSLIST", + "PROFILE", + "PROJECTION", + "QQ", + "QUARTER", + "QUERY", + "QUOTA", + "RANDOMIZED", + "RANGE", + "READONLY", + "REALM", + "RECOMPRESS", + "REFERENCES", + "REFRESH", + "REGEXP", + "REGEXPQUOTEMETA", + "REMOVE", + "RENAME", + "REPLACE", + "REPLACEREGEXPALL", + "REPLACEREGEXPONE", + "RESET", + "RESTORE", + "RESTRICT", + "RESTRICTIVE", + "RESUME", + "REVOKE", + "RIGHT", + "ROLE", + "ROLES", + "ROLLBACK", + "ROLLUP", + "ROW", + "ROWS", + "RTRIM", + "S3", + "SALT", + 
"SAMPLE", + "SECOND", + "SELECT", + "SEMI", + "SERVER", + "SET", + "SETS", + "SETTING", + "SETTINGS", + "SHA256_HASH", + "SHARD", + "SHOW", + "SIGNED", + "SIMPLE", + "SINGLEVALUEORNULL", + "SNAPSHOT", + "SOURCE", + "SPATIAL", + "SS", + "STDOUT", + "STEP", + "STORAGE", + "STRICT", + "STRICTLY_ASCENDING", + "SUBPARTITION", + "SUBPARTITIONS", + "SUBSTRING", + "SUSPEND", + "SYNC", + "SYNTAX", + "SYSTEM", + "TABLE", + "TABLES", + "TEMPORARY", + "TEST", + "THAN", + "THEN", + "TIES", + "TIMESTAMP", + "TIMESTAMP_ADD", + "TIMESTAMPADD", + "TIMESTAMP_DIFF", + "TIMESTAMPDIFF", + "TIMESTAMP_SUB", + "TIMESTAMPSUB", + "TO", + "TODATE", + "TODATETIME", + "TOP", + "TOTALS", + "TRACKING", + "TRAILING", + "TRANSACTION", + "TREE", + "TRIGGER", + "TRIM", + "TRIMBOTH", + "TRIMLEFT", + "TRIMRIGHT", + "TRUE", + "TRUNCATE", + "TTL", + "TUPLE", + "TYPE", + "UNBOUNDED", + "UNFREEZE", + "UNION", + "UNIQUE", + "UNSIGNED", + "UNTUPLE", + "UPDATE", + "URL", + "USE", + "USER", + "USING", + "UUID", + "VALUES", + "VARYING", + "VIEW", + "VIEWIFPERMITTED", + "VISIBLE", + "VOLUME", + "WATCH", + "WATERMARK", + "WEEK", + "WHEN", + "WHERE", + "WINDOW", + "WITH", + "WK", + "WRITABLE", + "YEAR", + "YYYY", + "ZKPATH" }; +/// We want to keep some words inside quotes. For example we want to keep HOUR inside: +/// Select now() + INTERVAL '1 HOUR' const std::unordered_set keep_words { - "id", "name", "value", "num", - "Id", "Name", "Value", "Num", - "ID", "NAME", "VALUE", "NUM", + "DAY", + "HOUR", + "ID", + "NAME", + "NANOSECOND", + "MICROSECOND", + "MILLISECOND", + "SECOND", + "MINUTE", + "NUM", + "VALUE", + "WEEK", + "MONTH", + "QUARTER", + "YEAR" }; /// The list of nouns collected from here: http://www.desiquintans.com/nounlist, Public domain. +/// Removed nouns with spaces, words with non-ascii chars and keywords std::initializer_list nouns { "aardvark", "abacus", "abbey", "abbreviation", "abdomen", "ability", "abnormality", "abolishment", "abortion", -"abrogation", "absence", "abundance", "abuse", "academics", "academy", "accelerant", "accelerator", "accent", "acceptance", "access", +"abrogation", "absence", "abundance", "abuse", "academics", "academy", "accelerant", "accelerator", "accent", "acceptance", "accessory", "accident", "accommodation", "accompanist", "accomplishment", "accord", "accordance", "accordion", "account", "accountability", "accountant", "accounting", "accuracy", "accusation", "acetate", "achievement", "achiever", "acid", "acknowledgment", "acorn", "acoustics", -"acquaintance", "acquisition", "acre", "acrylic", "act", "action", "activation", "activist", "activity", "actor", "actress", "acupuncture", -"ad", "adaptation", "adapter", "addiction", "addition", "address", "adjective", "adjustment", "admin", "administration", "administrator", +"acquaintance", "acquisition", "acre", "acrylic", "act", "activation", "activist", "activity", "actor", "actress", "acupuncture", +"ad", "adaptation", "adapter", "addiction", "addition", "address", "adjective", "adjustment", "administration", "administrator", "admire", "admission", "adobe", "adoption", "adrenalin", "adrenaline", "adult", "adulthood", "advance", "advancement", "advantage", "advent", "adverb", "advertisement", "advertising", "advice", "adviser", "advocacy", "advocate", "affair", "affect", "affidavit", "affiliate", "affinity", "afoul", "afterlife", "aftermath", "afternoon", "aftershave", "aftershock", "afterthought", "age", "agency", "agenda", "agent", "aggradation", "aggression", "aglet", "agony", "agreement", "agriculture", "aid", "aide", "aim", "air", "airbag", 
"airbus", "aircraft", "airfare", "airfield", "airforce", "airline", "airmail", "airman", "airplane", "airport", "airship", "airspace", "alarm", "alb", "albatross", -"album", "alcohol", "alcove", "alder", "ale", "alert", "alfalfa", "algebra", "algorithm", "alibi", "alien", "allegation", "allergist", +"album", "alcohol", "alcove", "alder", "ale", "alert", "alfalfa", "algebra", "alibi", "alien", "allegation", "allergist", "alley", "alliance", "alligator", "allocation", "allowance", "alloy", "alluvium", "almanac", "almighty", "almond", "alpaca", "alpenglow", "alpenhorn", "alpha", "alphabet", "altar", "alteration", "alternative", "altitude", "alto", "aluminium", "aluminum", "amazement", "amazon", "ambassador", "amber", "ambience", "ambiguity", "ambition", "ambulance", "amendment", "amenity", "ammunition", "amnesty", "amount", "amusement", @@ -76,7 +480,7 @@ std::initializer_list nouns "apple", "applewood", "appliance", "application", "appointment", "appreciation", "apprehension", "approach", "appropriation", "approval", "apricot", "apron", "apse", "aquarium", "aquifer", "arcade", "arch", "archaeologist", "archaeology", "archeology", "archer", "architect", "architecture", "archives", "area", "arena", "argument", "arithmetic", "ark", "arm", "armadillo", "armament", -"armchair", "armoire", "armor", "armour", "armpit", "armrest", "army", "arrangement", "array", "arrest", "arrival", "arrogance", "arrow", +"armchair", "armoire", "armor", "armour", "armpit", "armrest", "army", "arrangement", "arrest", "arrival", "arrogance", "arrow", "art", "artery", "arthur", "artichoke", "article", "artifact", "artificer", "artist", "ascend", "ascent", "ascot", "ash", "ashram", "ashtray", "aside", "asparagus", "aspect", "asphalt", "aspic", "assassination", "assault", "assembly", "assertion", "assessment", "asset", "assignment", "assist", "assistance", "assistant", "associate", "association", "assumption", "assurance", "asterisk", "astrakhan", "astrolabe", @@ -85,7 +489,7 @@ std::initializer_list nouns "attraction", "attribute", "auction", "audience", "audit", "auditorium", "aunt", "authentication", "authenticity", "author", "authorisation", "authority", "authorization", "auto", "autoimmunity", "automation", "automaton", "autumn", "availability", "avalanche", "avenue", "average", "avocado", "award", "awareness", "awe", "axis", "azimuth", "babe", "baboon", "babushka", "baby", "bachelor", "back", "backbone", -"backburn", "backdrop", "background", "backpack", "backup", "backyard", "bacon", "bacterium", "badge", "badger", "bafflement", "bag", +"backburn", "backdrop", "background", "backpack", "backyard", "bacon", "bacterium", "badge", "badger", "bafflement", "bag", "bagel", "baggage", "baggie", "baggy", "bagpipe", "bail", "bait", "bake", "baker", "bakery", "bakeware", "balaclava", "balalaika", "balance", "balcony", "ball", "ballet", "balloon", "balloonist", "ballot", "ballpark", "bamboo", "ban", "banana", "band", "bandana", "bandanna", "bandolier", "bandwidth", "bangle", "banjo", "bank", "bankbook", "banker", "banking", "bankruptcy", "banner", "banquette", "banyan", @@ -125,16 +529,16 @@ std::initializer_list nouns "captain", "caption", "captor", "car", "carabao", "caramel", "caravan", "carbohydrate", "carbon", "carboxyl", "card", "cardboard", "cardigan", "care", "career", "cargo", "caribou", "carload", "carnation", "carnival", "carol", "carotene", "carp", "carpenter", "carpet", "carpeting", "carport", "carriage", "carrier", "carrot", "carry", "cart", "cartel", "carter", "cartilage", "cartload", "cartoon", "cartridge", 
"carving", -"cascade", "casement", "cash", "cashew", "cashier", "casino", "casket", "cassava", "casserole", "cassock", "cast", "castanet", +"casement", "cash", "cashew", "cashier", "casino", "casket", "cassava", "casserole", "cassock", "castanet", "castle", "casualty", "cat", "catacomb", "catalogue", "catalysis", "catalyst", "catamaran", "catastrophe", "catch", "catcher", "category", "caterpillar", "cathedral", "cation", "catsup", "cattle", "cauliflower", "causal", "cause", "causeway", "caution", "cave", "caviar", "cayenne", "ceiling", "celebration", "celebrity", "celeriac", "celery", "cell", "cellar", "cello", "celsius", "cement", "cemetery", "cenotaph", "census", "cent", "center", "centimeter", "centre", "centurion", "century", "cephalopod", "ceramic", "ceramics", "cereal", "ceremony", "certainty", "certificate", "certification", "cesspool", "chafe", "chain", "chainstay", "chair", "chairlift", "chairman", "chairperson", -"chaise", "chalet", "chalice", "chalk", "challenge", "chamber", "champagne", "champion", "championship", "chance", "chandelier", "change", -"channel", "chaos", "chap", "chapel", "chaplain", "chapter", "character", "characteristic", "characterization", "chard", "charge", "charger", +"chaise", "chalet", "chalice", "chalk", "challenge", "chamber", "champagne", "champion", "championship", "chance", "chandelier", +"channel", "chaos", "chap", "chapel", "chaplain", "chapter", "characteristic", "characterization", "chard", "charge", "charger", "charity", "charlatan", "charm", "charset", "chart", "charter", "chasm", "chassis", "chastity", "chasuble", "chateau", "chatter", "chauffeur", -"chauvinist", "check", "checkbook", "checking", "checkout", "checkroom", "cheddar", "cheek", "cheer", "cheese", "cheesecake", "cheetah", +"chauvinist", "checkbook", "checking", "checkout", "checkroom", "cheddar", "cheek", "cheer", "cheese", "cheesecake", "cheetah", "chef", "chem", "chemical", "chemistry", "chemotaxis", "cheque", "cherry", "chess", "chest", "chestnut", "chick", "chicken", "chicory", "chief", "chiffonier", "child", "childbirth", "childhood", "chili", "chill", "chime", "chimpanzee", "chin", "chinchilla", "chino", "chip", "chipmunk", "chivalry", "chive", "chives", "chocolate", "choice", "choir", "choker", "cholesterol", "choosing", "chop", @@ -146,13 +550,13 @@ std::initializer_list nouns "claw", "clay", "cleaner", "clearance", "clearing", "cleat", "cleavage", "clef", "cleft", "clergyman", "cleric", "clerk", "click", "client", "cliff", "climate", "climb", "clinic", "clip", "clipboard", "clipper", "cloak", "cloakroom", "clock", "clockwork", "clogs", "cloister", "clone", "close", "closet", "closing", "closure", "cloth", "clothes", "clothing", "cloud", "cloudburst", "clove", "clover", "cloves", -"club", "clue", "cluster", "clutch", "coach", "coal", "coalition", "coast", "coaster", "coat", "cob", "cobbler", "cobweb", +"club", "clue", "clutch", "coach", "coal", "coalition", "coast", "coaster", "coat", "cob", "cobbler", "cobweb", "cock", "cockpit", "cockroach", "cocktail", "cocoa", "coconut", "cod", "code", "codepage", "codling", "codon", "codpiece", "coevolution", "cofactor", "coffee", "coffin", "cohesion", "cohort", "coil", "coin", "coincidence", "coinsurance", "coke", "cold", "coleslaw", "coliseum", -"collaboration", "collagen", "collapse", "collar", "collard", "collateral", "colleague", "collection", "collectivisation", "collectivization", +"collaboration", "collagen", "collapse", "collar", "collard", "collateral", "colleague", "collectivisation", "collectivization", "collector", 
"college", "collision", "colloquy", "colon", "colonial", "colonialism", "colonisation", "colonization", "colony", "color", -"colorlessness", "colt", "column", "columnist", "comb", "combat", "combination", "combine", "comeback", "comedy", "comestible", "comfort", -"comfortable", "comic", "comics", "comma", "command", "commander", "commandment", "comment", "commerce", "commercial", "commission", +"colorlessness", "colt", "columnist", "comb", "combat", "combination", "combine", "comeback", "comedy", "comestible", "comfort", +"comfortable", "comic", "comics", "comma", "command", "commander", "commandment", "commerce", "commercial", "commission", "commitment", "committee", "commodity", "common", "commonsense", "commotion", "communicant", "communication", "communion", "communist", "community", "commuter", "company", "comparison", "compass", "compassion", "compassionate", "compensation", "competence", "competition", "competitor", "complaint", "complement", "completion", "complex", "complexity", "compliance", "complication", "complicity", "compliment", @@ -162,8 +566,8 @@ std::initializer_list nouns "confidentiality", "configuration", "confirmation", "conflict", "conformation", "confusion", "conga", "congo", "congregation", "congress", "congressman", "congressperson", "conifer", "connection", "connotation", "conscience", "consciousness", "consensus", "consent", "consequence", "conservation", "conservative", "consideration", "consignment", "consist", "consistency", "console", "consonant", "conspiracy", "conspirator", -"constant", "constellation", "constitution", "constraint", "construction", "consul", "consulate", "consulting", "consumer", "consumption", -"contact", "contact lens", "contagion", "container", "content", "contention", "contest", "context", "continent", "contingency", "continuity", +"constant", "constellation", "constitution", "construction", "consul", "consulate", "consulting", "consumer", "consumption", +"contact", "contagion", "container", "content", "contention", "contest", "context", "continent", "contingency", "continuity", "contour", "contract", "contractor", "contrail", "contrary", "contrast", "contribution", "contributor", "control", "controller", "controversy", "convection", "convenience", "convention", "conversation", "conversion", "convert", "convertible", "conviction", "cook", "cookbook", "cookie", "cooking", "coonskin", "cooperation", "coordination", "coordinator", "cop", "cope", "copper", "copy", "copying", @@ -175,33 +579,33 @@ std::initializer_list nouns "cousin", "covariate", "cover", "coverage", "coverall", "cow", "cowbell", "cowboy", "coyote", "crab", "crack", "cracker", "crackers", "cradle", "craft", "craftsman", "cranberry", "crane", "cranky", "crash", "crate", "cravat", "craw", "crawdad", "crayfish", "crayon", "crazy", "cream", "creation", "creationism", "creationist", "creative", "creativity", "creator", "creature", "creche", "credential", -"credenza", "credibility", "credit", "creditor", "creek", "creme brulee", "crepe", "crest", "crew", "crewman", "crewmate", "crewmember", +"credenza", "credibility", "credit", "creditor", "creek", "crepe", "crest", "crew", "crewman", "crewmate", "crewmember", "crewmen", "cria", "crib", "cribbage", "cricket", "cricketer", "crime", "criminal", "crinoline", "crisis", "crisp", "criteria", "criterion", -"critic", "criticism", "crocodile", "crocus", "croissant", "crook", "crop", "cross", "crotch", +"critic", "criticism", "crocodile", "crocus", "croissant", "crook", "crop", "crotch", "croup", "crow", "crowd", "crown", "crucifixion", 
"crude", "cruelty", "cruise", "crumb", "crunch", "crusader", "crush", "crust", "cry", -"crystal", "crystallography", "cub", "cube", "cuckoo", "cucumber", "cue", "cuisine", "cultivar", "cultivator", "culture", +"crystal", "crystallography", "cub", "cuckoo", "cucumber", "cue", "cuisine", "cultivar", "cultivator", "culture", "culvert", "cummerbund", "cup", "cupboard", "cupcake", "cupola", "curd", "cure", "curio", "curiosity", "curl", "curler", "currant", "currency", -"current", "curriculum", "curry", "curse", "cursor", "curtailment", "curtain", "curve", "cushion", "custard", "custody", "custom", "customer", +"curriculum", "curry", "curse", "cursor", "curtailment", "curtain", "curve", "cushion", "custard", "custody", "custom", "customer", "cut", "cuticle", "cutlet", "cutover", "cutting", "cyclamen", "cycle", "cyclone", "cyclooxygenase", "cygnet", "cylinder", "cymbal", "cynic", "cyst", "cytokine", "cytoplasm", "dad", "daddy", "daffodil", "dagger", "dahlia", "daikon", "daily", "dairy", "daisy", "dam", "damage", "dame", "dance", "dancer", "dancing", "dandelion", "danger", "dare", "dark", "darkness", "darn", "dart", "dash", "dashboard", -"data", "date", "daughter", "dawn", "day", "daybed", "daylight", "dead", "deadline", "deal", "dealer", "dealing", "dearest", +"data", "daughter", "dawn", "daybed", "daylight", "dead", "deadline", "deal", "dealer", "dealing", "dearest", "death", "deathwatch", "debate", "debris", "debt", "debtor", "decade", "decadence", "decency", "decimal", "decision", "deck", "declaration", "declination", "decline", "decoder", "decongestant", "decoration", "decrease", "decryption", "dedication", "deduce", "deduction", "deed", "deep", "deer", "defeat", "defendant", "defender", "defense", "deficit", "definition", "deformation", -"degradation", "degree", "delay", "deliberation", "delight", "delivery", "demand", "democracy", "democrat", "demon", "demur", "den", +"degradation", "degree", "deliberation", "delight", "delivery", "demand", "democracy", "democrat", "demon", "demur", "den", "denim", "denominator", "density", "dentist", "deodorant", "department", "departure", "dependency", "dependent", "deployment", "deposit", "deposition", "depot", "depression", "depressive", "depth", "deputy", "derby", "derivation", "derivative", "derrick", "descendant", "descent", "description", "desert", "design", "designation", "designer", "desire", "desk", "desktop", "dessert", "destination", "destiny", "destroyer", "destruction", "detail", "detainee", "detainment", "detection", "detective", "detector", "detention", "determination", "detour", "devastation", "developer", "developing", "development", "developmental", "deviance", "deviation", "device", "devil", "dew", "dhow", "diabetes", "diadem", -"diagnosis", "diagram", "dial", "dialect", "dialogue", "diam", "diamond", "diaper", "diaphragm", "diarist", "diary", "dibble", "dickey", "dictaphone", "dictator", "diction", "dictionary", "die", "diesel", "diet", "difference", "differential", "difficulty", "diffuse", +"diagnosis", "diagram", "dial", "dialect", "dialogue", "diam", "diamond", "diaper", "diaphragm", "diarist", "diary", "dibble", "dickey", "dictaphone", "dictator", "diction", "die", "diesel", "diet", "difference", "differential", "difficulty", "diffuse", "dig", "digestion", "digestive", "digger", "digging", "digit", "dignity", "dilapidation", "dill", "dilution", "dime", "dimension", "dimple", "diner", "dinghy", "dining", "dinner", "dinosaur", "dioxide", "dip", "diploma", "diplomacy", "dipstick", "direction", "directive", "director", "directory", 
"dirndl", "dirt", "disability", "disadvantage", "disagreement", "disappointment", "disarmament", "disaster", "discharge", "discipline", "disclaimer", "disclosure", "disco", "disconnection", "discount", "discourse", "discovery", "discrepancy", "discretion", "discrimination", "discussion", "disdain", "disease", "disembodiment", "disengagement", "disguise", "disgust", "dish", "dishwasher", -"disk", "disparity", "dispatch", "displacement", "display", "disposal", "disposer", "disposition", "dispute", "disregard", "disruption", +"disparity", "dispatch", "displacement", "display", "disposal", "disposer", "disposition", "dispute", "disregard", "disruption", "dissemination", "dissonance", "distance", "distinction", "distortion", "distribution", "distributor", "district", "divalent", "divan", "diver", "diversity", "divide", "dividend", "divider", "divine", "diving", "division", "divorce", "doc", "dock", "doctor", "doctorate", "doctrine", "document", "documentary", "documentation", "doe", "dog", "doggie", "dogsled", "dogwood", "doing", "doll", "dollar", "dollop", @@ -209,10 +613,10 @@ std::initializer_list nouns "doorpost", "doorway", "dory", "dose", "dot", "double", "doubling", "doubt", "doubter", "dough", "doughnut", "down", "downfall", "downforce", "downgrade", "download", "downstairs", "downtown", "downturn", "dozen", "draft", "drag", "dragon", "dragonfly", "dragonfruit", "dragster", "drain", "drainage", "drake", "drama", "dramaturge", "drapes", "draw", "drawbridge", "drawer", "drawing", "dream", "dreamer", "dredger", -"dress", "dresser", "dressing", "drill", "drink", "drinking", "drive", "driver", "driveway", "driving", "drizzle", "dromedary", "drop", +"dress", "dresser", "dressing", "drill", "drink", "drinking", "drive", "driver", "driveway", "driving", "drizzle", "dromedary", "drudgery", "drug", "drum", "drummer", "drunk", "dryer", "duck", "duckling", "dud", "dude", "due", "duel", "dueling", "duffel", "dugout", -"dulcimer", "dumbwaiter", "dump", "dump truck", "dune", "dune buggy", "dungarees", "dungeon", "duplexer", "duration", "durian", "dusk", -"dust", "dust storm", "duster", "duty", "dwarf", "dwell", "dwelling", "dynamics", "dynamite", "dynamo", "dynasty", "dysfunction", +"dulcimer", "dumbwaiter", "dump", "dune", "dungarees", "dungeon", "duplexer", "duration", "durian", "dusk", +"dust", "duster", "duty", "dwarf", "dwell", "dwelling", "dynamics", "dynamite", "dynamo", "dynasty", "dysfunction", "eagle", "eaglet", "ear", "eardrum", "earmuffs", "earnings", "earplug", "earring", "earrings", "earth", "earthquake", "earthworm", "ease", "easel", "east", "eating", "eaves", "eavesdropper", "ecclesia", "echidna", "eclipse", "ecliptic", "ecology", "economics", "economy", "ecosystem", "ectoderm", "ectodermal", "ecumenist", "eddy", "edge", "edger", "edible", "editing", "edition", "editor", "editorial", @@ -222,19 +626,19 @@ std::initializer_list nouns "ellipse", "elm", "elongation", "elver", "email", "emanate", "embarrassment", "embassy", "embellishment", "embossing", "embryo", "emerald", "emergence", "emergency", "emergent", "emery", "emission", "emitter", "emotion", "emphasis", "empire", "employ", "employee", "employer", "employment", "empowerment", "emu", "enactment", "encirclement", "enclave", "enclosure", "encounter", "encouragement", "encyclopedia", -"end", "endive", "endoderm", "endorsement", "endothelium", "endpoint", "enemy", "energy", "enforcement", "engagement", "engine", "engineer", +"endive", "endoderm", "endorsement", "endothelium", "endpoint", "enemy", "energy", "enforcement", 
"engagement", "engineer", "engineering", "enigma", "enjoyment", "enquiry", "enrollment", "enterprise", "entertainment", "enthusiasm", "entirety", "entity", "entrance", "entree", "entrepreneur", "entry", "envelope", "environment", "envy", "enzyme", "epauliere", "epee", "ephemera", "ephemeris", "ephyra", "epic", "episode", "epithelium", "epoch", "eponym", "epoxy", "equal", "equality", "equation", "equinox", "equipment", "equity", "equivalent", "era", "eraser", "erection", "erosion", "error", "escalator", "escape", "escort", "espadrille", "espalier", "essay", "essence", "essential", -"establishment", "estate", "estimate", "estrogen", "estuary", "eternity", "ethernet", "ethics", "ethnicity", "ethyl", "euphonium", "eurocentrism", -"evaluation", "evaluator", "evaporation", "eve", "evening", "event", "everybody", "everyone", "everything", "eviction", +"establishment", "estate", "estrogen", "estuary", "eternity", "ethernet", "ethics", "ethnicity", "ethyl", "euphonium", "eurocentrism", +"evaluation", "evaluator", "evaporation", "eve", "evening", "everybody", "everyone", "everything", "eviction", "evidence", "evil", "evocation", "evolution", "exaggeration", "exam", "examination", "examiner", "example", -"exasperation", "excellence", "exception", "excerpt", "excess", "exchange", "excitement", "exclamation", "excursion", "excuse", "execution", +"exasperation", "excellence", "exception", "excerpt", "excess", "excitement", "exclamation", "excursion", "excuse", "execution", "executive", "executor", "exercise", "exhaust", "exhaustion", "exhibit", "exhibition", "exile", "existence", "exit", "exocrine", "expansion", "expansionism", "expectancy", "expectation", "expedition", "expense", "experience", "experiment", "experimentation", "expert", "expertise", -"explanation", "exploration", "explorer", "explosion", "export", "expose", "exposition", "exposure", "expression", "extension", "extent", -"exterior", "external", "extinction", "extreme", "extremist", "eye", "eyeball", "eyebrow", "eyebrows", "eyeglasses", "eyelash", "eyelashes", +"explanation", "exploration", "explorer", "explosion", "export", "expose", "exposition", "exposure", "extension", "extent", +"exterior", "extinction", "extreme", "extremist", "eye", "eyeball", "eyebrow", "eyebrows", "eyeglasses", "eyelash", "eyelashes", "eyelid", "eyelids", "eyeliner", "eyestrain", "eyrie", "fabric", "face", "facelift", "facet", "facility", "facsimile", "fact", "factor", "factory", "faculty", "fahrenheit", "fail", "failure", "fairness", "fairy", "faith", "faithful", "fall", "fallacy", "fame", "familiar", "familiarity", "family", "fan", "fang", "fanlight", "fanny", "fantasy", "farm", "farmer", "farming", "farmland", @@ -242,13 +646,13 @@ std::initializer_list nouns "favorite", "fawn", "fax", "fear", "feast", "feather", "feature", "fedelini", "federation", "fedora", "fee", "feed", "feedback", "feeding", "feel", "feeling", "fellow", "felony", "female", "fen", "fence", "fencing", "fender", "feng", "fennel", "ferret", "ferry", "ferryboat", "fertilizer", "festival", "fetus", "few", "fiber", "fiberglass", "fibre", "fibroblast", "fibrosis", "ficlet", "fiction", "fiddle", "field", -"fiery", "fiesta", "fifth", "fig", "fight", "fighter", "figure", "figurine", "file", "filing", "fill", "fillet", "filly", "film", "filter", -"filth", "final", "finance", "financing", "finding", "fine", "finer", "finger", "fingerling", "fingernail", "finish", "finisher", "fir", -"fire", "fireman", "fireplace", "firewall", "firm", "first", "fish", "fishbone", "fisherman", "fishery", "fishing", 
"fishmonger", "fishnet", +"fiery", "fiesta", "fifth", "fig", "fight", "fighter", "figure", "figurine", "filing", "fillet", "filly", "film", +"filth", "finance", "financing", "finding", "fine", "finer", "finger", "fingerling", "fingernail", "finish", "finisher", "fir", +"fire", "fireman", "fireplace", "firewall", "firm", "fish", "fishbone", "fisherman", "fishery", "fishing", "fishmonger", "fishnet", "fisting", "fit", "fitness", "fix", "fixture", "flag", "flair", "flame", "flan", "flanker", "flare", "flash", "flat", "flatboat", "flavor", "flax", "fleck", "fledgling", "fleece", "flesh", "flexibility", "flick", "flicker", "flight", "flint", "flintlock", "flock", "flood", "floodplain", "floor", "floozie", "flour", "flow", "flower", "flu", "flugelhorn", "fluke", "flume", "flung", "flute", "fly", -"flytrap", "foal", "foam", "fob", "focus", "fog", "fold", "folder", "folk", "folklore", "follower", "following", "fondue", "font", "food", +"flytrap", "foal", "foam", "fob", "focus", "fog", "fold", "folder", "folk", "folklore", "follower", "fondue", "font", "food", "foodstuffs", "fool", "foot", "footage", "football", "footnote", "footprint", "footrest", "footstep", "footstool", "footwear", "forage", "forager", "foray", "force", "ford", "forearm", "forebear", "forecast", "forehead", "foreigner", "forelimb", "forest", "forestry", "forever", "forgery", "fork", "form", "formal", "formamide", "formation", "former", "formicarium", "formula", "fort", "forte", "fortnight", @@ -256,7 +660,7 @@ std::initializer_list nouns "frame", "framework", "fratricide", "fraud", "fraudster", "freak", "freckle", "freedom", "freelance", "freezer", "freezing", "freight", "freighter", "frenzy", "freon", "frequency", "fresco", "friction", "fridge", "friend", "friendship", "fries", "frigate", "fright", "fringe", "fritter", "frock", "frog", "front", "frontier", "frost", "frosting", "frown", "fruit", "frustration", "fry", "fuel", "fugato", -"fulfillment", "full", "fun", "function", "functionality", "fund", "funding", "fundraising", "funeral", "fur", "furnace", "furniture", +"fulfillment", "fun", "functionality", "fund", "funding", "fundraising", "funeral", "fur", "furnace", "furniture", "furry", "fusarium", "futon", "future", "gadget", "gaffe", "gaffer", "gain", "gaiters", "gale", "gallery", "galley", "gallon", "galoshes", "gambling", "game", "gamebird", "gaming", "gander", "gang", "gap", "garage", "garb", "garbage", "garden", "garlic", "garment", "garter", "gas", "gasket", "gasoline", "gasp", "gastronomy", "gastropod", "gate", "gateway", "gather", "gathering", @@ -269,7 +673,7 @@ std::initializer_list nouns "goggles", "going", "gold", "goldfish", "golf", "gondola", "gong", "good", "goodbye", "goodie", "goodness", "goodnight", "goodwill", "goose", "gopher", "gorilla", "gosling", "gossip", "governance", "government", "governor", "gown", "grace", "grade", "gradient", "graduate", "graduation", "graffiti", "graft", "grain", "gram", "grammar", "gran", "grand", "grandchild", "granddaughter", -"grandfather", "grandma", "grandmom", "grandmother", "grandpa", "grandparent", "grandson", "granny", "granola", "grant", "grape", "grapefruit", +"grandfather", "grandma", "grandmom", "grandmother", "grandpa", "grandparent", "grandson", "granny", "granola", "grape", "grapefruit", "graph", "graphic", "grasp", "grass", "grasshopper", "grassland", "gratitude", "gravel", "gravitas", "gravity", "gravy", "gray", "grease", "greatness", "greed", "green", "greenhouse", "greens", "grenade", "grey", "grid", "grief", "grill", "grin", "grip", "gripper", "grit", 
"grocery", "ground", "grouper", "grouse", "grove", "growth", "grub", "guacamole", @@ -279,7 +683,7 @@ std::initializer_list nouns "halibut", "hall", "halloween", "hallway", "halt", "ham", "hamburger", "hammer", "hammock", "hamster", "hand", "handball", "handful", "handgun", "handicap", "handle", "handlebar", "handmaiden", "handover", "handrail", "handsaw", "hanger", "happening", "happiness", "harald", "harbor", "harbour", "hardboard", "hardcover", "hardening", "hardhat", "hardship", "hardware", "hare", "harm", -"harmonica", "harmonise", "harmonize", "harmony", "harp", "harpooner", "harpsichord", "harvest", "harvester", "hash", "hashtag", "hassock", +"harmonica", "harmonise", "harmonize", "harmony", "harp", "harpooner", "harpsichord", "harvest", "harvester", "hashtag", "hassock", "haste", "hat", "hatbox", "hatchet", "hatchling", "hate", "hatred", "haunt", "haven", "haversack", "havoc", "hawk", "hay", "haze", "hazel", "hazelnut", "head", "headache", "headlight", "headline", "headphones", "headquarters", "headrest", "health", "hearing", "hearsay", "heart", "heartache", "heartbeat", "hearth", "hearthside", "heartwood", "heat", "heater", "heating", "heaven", @@ -290,53 +694,53 @@ std::initializer_list nouns "hobbit", "hobby", "hockey", "hoe", "hog", "hold", "holder", "hole", "holiday", "home", "homeland", "homeownership", "hometown", "homework", "homicide", "homogenate", "homonym", "honesty", "honey", "honeybee", "honeydew", "honor", "honoree", "hood", "hoof", "hook", "hop", "hope", "hops", "horde", "horizon", "hormone", "horn", "hornet", "horror", "horse", "horseradish", "horst", "hose", -"hosiery", "hospice", "hospital", "hospitalisation", "hospitality", "hospitalization", "host", "hostel", "hostess", "hotdog", "hotel", -"hound", "hour", "hourglass", "house", "houseboat", "household", "housewife", "housework", "housing", "hovel", "hovercraft", "howard", +"hosiery", "hospice", "hospital", "hospitalisation", "hospitality", "hospitalization", "hostel", "hostess", "hotdog", "hotel", +"hound", "hourglass", "house", "houseboat", "household", "housewife", "housework", "housing", "hovel", "hovercraft", "howard", "howitzer", "hub", "hubcap", "hubris", "hug", "hugger", "hull", "human", "humanity", "humidity", "hummus", "humor", "humour", "hunchback", "hundred", "hunger", "hunt", "hunter", "hunting", "hurdle", "hurdler", "hurricane", "hurry", "hurt", "husband", "hut", "hutch", "hyacinth", "hybridisation", "hybridization", "hydrant", "hydraulics", "hydrocarb", "hydrocarbon", "hydrofoil", "hydrogen", "hydrolyse", "hydrolysis", "hydrolyze", "hydroxyl", "hyena", "hygienic", "hype", "hyphenation", "hypochondria", "hypothermia", "hypothesis", "ice", -"iceberg", "icebreaker", "icecream", "icicle", "icing", "icon", "icy", "id", "idea", "ideal", "identification", "identity", "ideology", +"iceberg", "icebreaker", "icecream", "icicle", "icing", "icon", "icy", "idea", "ideal", "identification", "identity", "ideology", "idiom", "idiot", "igloo", "ignorance", "ignorant", "ikebana", "illegal", "illiteracy", "illness", "illusion", "illustration", "image", "imagination", "imbalance", "imitation", "immigrant", "immigration", "immortal", "impact", "impairment", "impala", "impediment", "implement", "implementation", "implication", "import", "importance", "impostor", "impress", "impression", "imprisonment", "impropriety", "improvement", "impudence", "impulse", "inability", "inauguration", "inbox", "incandescence", "incarnation", "incense", "incentive", "inch", "incidence", "incident", "incision", "inclusion", "income", 
"incompetence", "inconvenience", "increase", "incubation", "independence", -"independent", "index", "indication", "indicator", "indigence", "individual", "industrialisation", "industrialization", "industry", "inequality", +"independent", "indication", "indicator", "indigence", "individual", "industrialisation", "industrialization", "industry", "inequality", "inevitable", "infancy", "infant", "infarction", "infection", "infiltration", "infinite", "infix", "inflammation", "inflation", "influence", "influx", "info", "information", "infrastructure", "infusion", "inglenook", "ingrate", "ingredient", "inhabitant", "inheritance", "inhibition", "inhibitor", "initial", "initialise", "initialize", "initiative", "injunction", "injury", "injustice", "ink", "inlay", "inn", "innervation", -"innocence", "innocent", "innovation", "input", "inquiry", "inscription", "insect", "insectarium", "insert", "inside", "insight", "insolence", +"innocence", "innocent", "innovation", "input", "inquiry", "inscription", "insect", "insectarium", "inside", "insight", "insolence", "insomnia", "inspection", "inspector", "inspiration", "installation", "instance", "instant", "instinct", "institute", "institution", "instruction", "instructor", "instrument", "instrumentalist", "instrumentation", "insulation", "insurance", "insurgence", "insurrection", "integer", "integral", "integration", "integrity", "intellect", "intelligence", "intensity", "intent", "intention", "intentionality", "interaction", "interchange", "interconnection", "intercourse", "interest", "interface", "interferometer", "interior", "interject", "interloper", -"internet", "interpretation", "interpreter", "interval", "intervenor", "intervention", "interview", "interviewer", "intestine", "introduction", +"internet", "interpretation", "interpreter", "intervenor", "intervention", "interview", "interviewer", "intestine", "introduction", "intuition", "invader", "invasion", "invention", "inventor", "inventory", "inverse", "inversion", "investigation", "investigator", "investment", "investor", "invitation", "invite", "invoice", "involvement", "iridescence", "iris", "iron", "ironclad", "irony", "irrigation", "ischemia", "island", "isogloss", "isolation", "issue", "item", "itinerary", "ivory", "jack", "jackal", "jacket", "jackfruit", "jade", "jaguar", -"jail", "jailhouse", "jalapeño", "jam", "jar", "jasmine", "jaw", "jazz", "jealousy", "jeans", "jeep", "jelly", "jellybeans", "jellyfish", +"jail", "jailhouse", "jam", "jar", "jasmine", "jaw", "jazz", "jealousy", "jeans", "jeep", "jelly", "jellybeans", "jellyfish", "jerk", "jet", "jewel", "jeweller", "jewellery", "jewelry", "jicama", "jiffy", "job", "jockey", "jodhpurs", "joey", "jogging", "joint", "joke", "jot", "journal", "journalism", "journalist", "journey", "joy", "judge", "judgment", "judo", "jug", "juggernaut", "juice", "julienne", "jumbo", "jump", "jumper", "jumpsuit", "jungle", "junior", "junk", "junker", "junket", "jury", "justice", "justification", "jute", "kale", "kamikaze", "kangaroo", "karate", "kayak", "kazoo", "kebab", "keep", "keeper", "kendo", "kennel", "ketch", "ketchup", "kettle", "kettledrum", -"key", "keyboard", "keyboarding", "keystone", "kick", "kid", "kidney", "kielbasa", "kill", "killer", "killing", "kilogram", +"keyboard", "keyboarding", "keystone", "kick", "kid", "kidney", "kielbasa", "killer", "killing", "kilogram", "kilometer", "kilt", "kimono", "kinase", "kind", "kindness", "king", "kingdom", "kingfish", "kiosk", "kiss", "kit", "kitchen", "kite", "kitsch", "kitten", "kitty", "kiwi", "knee", 
"kneejerk", "knickers", "knife", "knight", "knitting", "knock", "knot", "knowledge", "knuckle", "koala", "kohlrabi", "kumquat", "lab", "label", "labor", "laboratory", "laborer", "labour", "labourer", "lace", "lack", "lacquerware", "lad", "ladder", "ladle", "lady", "ladybug", "lag", "lake", "lamb", "lambkin", "lament", "lamp", "lanai", "land", "landform", "landing", "landmine", "landscape", "lane", "language", "lantern", "lap", "laparoscope", "lapdog", "laptop", "larch", "lard", -"larder", "lark", "larva", "laryngitis", "lasagna", "lashes", "last", "latency", "latex", "lathe", "latitude", "latte", "latter", "laugh", -"laughter", "laundry", "lava", "law", "lawmaker", "lawn", "lawsuit", "lawyer", "lay", "layer", "layout", "lead", "leader", "leadership", -"leading", "leaf", "league", "leaker", "leap", "learning", "leash", "leather", "leave", "leaver", "lecture", "leek", "leeway", "left", +"larder", "lark", "larva", "laryngitis", "lasagna", "lashes", "latency", "latex", "lathe", "latitude", "latte", "latter", "laugh", +"laughter", "laundry", "lava", "law", "lawmaker", "lawn", "lawsuit", "lawyer", "lay", "layer", "lead", "leader", "leadership", +"leaf", "league", "leaker", "leap", "learning", "leash", "leather", "leave", "leaver", "lecture", "leek", "leeway", "leg", "legacy", "legal", "legend", "legging", "legislation", "legislator", "legislature", "legitimacy", "legume", "leisure", "lemon", "lemonade", "lemur", "lender", "lending", "length", "lens", "lentil", "leopard", "leprosy", "leptocephalus", "lesson", "letter", -"lettuce", "level", "lever", "leverage", "leveret", "liability", "liar", "liberty", "libido", "library", "licence", "license", "licensing", -"licorice", "lid", "lie", "lieu", "lieutenant", "life", "lifestyle", "lifetime", "lift", "ligand", "light", "lighting", "lightning", +"lettuce", "lever", "leverage", "leveret", "liability", "liar", "liberty", "libido", "library", "licence", "license", "licensing", +"licorice", "lid", "lie", "lieu", "lieutenant", "life", "lifestyle", "lift", "ligand", "light", "lighting", "lightning", "lightscreen", "ligula", "likelihood", "likeness", "lilac", "lily", "limb", "lime", "limestone", "limitation", "limo", "line", "linen", "liner", "linguist", "linguistics", "lining", "link", "linkage", "linseed", "lion", "lip", "lipid", "lipoprotein", "lipstick", -"liquid", "liquidity", "liquor", "list", "listening", "listing", "literate", "literature", "litigation", "litmus", "litter", "littleneck", -"liver", "livestock", "living", "lizard", "llama", "load", "loading", "loaf", "loafer", "loan", "lobby", "lobotomy", "lobster", "local", +"liquid", "liquidity", "liquor", "listening", "listing", "literate", "literature", "litigation", "litmus", "litter", "littleneck", +"liver", "livestock", "living", "lizard", "llama", "load", "loading", "loaf", "loafer", "loan", "lobby", "lobotomy", "lobster", "locality", "location", "lock", "locker", "locket", "locomotive", "locust", "lode", "loft", "log", "loggia", "logic", "login", "logistics", "logo", "loincloth", "lollipop", "loneliness", "longboat", "longitude", "look", "lookout", "loop", "loophole", "loquat", "lord", "loss", "lot", "lotion", "lottery", "lounge", "louse", "lout", "love", "lover", "lox", "loyalty", "luck", "luggage", "lumber", "lumberman", "lunch", @@ -350,28 +754,28 @@ std::initializer_list nouns "manufacturer", "manufacturing", "many", "map", "maple", "mapping", "maracas", "marathon", "marble", "march", "mare", "margarine", "margin", "mariachi", "marimba", "marines", "marionberry", "mark", "marker", "market", 
"marketer", "marketing", "marketplace", "marksman", "markup", "marmalade", "marriage", "marsh", "marshland", "marshmallow", "marten", "marxism", "mascara", "mask", "masonry", "mass", "massage", "mast", -"master", "masterpiece", "mastication", "mastoid", "mat", "match", "matchmaker", "mate", "material", "maternity", "math", "mathematics", -"matrix", "matter", "mattock", "mattress", "max", "maximum", "maybe", "mayonnaise", "mayor", "meadow", "meal", "mean", "meander", "meaning", +"master", "masterpiece", "mastication", "mastoid", "mat", "matchmaker", "mate", "material", "maternity", "math", "mathematics", +"matrix", "matter", "mattock", "mattress", "maximum", "maybe", "mayonnaise", "mayor", "meadow", "meal", "mean", "meander", "meaning", "means", "meantime", "measles", "measure", "measurement", "meat", "meatball", "meatloaf", "mecca", "mechanic", "mechanism", "med", "medal", "media", "median", "medication", "medicine", "medium", "meet", "meeting", "melatonin", "melody", "melon", "member", "membership", "membrane", -"meme", "memo", "memorial", "memory", "men", "menopause", "menorah", "mention", "mentor", "menu", "merchandise", "merchant", "mercury", +"meme", "memo", "memorial", "men", "menopause", "menorah", "mention", "mentor", "menu", "merchandise", "merchant", "mercury", "meridian", "meringue", "merit", "mesenchyme", "mess", "message", "messenger", "messy", "metabolite", "metal", "metallurgist", "metaphor", "meteor", "meteorology", "meter", "methane", "method", "methodology", "metric", "metro", "metronome", "mezzanine", "microlending", "micronutrient", "microphone", "microwave", "midden", "middle", "middleman", "midline", "midnight", "midwife", "might", "migrant", "migration", "mile", "mileage", "milepost", "milestone", "military", "milk", "milkshake", "mill", "millennium", "millet", "millimeter", "million", -"millisecond", "millstone", "mime", "mimosa", "min", "mincemeat", "mind", "mine", "mineral", "mineshaft", "mini", "minibus", -"minimalism", "minimum", "mining", "minion", "minister", "mink", "minnow", "minor", "minority", "mint", "minute", "miracle", +"millstone", "mime", "mimosa", "mincemeat", "mind", "mine", "mineral", "mineshaft", "mini", "minibus", +"minimalism", "minimum", "mining", "minion", "minister", "mink", "minnow", "minor", "minority", "mint", "miracle", "mirror", "miscarriage", "miscommunication", "misfit", "misnomer", "misogyny", "misplacement", "misreading", "misrepresentation", "miss", "missile", "mission", "missionary", "mist", "mistake", "mister", "misunderstand", "miter", "mitten", "mix", "mixer", "mixture", "moai", "moat", "mob", "mobile", "mobility", "mobster", "moccasins", "mocha", "mochi", "mode", "model", "modeling", "modem", "modernist", "modernity", "modification", "molar", "molasses", "molding", "mole", "molecule", "mom", "moment", "monastery", "monasticism", "money", "monger", "monitor", -"monitoring", "monk", "monkey", "monocle", "monopoly", "monotheism", "monsoon", "monster", "month", "monument", "mood", "moody", "moon", +"monitoring", "monk", "monkey", "monocle", "monopoly", "monotheism", "monsoon", "monster", "monument", "mood", "moody", "moon", "moonlight", "moonscape", "moonshine", "moose", "mop", "morale", "morbid", "morbidity", "morning", "moron", "morphology", "morsel", "mortal", "mortality", "mortgage", "mortise", "mosque", "mosquito", "most", "motel", "moth", "mother", "motion", "motivation", "motive", "motor", "motorboat", "motorcar", "motorcycle", "mound", "mountain", "mouse", "mouser", "mousse", "moustache", "mouth", "mouton", "movement", 
"mover", "movie", "mower", "mozzarella", "mud", "muffin", "mug", "mukluk", "mule", "multimedia", "murder", "muscat", "muscatel", "muscle", "musculature", "museum", "mushroom", "music", "musician", "muskrat", "mussel", "mustache", "mustard", -"mutation", "mutt", "mutton", "mycoplasma", "mystery", "myth", "mythology", "nail", "name", "naming", "nanoparticle", "napkin", "narrative", +"mutt", "mutton", "mycoplasma", "mystery", "myth", "mythology", "nail", "naming", "nanoparticle", "napkin", "narrative", "nasal", "nation", "nationality", "native", "naturalisation", "nature", "navigation", "necessity", "neck", "necklace", "necktie", "nectar", "nectarine", "need", "needle", "neglect", "negligee", "negotiation", "neighbor", "neighborhood", "neighbour", "neighbourhood", "neologism", "neon", "neonate", "nephew", "nerve", "nest", "nestling", "nestmate", "net", "netball", "netbook", "netsuke", "network", "networking", @@ -381,13 +785,13 @@ std::initializer_list nouns "noodle", "noodles", "noon", "norm", "normal", "normalisation", "normalization", "north", "nose", "notation", "note", "notebook", "notepad", "nothing", "notice", "notion", "notoriety", "nougat", "noun", "nourishment", "novel", "nucleotidase", "nucleotide", "nudge", "nuke", "number", "numeracy", "numeric", "numismatist", "nun", "nurse", "nursery", "nursing", "nurture", "nut", "nutmeg", "nutrient", "nutrition", -"nylon", "nymph", "oak", "oar", "oasis", "oat", "oatmeal", "oats", "obedience", "obesity", "obi", "object", "objection", "objective", +"nylon", "nymph", "oak", "oar", "oasis", "oat", "oatmeal", "oats", "obedience", "obesity", "obi", "objection", "objective", "obligation", "oboe", "observation", "observatory", "obsession", "obsidian", "obstacle", "occasion", "occupation", "occurrence", "ocean", "ocelot", "octagon", "octave", "octavo", "octet", "octopus", "odometer", "odyssey", "oeuvre", "offence", "offense", "offer", -"offering", "office", "officer", "official", "offset", "oil", "okra", "oldie", "oleo", "olive", "omega", "omelet", "omission", "omnivore", +"offering", "office", "officer", "official", "oil", "okra", "oldie", "oleo", "olive", "omega", "omelet", "omission", "omnivore", "oncology", "onion", "online", "onset", "opening", "opera", "operating", "operation", "operator", "ophthalmologist", "opinion", "opium", "opossum", "opponent", "opportunist", "opportunity", "opposite", "opposition", "optimal", "optimisation", "optimist", "optimization", -"option", "orange", "orangutan", "orator", "orchard", "orchestra", "orchid", "ordinary", "ordination", "ore", "oregano", "organ", +"orange", "orangutan", "orator", "orchard", "orchestra", "orchid", "ordinary", "ordination", "ore", "oregano", "organ", "organisation", "organising", "organization", "organizing", "orient", "orientation", "origin", "original", "originality", "ornament", "osmosis", "osprey", "ostrich", "other", "otter", "ottoman", "ounce", "outback", "outcome", "outfielder", "outfit", "outhouse", "outlaw", "outlay", "outlet", "outline", "outlook", "output", "outrage", "outrigger", "outrun", "outset", "outside", "oval", "ovary", "oven", "overcharge", @@ -398,7 +802,7 @@ std::initializer_list nouns "pansy", "panther", "panties", "pantologist", "pantology", "pantry", "pants", "pantsuit", "panty", "pantyhose", "papa", "papaya", "paper", "paperback", "paperwork", "parable", "parachute", "parade", "paradise", "paragraph", "parallelogram", "paramecium", "paramedic", "parameter", "paranoia", "parcel", "parchment", "pard", "pardon", "parent", "parenthesis", "parenting", "park", "parka", 
"parking", "parliament", -"parole", "parrot", "parser", "parsley", "parsnip", "part", "participant", "participation", "particle", "particular", "partner", "partnership", +"parole", "parrot", "parser", "parsley", "parsnip", "participant", "participation", "particle", "particular", "partner", "partnership", "partridge", "party", "pass", "passage", "passbook", "passenger", "passing", "passion", "passive", "passport", "password", "past", "pasta", "paste", "pastor", "pastoralist", "pastry", "pasture", "pat", "patch", "pate", "patent", "patentee", "path", "pathogenesis", "pathology", "pathway", "patience", "patient", "patina", "patio", "patriarch", "patrimony", "patriot", "patrol", "patroller", "patrolling", "patron", @@ -413,26 +817,26 @@ std::initializer_list nouns "physical", "physics", "physiology", "pianist", "piano", "piccolo", "pick", "pickax", "pickaxe", "picket", "pickle", "pickup", "picnic", "picture", "picturesque", "pie", "piece", "pier", "piety", "pig", "pigeon", "piglet", "pigpen", "pigsty", "pike", "pilaf", "pile", "pilgrim", "pilgrimage", "pill", "pillar", "pillbox", "pillow", "pilot", "pimp", "pimple", "pin", "pinafore", "pine", "pineapple", -"pinecone", "ping", "pink", "pinkie", "pinot", "pinstripe", "pint", "pinto", "pinworm", "pioneer", "pipe", "pipeline", "piracy", "pirate", +"pinecone", "ping", "pink", "pinkie", "pinot", "pinstripe", "pint", "pinto", "pinworm", "pioneer", "pipe", "piracy", "pirate", "pistol", "pit", "pita", "pitch", "pitcher", "pitching", "pith", "pizza", "place", "placebo", "placement", "placode", "plagiarism", -"plain", "plaintiff", "plan", "plane", "planet", "planning", "plant", "plantation", "planter", "planula", "plaster", "plasterboard", +"plain", "plaintiff", "plane", "planet", "planning", "plant", "plantation", "planter", "planula", "plaster", "plasterboard", "plastic", "plate", "platelet", "platform", "platinum", "platter", "platypus", "play", "player", "playground", "playroom", "playwright", "plea", "pleasure", "pleat", "pledge", "plenty", "plier", "pliers", "plight", "plot", "plough", "plover", "plow", "plowman", "plug", "plugin", "plum", "plumber", "plume", "plunger", "plywood", "pneumonia", "pocket", "pocketbook", "pod", "podcast", "poem", "poet", "poetry", "poignance", "point", "poison", "poisoning", "poker", "polarisation", "polarization", "pole", "polenta", "police", -"policeman", "policy", "polish", "politician", "politics", "poll", "polliwog", "pollutant", "pollution", "polo", "polyester", "polyp", +"policeman", "polish", "politician", "politics", "poll", "polliwog", "pollutant", "pollution", "polo", "polyester", "polyp", "pomegranate", "pomelo", "pompom", "poncho", "pond", "pony", "pool", "poor", "pop", "popcorn", "poppy", "popsicle", "popularity", "population", "populist", "porcelain", "porch", "porcupine", "pork", "porpoise", "port", "porter", "portfolio", "porthole", "portion", "portrait", -"position", "possession", "possibility", "possible", "post", "postage", "postbox", "poster", "posterior", "postfix", "pot", "potato", +"possession", "possibility", "possible", "post", "postage", "postbox", "poster", "posterior", "postfix", "pot", "potato", "potential", "pottery", "potty", "pouch", "poultry", "pound", "pounding", "poverty", "powder", "power", "practice", "practitioner", "prairie", -"praise", "pray", "prayer", "precedence", "precedent", "precipitation", "precision", "predecessor", "preface", "preference", "prefix", +"praise", "pray", "prayer", "precedence", "precedent", "precipitation", "predecessor", "preface", "preference", 
"prefix", "pregnancy", "prejudice", "prelude", "premeditation", "premier", "premise", "premium", "preoccupation", "preparation", "prescription", "presence", "present", "presentation", "preservation", "preserves", "presidency", "president", "press", "pressroom", "pressure", "pressurisation", "pressurization", "prestige", "presume", "pretzel", "prevalence", "prevention", "prey", "price", "pricing", "pride", "priest", "priesthood", -"primary", "primate", "prince", "princess", "principal", "principle", "print", "printer", "printing", "prior", "priority", "prison", +"primate", "prince", "princess", "principal", "principle", "print", "printer", "printing", "prior", "priority", "prison", "prisoner", "privacy", "private", "privilege", "prize", "prizefight", "probability", "probation", "probe", "problem", "procedure", "proceedings", "process", "processing", "processor", "proctor", "procurement", "produce", "producer", "product", "production", "productivity", "profession", -"professional", "professor", "profile", "profit", "progenitor", "program", "programme", "programming", "progress", "progression", "prohibition", +"professional", "professor", "profit", "progenitor", "program", "programme", "programming", "progress", "progression", "prohibition", "project", "proliferation", "promenade", "promise", "promotion", "prompt", "pronoun", "pronunciation", "proof", "propaganda", "propane", "property", "prophet", "proponent", "proportion", "proposal", "proposition", "proprietor", "prose", "prosecution", "prosecutor", "prospect", "prosperity", "prostacyclin", "prostanoid", "prostrate", "protection", "protein", "protest", "protocol", "providence", "provider", @@ -440,14 +844,14 @@ std::initializer_list nouns "psychologist", "psychology", "ptarmigan", "pub", "public", "publication", "publicity", "publisher", "publishing", "pudding", "puddle", "puffin", "pug", "puggle", "pulley", "pulse", "puma", "pump", "pumpernickel", "pumpkin", "pumpkinseed", "pun", "punch", "punctuation", "punishment", "pup", "pupa", "pupil", "puppet", "puppy", "purchase", "puritan", "purity", "purple", "purpose", "purr", "purse", "pursuit", -"push", "pusher", "put", "puzzle", "pyramid", "pyridine", "quadrant", "quail", "qualification", "quality", "quantity", "quart", "quarter", -"quartet", "quartz", "queen", "query", "quest", "question", "questioner", "questionnaire", "quiche", "quicksand", "quiet", "quill", "quilt", -"quince", "quinoa", "quit", "quiver", "quota", "quotation", "quote", "rabbi", "rabbit", "raccoon", "race", "racer", "racing", "racism", +"push", "pusher", "put", "puzzle", "pyramid", "pyridine", "quadrant", "quail", "qualification", "quality", "quantity", "quart", +"quartet", "quartz", "queen", "quest", "question", "questioner", "questionnaire", "quiche", "quicksand", "quiet", "quill", "quilt", +"quince", "quinoa", "quit", "quiver", "quotation", "quote", "rabbi", "rabbit", "raccoon", "race", "racer", "racing", "racism", "racist", "rack", "radar", "radiator", "radio", "radiosonde", "radish", "raffle", "raft", "rag", "rage", "raid", "rail", "railing", "railroad", "railway", "raiment", "rain", "rainbow", "raincoat", "rainmaker", "rainstorm", "rainy", "raise", "raisin", "rake", "rally", "ram", "rambler", -"ramen", "ramie", "ranch", "rancher", "randomisation", "randomization", "range", "ranger", "rank", "rap", "rape", "raspberry", "rat", +"ramen", "ramie", "ranch", "rancher", "randomisation", "randomization", "ranger", "rank", "rap", "rape", "raspberry", "rat", "rate", "ratepayer", "rating", "ratio", "rationale", "rations", 
"raven", "ravioli", "rawhide", "ray", "rayon", "razor", "reach", "reactant", -"reaction", "read", "reader", "readiness", "reading", "real", "reality", "realization", "realm", "reamer", "rear", "reason", "reasoning", +"reaction", "read", "reader", "readiness", "reading", "real", "reality", "realization", "reamer", "rear", "reason", "reasoning", "rebel", "rebellion", "reboot", "recall", "recapitulation", "receipt", "receiver", "reception", "receptor", "recess", "recession", "recipe", "recipient", "reciprocity", "reclamation", "recliner", "recognition", "recollection", "recommendation", "reconsideration", "record", "recorder", "recording", "recovery", "recreation", "recruit", "rectangle", "red", "redesign", "redhead", "redirect", "rediscovery", "reduction", @@ -457,21 +861,21 @@ std::initializer_list nouns "reliability", "relief", "religion", "relish", "reluctance", "remains", "remark", "reminder", "remnant", "remote", "removal", "renaissance", "rent", "reorganisation", "reorganization", "repair", "reparation", "repayment", "repeat", "replacement", "replica", "replication", "reply", "report", "reporter", "reporting", "repository", "representation", "representative", "reprocessing", "republic", "republican", "reputation", -"request", "requirement", "resale", "rescue", "research", "researcher", "resemblance", "reservation", "reserve", "reservoir", "reset", +"request", "requirement", "resale", "rescue", "research", "researcher", "resemblance", "reservation", "reserve", "reservoir", "residence", "resident", "residue", "resist", "resistance", "resolution", "resolve", "resort", "resource", "respect", "respite", "response", -"responsibility", "rest", "restaurant", "restoration", "restriction", "restroom", "restructuring", "result", "resume", "retailer", "retention", +"responsibility", "rest", "restaurant", "restoration", "restriction", "restroom", "restructuring", "result", "retailer", "retention", "rethinking", "retina", "retirement", "retouching", "retreat", "retrospect", "retrospective", "retrospectivity", "return", "reunion", "revascularisation", "revascularization", "reveal", "revelation", "revenant", "revenge", "revenue", "reversal", "reverse", "review", "revitalisation", "revitalization", "revival", "revolution", "revolver", "reward", "rhetoric", "rheumatism", "rhinoceros", "rhubarb", -"rhyme", "rhythm", "rib", "ribbon", "rice", "riddle", "ride", "rider", "ridge", "riding", "rifle", "right", "rim", "ring", "ringworm", +"rhyme", "rhythm", "rib", "ribbon", "rice", "riddle", "ride", "rider", "ridge", "riding", "rifle", "rim", "ring", "ringworm", "riot", "rip", "ripple", "rise", "riser", "risk", "rite", "ritual", "river", "riverbed", "rivulet", "road", "roadway", "roar", "roast", -"robe", "robin", "robot", "robotics", "rock", "rocker", "rocket", "rod", "role", "roll", "roller", "romaine", "romance", +"robe", "robin", "robot", "robotics", "rock", "rocker", "rocket", "rod", "roll", "roller", "romaine", "romance", "roof", "room", "roommate", "rooster", "root", "rope", "rose", "rosemary", "roster", "rostrum", "rotation", "round", "roundabout", "route", -"router", "routine", "row", "rowboat", "rowing", "rubber", "rubric", "ruby", "ruckus", "rudiment", "ruffle", "rug", "rugby", +"router", "routine", "rowboat", "rowing", "rubber", "rubric", "ruby", "ruckus", "rudiment", "ruffle", "rug", "rugby", "ruin", "rule", "ruler", "ruling", "rum", "rumor", "run", "runaway", "runner", "running", "runway", "rush", "rust", "rutabaga", "rye", "sabre", "sac", "sack", "saddle", "sadness", "safari", "safe", "safeguard", 
"safety", "saffron", "sage", "sail", "sailboat", "sailing", -"sailor", "saint", "sake", "salad", "salami", "salary", "sale", "salesman", "salmon", "salon", "saloon", "salsa", "salt", "salute", "samovar", -"sampan", "sample", "samurai", "sanction", "sanctity", "sanctuary", "sand", "sandal", "sandbar", "sandpaper", "sandwich", "sanity", "sardine", +"sailor", "saint", "sake", "salad", "salami", "salary", "sale", "salesman", "salmon", "salon", "saloon", "salsa", "salute", "samovar", +"sampan", "samurai", "sanction", "sanctity", "sanctuary", "sand", "sandal", "sandbar", "sandpaper", "sandwich", "sanity", "sardine", "sari", "sarong", "sash", "satellite", "satin", "satire", "satisfaction", "sauce", "saucer", "sauerkraut", "sausage", "savage", "savannah", "saving", "savings", "savior", "saviour", "savory", "saw", "saxophone", "scaffold", "scale", "scallion", "scallops", "scalp", "scam", "scanner", "scarecrow", "scarf", "scarification", "scenario", "scene", "scenery", "scent", "schedule", "scheduling", "schema", "scheme", @@ -479,20 +883,20 @@ std::initializer_list nouns "scooter", "scope", "score", "scorn", "scorpion", "scotch", "scout", "scow", "scrambled", "scrap", "scraper", "scratch", "screamer", "screen", "screening", "screenwriting", "screw", "screwdriver", "scrim", "scrip", "script", "scripture", "scrutiny", "sculpting", "sculptural", "sculpture", "sea", "seabass", "seafood", "seagull", "seal", "seaplane", "search", "seashore", "seaside", "season", "seat", -"seaweed", "second", "secrecy", "secret", "secretariat", "secretary", "secretion", "section", "sectional", "sector", "security", "sediment", +"seaweed", "secrecy", "secret", "secretariat", "secretary", "secretion", "section", "sectional", "sector", "security", "sediment", "seed", "seeder", "seeker", "seep", "segment", "seizure", "selection", "self", "seller", "selling", "semantics", "semester", "semicircle", "semicolon", "semiconductor", "seminar", "senate", "senator", "sender", "senior", "sense", "sensibility", "sensitive", "sensitivity", "sensor", "sentence", "sentencing", "sentiment", "sepal", "separation", "septicaemia", "sequel", -"sequence", "serial", "series", "sermon", "serum", "serval", "servant", "server", "service", "servitude", "sesame", "session", "set", -"setback", "setting", "settlement", "settler", "severity", "sewer", "sex", "sexuality", "shack", "shackle", "shade", "shadow", "shadowbox", +"sequence", "serial", "series", "sermon", "serum", "serval", "servant", "service", "servitude", "sesame", "session", +"setback", "settlement", "settler", "severity", "sewer", "sex", "sexuality", "shack", "shackle", "shade", "shadow", "shadowbox", "shakedown", "shaker", "shallot", "shallows", "shame", "shampoo", "shanty", "shape", "share", "shareholder", "shark", "shaw", "shawl", "shear", "shearling", "sheath", "shed", "sheep", "sheet", "shelf", "shell", "shelter", "sherbet", "sherry", "shield", "shift", "shin", "shine", "shingle", "ship", "shipper", "shipping", "shipyard", "shirt", "shirtdress", "shoat", "shock", "shoe", "shoehorn", "shoelace", "shoemaker", "shoes", "shoestring", "shofar", "shoot", "shootdown", "shop", "shopper", "shopping", "shore", "shoreline", -"short", "shortage", "shorts", "shortwave", "shot", "shoulder", "shout", "shovel", "show", "shower", "shred", "shrimp", +"short", "shortage", "shorts", "shortwave", "shot", "shoulder", "shout", "shovel", "shower", "shred", "shrimp", "shrine", "shutdown", "sibling", "sick", "sickness", "side", "sideboard", "sideburns", "sidecar", "sidestream", "sidewalk", "siding", "siege", 
"sigh", "sight", "sightseeing", "sign", "signal", "signature", "signet", "significance", "signify", "signup", "silence", "silica", -"silicon", "silk", "silkworm", "sill", "silly", "silo", "silver", "similarity", "simple", "simplicity", "simplification", "simvastatin", +"silicon", "silk", "silkworm", "sill", "silly", "silo", "silver", "similarity", "simplicity", "simplification", "simvastatin", "sin", "singer", "singing", "singular", "sink", "sinuosity", "sip", "sir", "sister", "sitar", "site", "situation", "size", "skate", "skating", "skean", "skeleton", "ski", "skiing", "skill", "skin", "skirt", "skull", "skullcap", "skullduggery", "skunk", "sky", "skylight", "skyline", "skyscraper", "skywalk", "slang", "slapstick", "slash", "slate", "slavery", "slaw", "sled", "sledge", @@ -503,7 +907,7 @@ std::initializer_list nouns "society", "sociology", "sock", "socks", "soda", "sofa", "softball", "softdrink", "softening", "software", "soil", "soldier", "sole", "solicitation", "solicitor", "solidarity", "solidity", "soliloquy", "solitaire", "solution", "solvency", "sombrero", "somebody", "someone", "someplace", "somersault", "something", "somewhere", "son", "sonar", "sonata", "song", "songbird", "sonnet", "soot", "sophomore", "soprano", -"sorbet", "sorghum", "sorrel", "sorrow", "sort", "soul", "soulmate", "sound", "soundness", "soup", "source", "sourwood", "sousaphone", +"sorbet", "sorghum", "sorrel", "sorrow", "sort", "soul", "soulmate", "sound", "soundness", "soup", "sourwood", "sousaphone", "south", "southeast", "souvenir", "sovereignty", "sow", "soy", "soybean", "space", "spacing", "spade", "spaghetti", "span", "spandex", "spank", "sparerib", "spark", "sparrow", "spasm", "spat", "spatula", "spawn", "speaker", "speakerphone", "speaking", "spear", "spec", "special", "specialist", "specialty", "species", "specification", "spectacle", "spectacles", "spectrograph", "spectrum", "speculation", @@ -515,11 +919,11 @@ std::initializer_list nouns "staff", "stag", "stage", "stain", "stair", "staircase", "stake", "stalk", "stall", "stallion", "stamen", "stamina", "stamp", "stance", "stand", "standard", "standardisation", "standardization", "standing", "standoff", "standpoint", "star", "starboard", "start", "starter", "state", "statement", "statin", "station", "statistic", "statistics", "statue", "status", "statute", "stay", "steak", -"stealth", "steam", "steamroller", "steel", "steeple", "stem", "stench", "stencil", "step", +"stealth", "steam", "steamroller", "steel", "steeple", "stem", "stench", "stencil", "stepdaughter", "stepmother", "stepson", "stereo", "stew", "steward", "stick", "sticker", "stiletto", "still", "stimulation", "stimulus", "sting", "stinger", "stitch", "stitcher", "stock", "stockings", "stole", "stomach", "stone", "stonework", "stool", -"stop", "stopsign", "stopwatch", "storage", "store", "storey", "storm", "story", "storyboard", "stot", "stove", "strait", +"stop", "stopsign", "stopwatch", "store", "storey", "storm", "story", "storyboard", "stot", "stove", "strait", "strand", "stranger", "strap", "strategy", "straw", "strawberry", "strawman", "stream", "street", "streetcar", "strength", "stress", "stretch", "strife", "strike", "string", "strip", "stripe", "strobe", "stroke", "structure", "strudel", "struggle", "stucco", "stud", "student", "studio", "study", "stuff", "stumbling", "stump", "stupidity", "sturgeon", "sty", "style", "styling", "stylus", "sub", "subcomponent", @@ -533,16 +937,16 @@ std::initializer_list nouns "suspenders", "suspension", "sustainment", "sustenance", "swallow", 
"swamp", "swan", "swanling", "swath", "sweat", "sweater", "sweatshirt", "sweatshop", "sweatsuit", "sweets", "swell", "swim", "swimming", "swimsuit", "swine", "swing", "switch", "switchboard", "switching", "swivel", "sword", "swordfight", "swordfish", "sycamore", "symbol", "symmetry", "sympathy", "symptom", "syndicate", "syndrome", "synergy", -"synod", "synonym", "synthesis", "syrup", "system", "tab", "tabby", "tabernacle", "tablecloth", "tablet", "tabletop", +"synod", "synonym", "synthesis", "syrup", "tab", "tabby", "tabernacle", "tablecloth", "tablet", "tabletop", "tachometer", "tackle", "taco", "tactics", "tactile", "tadpole", "tag", "tail", "tailbud", "tailor", "tailspin", "takeover", "tale", "talent", "talk", "talking", "tamale", "tambour", "tambourine", "tan", "tandem", "tangerine", "tank", "tanker", "tankful", "tap", "tape", "tapioca", "target", "taro", "tarragon", "tart", "task", "tassel", "taste", "tatami", "tattler", "tattoo", "tavern", "tax", "taxi", "taxicab", "taxpayer", "tea", "teacher", "teaching", "team", "teammate", "teapot", "tear", "tech", "technician", "technique", "technologist", "technology", "tectonics", "teen", "teenager", "teepee", "telephone", "telescreen", "teletype", -"television", "tell", "teller", "temp", "temper", "temperature", "temple", "tempo", "temporariness", "temporary", "temptation", "temptress", +"television", "tell", "teller", "temp", "temper", "temperature", "temple", "tempo", "temporariness", "temptation", "temptress", "tenant", "tendency", "tender", "tenement", "tenet", "tennis", "tenor", "tension", "tensor", "tent", "tentacle", "tenth", "tepee", "teriyaki", "term", "terminal", "termination", "terminology", "termite", "terrace", "terracotta", "terrapin", "terrarium", "territory", "terror", -"terrorism", "terrorist", "test", "testament", "testimonial", "testimony", "testing", "text", "textbook", "textual", "texture", "thanks", +"terrorism", "terrorist", "testament", "testimonial", "testimony", "testing", "text", "textbook", "textual", "texture", "thanks", "thaw", "theater", "theft", "theism", "theme", "theology", "theory", "therapist", "therapy", "thermals", "thermometer", "thermostat", "thesis", "thickness", "thief", "thigh", "thing", "thinking", "thirst", "thistle", "thong", "thongs", "thorn", "thought", "thousand", "thread", "threat", "threshold", "thrift", "thrill", "throat", "throne", "thrush", "thrust", "thug", "thumb", "thump", "thunder", "thunderbolt", @@ -550,49 +954,49 @@ std::initializer_list nouns "timber", "time", "timeline", "timeout", "timer", "timetable", "timing", "timpani", "tin", "tinderbox", "tinkle", "tintype", "tip", "tire", "tissue", "titanium", "title", "toad", "toast", "toaster", "tobacco", "today", "toe", "toenail", "toffee", "tofu", "tog", "toga", "toilet", "tolerance", "tolerant", "toll", "tomatillo", "tomato", "tomb", "tomography", "tomorrow", "ton", "tonality", "tone", "tongue", -"tonic", "tonight", "tool", "toot", "tooth", "toothbrush", "toothpaste", "toothpick", "top", "topic", "topsail", "toque", +"tonic", "tonight", "tool", "toot", "tooth", "toothbrush", "toothpaste", "toothpick", "topic", "topsail", "toque", "toreador", "tornado", "torso", "torte", "tortellini", "tortilla", "tortoise", "tosser", "total", "tote", "touch", "tour", "tourism", "tourist", "tournament", "towel", "tower", "town", "townhouse", "township", "toy", "trace", "trachoma", "track", -"tracking", "tracksuit", "tract", "tractor", "trade", "trader", "trading", "tradition", "traditionalism", "traffic", "trafficker", "tragedy", -"trail", "trailer", 
"trailpatrol", "train", "trainer", "training", "trait", "tram", "tramp", "trance", "transaction", "transcript", "transfer", +"tracksuit", "tract", "tractor", "trade", "trader", "trading", "tradition", "traditionalism", "traffic", "trafficker", "tragedy", +"trail", "trailer", "trailpatrol", "train", "trainer", "training", "trait", "tram", "tramp", "trance", "transcript", "transfer", "transformation", "transit", "transition", "translation", "transmission", "transom", "transparency", "transplantation", "transport", "transportation", "trap", "trapdoor", "trapezium", "trapezoid", "trash", "travel", "traveler", "tray", "treasure", "treasury", "treat", -"treatment", "treaty", "tree", "trek", "trellis", "tremor", "trench", "trend", "triad", "trial", "triangle", "tribe", "tributary", "trick", -"trigger", "trigonometry", "trillion", "trim", "trinket", "trip", "tripod", "tritone", "triumph", "trolley", "trombone", "troop", "trooper", +"treatment", "treaty", "trek", "trellis", "tremor", "trench", "trend", "triad", "trial", "triangle", "tribe", "tributary", "trick", +"trigonometry", "trillion", "trinket", "trip", "tripod", "tritone", "triumph", "trolley", "trombone", "troop", "trooper", "trophy", "trouble", "trousers", "trout", "trove", "trowel", "truck", "trumpet", "trunk", "trust", "trustee", "truth", "try", "tsunami", "tub", "tuba", "tube", "tuber", "tug", "tugboat", "tuition", "tulip", "tumbler", "tummy", "tuna", "tune", "tunic", "tunnel", "turban", "turf", "turkey", "turmeric", "turn", "turning", "turnip", "turnover", "turnstile", "turret", "turtle", "tusk", "tussle", "tutu", -"tuxedo", "tweet", "tweezers", "twig", "twilight", "twine", "twins", "twist", "twister", "twitter", "type", "typeface", "typewriter", +"tuxedo", "tweet", "tweezers", "twig", "twilight", "twine", "twins", "twist", "twister", "twitter", "typeface", "typewriter", "typhoon", "ukulele", "ultimatum", "umbrella", "unblinking", "uncertainty", "uncle", "underclothes", "underestimate", "underground", "underneath", "underpants", "underpass", "undershirt", "understanding", "understatement", "undertaker", "underwear", "underweight", "underwire", -"underwriting", "unemployment", "unibody", "uniform", "uniformity", "unique", "unit", "unity", "universe", "university", "update", -"upgrade", "uplift", "upper", "upstairs", "upward", "urge", "urgency", "urn", "usage", "use", "user", "usher", "usual", "utensil", "utilisation", +"underwriting", "unemployment", "unibody", "uniform", "uniformity", "unit", "unity", "universe", "university", +"upgrade", "uplift", "upper", "upstairs", "upward", "urge", "urgency", "urn", "usage", "usher", "usual", "utensil", "utilisation", "utility", "utilization", "vacation", "vaccine", "vacuum", "vagrant", "valance", "valentine", "validate", "validity", "valley", "valuable", -"value", "vampire", "van", "vanadyl", "vane", "vanilla", "vanity", "variability", "variable", "variant", "variation", "variety", "vascular", +"vampire", "van", "vanadyl", "vane", "vanilla", "vanity", "variability", "variable", "variant", "variation", "variety", "vascular", "vase", "vault", "vaulting", "veal", "vector", "vegetable", "vegetarian", "vegetarianism", "vegetation", "vehicle", "veil", "vein", "veldt", "vellum", "velocity", "velodrome", "velvet", "vendor", "veneer", "vengeance", "venison", "venom", "venti", "venture", "venue", "veranda", "verb", "verdict", "verification", "vermicelli", "vernacular", "verse", "version", "vertigo", "verve", "vessel", "vest", "vestment", "vet", "veteran", "veterinarian", "veto", "viability", "vibe", 
"vibraphone", "vibration", "vibrissae", "vice", "vicinity", "victim", -"victory", "video", "view", "viewer", "vignette", "villa", "village", "vine", "vinegar", "vineyard", "vintage", "vintner", "vinyl", "viola", +"victory", "video", "viewer", "vignette", "villa", "village", "vine", "vinegar", "vineyard", "vintage", "vintner", "vinyl", "viola", "violation", "violence", "violet", "violin", "virginal", "virtue", "virus", "visa", "viscose", "vise", "vision", "visit", "visitor", "visor", "vista", "visual", "vitality", "vitamin", "vitro", "vivo", "vixen", "vodka", "vogue", "voice", "void", "vol", "volatility", -"volcano", "volleyball", "volume", "volunteer", "volunteering", "vomit", "vote", "voter", "voting", "voyage", "vulture", "wad", "wafer", +"volcano", "volleyball", "volunteer", "volunteering", "vomit", "vote", "voter", "voting", "voyage", "vulture", "wad", "wafer", "waffle", "wage", "wagon", "waist", "waistband", "wait", "waiter", "waiting", "waitress", "waiver", "wake", "walk", "walker", "walking", "walkway", "wall", "wallaby", "wallet", "walnut", "walrus", "wampum", "wannabe", "want", "war", "warden", "wardrobe", "warfare", "warlock", "warlord", "warming", "warmth", "warning", "warrant", "warren", "warrior", "wasabi", "wash", "washbasin", "washcloth", "washer", -"washtub", "wasp", "waste", "wastebasket", "wasting", "watch", "watcher", "watchmaker", "water", "waterbed", "watercress", "waterfall", +"washtub", "wasp", "waste", "wastebasket", "wasting", "watcher", "watchmaker", "water", "waterbed", "watercress", "waterfall", "waterfront", "watermelon", "waterskiing", "waterspout", "waterwheel", "wave", "waveform", "wax", "way", "weakness", "wealth", "weapon", -"wear", "weasel", "weather", "web", "webinar", "webmail", "webpage", "website", "wedding", "wedge", "weed", "weeder", "weedkiller", "week", +"wear", "weasel", "weather", "web", "webinar", "webmail", "webpage", "website", "wedding", "wedge", "weed", "weeder", "weedkiller", "weekend", "weekender", "weight", "weird", "welcome", "welfare", "well", "west", "western", "wetland", "wetsuit", "whack", "whale", "wharf", "wheat", "wheel", "whelp", "whey", "whip", "whirlpool", "whirlwind", "whisker", "whiskey", "whisper", "whistle", "white", "whole", "wholesale", "wholesaler", "whorl", "wick", "widget", "widow", "width", "wife", "wifi", "wild", "wildebeest", "wilderness", -"wildlife", "will", "willingness", "willow", "win", "wind", "windage", "window", "windscreen", "windshield", "wine", "winery", +"wildlife", "will", "willingness", "willow", "win", "wind", "windage", "windscreen", "windshield", "wine", "winery", "wing", "wingman", "wingtip", "wink", "winner", "winter", "wire", "wiretap", "wiring", "wisdom", "wiseguy", "wish", "wisteria", "wit", "witch", "withdrawal", "witness", "wok", "wolf", "woman", "wombat", "wonder", "wont", "wood", "woodchuck", "woodland", "woodshed", "woodwind", "wool", "woolens", "word", "wording", "work", "workbench", "worker", "workforce", "workhorse", "working", "workout", "workplace", "workshop", "world", "worm", "worry", "worship", "worshiper", "worth", "wound", "wrap", "wraparound", "wrapper", "wrapping", "wreck", "wrecker", "wren", "wrench", "wrestler", "wriggler", "wrinkle", "wrist", "writer", "writing", "wrong", "xylophone", "yacht", -"yahoo", "yak", "yam", "yang", "yard", "yarmulke", "yarn", "yawl", "year", "yeast", "yellow", "yellowjacket", "yesterday", "yew", "yin", +"yahoo", "yak", "yam", "yang", "yard", "yarmulke", "yarn", "yawl", "yeast", "yellow", "yellowjacket", "yesterday", "yew", "yin", "yoga", "yogurt", 
"yoke", "yolk", "young", "youngster", "yourself", "youth", "yoyo", "yurt", "zampone", "zebra", "zebrafish", "zen", "zephyr", "zero", "ziggurat", "zinc", "zipper", "zither", "zombie", "zone", "zoo", "zoologist", "zoology", "zucchini" }; @@ -602,7 +1006,7 @@ std::string_view obfuscateWord(std::string_view src, WordMap & obfuscate_map, Wo { /// Prevent using too many nouns if (obfuscate_map.size() * 2 > nouns.size()) - throw Exception("Too many unique identifiers in queries", ErrorCodes::TOO_MANY_TEMPORARY_COLUMNS); + throw Exception(ErrorCodes::TOO_MANY_TEMPORARY_COLUMNS, "Too many unique identifiers in queries"); std::string_view & mapped = obfuscate_map[src]; if (!mapped.empty()) @@ -637,7 +1041,10 @@ void obfuscateIdentifier(std::string_view src, WriteBuffer & result, WordMap & o { std::string_view word(word_begin, src_pos - word_begin); - if (keep_words.contains(word)) + String wordcopy(word_begin, src_pos - word_begin); + Poco::toUpperInPlace(wordcopy); + + if (keep_words.contains(wordcopy)) { result.write(word.data(), word.size()); } @@ -805,18 +1212,28 @@ void obfuscateLiteral(std::string_view src, WriteBuffer & result, SipHash hash_f while (alpha_end < src_end && isAlphaASCII(*alpha_end)) ++alpha_end; - hash_func.update(src_pos, alpha_end - src_pos); - pcg64 rng(hash_func.get64()); - - while (src_pos < alpha_end) + String wordcopy(src_pos, alpha_end); + Poco::toUpperInPlace(wordcopy); + if (keep_words.contains(wordcopy)) { - auto random = rng(); - if (isLowerAlphaASCII(*src_pos)) - result.write('a' + random % 26); - else - result.write('A' + random % 26); + result.write(src_pos, alpha_end - src_pos); + src_pos = alpha_end; + } + else + { + hash_func.update(src_pos, alpha_end - src_pos); + pcg64 rng(hash_func.get64()); - ++src_pos; + while (src_pos < alpha_end) + { + auto random = rng(); + if (isLowerAlphaASCII(*src_pos)) + result.write('a' + random % 26); + else + result.write('A' + random % 26); + + ++src_pos; + } } } else if (isASCII(src_pos[0])) diff --git a/src/Parsers/parseQuery.cpp b/src/Parsers/parseQuery.cpp index da8450ac301..8d794409f78 100644 --- a/src/Parsers/parseQuery.cpp +++ b/src/Parsers/parseQuery.cpp @@ -359,7 +359,7 @@ ASTPtr parseQueryAndMovePosition( if (res) return res; - throw Exception(error_message, ErrorCodes::SYNTAX_ERROR); + throw Exception::createDeprecated(error_message, ErrorCodes::SYNTAX_ERROR); } diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index 2a9d06bc17b..0d302fda904 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -1377,8 +1377,7 @@ void Planner::buildPlanForQueryNode() */ if (query_node.hasLimit() && apply_limit && !limit_applied && apply_offset) addLimitStep(query_plan, query_analysis_result, planner_context, query_node); - - if (apply_offset && query_node.hasOffset()) + else if (!limit_applied && apply_offset && query_node.hasOffset()) addOffsetStep(query_plan, query_analysis_result); const auto & projection_analysis_result = expression_analysis_result.getProjection(); diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 2484bd093ec..95269f70bcc 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -341,7 +341,8 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(const QueryTreeNodePtr & tabl } else { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected table, table function, query or union. 
Actual {}", table_expression->formatASTForErrorMessage()); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected table, table function, query or union. Actual {}", + table_expression->formatASTForErrorMessage()); } if (from_stage == QueryProcessingStage::FetchColumns) diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp index f6152e324c9..4546e84a02b 100644 --- a/src/Planner/PlannerJoins.cpp +++ b/src/Planner/PlannerJoins.cpp @@ -725,7 +725,8 @@ std::shared_ptr chooseJoinAlgorithm(std::shared_ptr & table_jo if (table_join->isEnabledAlgorithm(JoinAlgorithm::AUTO)) return std::make_shared(table_join, right_table_expression_header); - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Can't execute any of specified algorithms for specified strictness/kind and right storage type"); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Can't execute any of specified algorithms for specified strictness/kind and right storage type"); } } diff --git a/src/Planner/Utils.cpp b/src/Planner/Utils.cpp index fa6bd774960..221e683f4e2 100644 --- a/src/Planner/Utils.cpp +++ b/src/Planner/Utils.cpp @@ -236,8 +236,9 @@ bool queryHasArrayJoinInJoinTree(const QueryTreeNodePtr & query_node) default: { throw Exception(ErrorCodes::LOGICAL_ERROR, - "Unexpected node type for table expression. Expected table, table function, query, union, join or array join. Actual {}", - join_tree_node_to_process->getNodeTypeName()); + "Unexpected node type for table expression. " + "Expected table, table function, query, union, join or array join. Actual {}", + join_tree_node_to_process->getNodeTypeName()); } } } @@ -301,8 +302,9 @@ bool queryHasWithTotalsInAnySubqueryInJoinTree(const QueryTreeNodePtr & query_no default: { throw Exception(ErrorCodes::LOGICAL_ERROR, - "Unexpected node type for table expression. Expected table, table function, query, union, join or array join. Actual {}", - join_tree_node_to_process->getNodeTypeName()); + "Unexpected node type for table expression. " + "Expected table, table function, query, union, join or array join. 
Actual {}", + join_tree_node_to_process->getNodeTypeName()); } } } diff --git a/src/Processors/Chunk.cpp b/src/Processors/Chunk.cpp index 61bd118636d..bbfa1683cf6 100644 --- a/src/Processors/Chunk.cpp +++ b/src/Processors/Chunk.cpp @@ -70,8 +70,8 @@ void Chunk::checkNumRowsIsConsistent() { auto & column = columns[i]; if (column->size() != num_rows) - throw Exception("Invalid number of rows in Chunk column " + column->getName()+ " position " + toString(i) + ": expected " + - toString(num_rows) + ", got " + toString(column->size()), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid number of rows in Chunk column {}: expected {}, got {}", + column->getName()+ " position " + toString(i), toString(num_rows), toString(column->size())); } } @@ -108,8 +108,8 @@ void Chunk::addColumn(ColumnPtr column) if (empty()) num_rows = column->size(); else if (column->size() != num_rows) - throw Exception("Invalid number of rows in Chunk column " + column->getName()+ ": expected " + - toString(num_rows) + ", got " + toString(column->size()), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid number of rows in Chunk column {}, got {}", + column->getName()+ ": expected " + toString(num_rows), toString(column->size())); columns.emplace_back(std::move(column)); } @@ -133,11 +133,11 @@ void Chunk::addColumn(size_t position, ColumnPtr column) void Chunk::erase(size_t position) { if (columns.empty()) - throw Exception("Chunk is empty", ErrorCodes::POSITION_OUT_OF_BOUND); + throw Exception(ErrorCodes::POSITION_OUT_OF_BOUND, "Chunk is empty"); if (position >= columns.size()) - throw Exception("Position " + toString(position) + " out of bound in Chunk::erase(), max position = " - + toString(columns.size() - 1), ErrorCodes::POSITION_OUT_OF_BOUND); + throw Exception(ErrorCodes::POSITION_OUT_OF_BOUND, "Position {} out of bound in Chunk::erase(), max position = {}", + toString(position), toString(columns.size() - 1)); columns.erase(columns.begin() + position); } diff --git a/src/Processors/Executors/ExecutingGraph.cpp b/src/Processors/Executors/ExecutingGraph.cpp index cd94ca7ceae..f84efabdee1 100644 --- a/src/Processors/Executors/ExecutingGraph.cpp +++ b/src/Processors/Executors/ExecutingGraph.cpp @@ -392,10 +392,34 @@ bool ExecutingGraph::updateNode(uint64_t pid, Queue & queue, Queue & async_queue void ExecutingGraph::cancel() { - std::lock_guard guard(processors_mutex); - for (auto & processor : *processors) - processor->cancel(); - cancelled = true; + std::exception_ptr exception_ptr; + + { + std::lock_guard guard(processors_mutex); + for (auto & processor : *processors) + { + try + { + processor->cancel(); + } + catch (...) + { + if (!exception_ptr) + exception_ptr = std::current_exception(); + + /// Log any exception since: + /// a) they are pretty rare (the only that I know is from + /// RemoteQueryExecutor) + /// b) there can be exception during query execution, and in this + /// case, this exception can be ignored (not showed to the user). 
+ tryLogCurrentException("ExecutingGraph"); + } + } + cancelled = true; + } + + if (exception_ptr) + std::rethrow_exception(exception_ptr); } } diff --git a/src/Processors/Executors/ExecutorTasks.cpp b/src/Processors/Executors/ExecutorTasks.cpp index d5c2bfe7399..e61d225a968 100644 --- a/src/Processors/Executors/ExecutorTasks.cpp +++ b/src/Processors/Executors/ExecutorTasks.cpp @@ -41,7 +41,7 @@ void ExecutorTasks::tryWakeUpAnyOtherThreadWithTasks(ExecutionThreadContext & se thread_to_wake = threads_queue.popAny(); if (thread_to_wake >= use_threads) - throw Exception("Non-empty queue without allocated thread", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Non-empty queue without allocated thread"); lock.unlock(); executor_contexts[thread_to_wake]->wakeUp(); @@ -89,7 +89,7 @@ void ExecutorTasks::tryGetTask(ExecutionThreadContext & context) { if (finished) return; - throw Exception("Empty task was returned from async task queue", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty task was returned from async task queue"); } context.setTask(static_cast(res.data)); diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index ad504e65f94..f1e044e470b 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -187,7 +187,7 @@ void PipelineExecutor::finalizeExecution() } if (!all_processors_finished) - throw Exception("Pipeline stuck. Current state:\n" + dumpPipeline(), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Pipeline stuck. Current state:\n{}", dumpPipeline()); } void PipelineExecutor::executeSingleThread(size_t thread_num) diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp index 5799fbcc5d8..fbbf8c119ce 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp @@ -175,20 +175,35 @@ bool PullingAsyncPipelineExecutor::pull(Block & block, uint64_t milliseconds) void PullingAsyncPipelineExecutor::cancel() { + if (!data) + return; + /// Cancel execution if it wasn't finished. - if (data && !data->is_finished && data->executor) - data->executor->cancel(); + try + { + if (!data->is_finished && data->executor) + data->executor->cancel(); + } + catch (...) + { + /// Store exception only of during query execution there was no + /// exception, since only one exception can be re-thrown. + if (!data->has_exception) + { + data->exception = std::current_exception(); + data->has_exception = true; + } + } /// The following code is needed to rethrow exception from PipelineExecutor. /// It could have been thrown from pull(), but we will not likely call it again. /// Join thread here to wait for possible exception. - if (data && data->thread.joinable()) + if (data->thread.joinable()) data->thread.join(); /// Rethrow exception to not swallow it in destructor. 
- if (data) - data->rethrowExceptionIfHas(); + data->rethrowExceptionIfHas(); } Chunk PullingAsyncPipelineExecutor::getTotals() diff --git a/src/Processors/Executors/TasksQueue.h b/src/Processors/Executors/TasksQueue.h index 542e15eb482..bb8996fc1a6 100644 --- a/src/Processors/Executors/TasksQueue.h +++ b/src/Processors/Executors/TasksQueue.h @@ -25,7 +25,7 @@ public: size_t getAnyThreadWithTasks(size_t from_thread = 0) { if (num_tasks == 0) - throw Exception("TaskQueue is empty", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "TaskQueue is empty"); for (size_t i = 0; i < queues.size(); ++i) { @@ -37,7 +37,7 @@ public: from_thread = 0; } - throw Exception("TaskQueue is empty", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "TaskQueue is empty"); } Task * pop(size_t thread_num) diff --git a/src/Processors/Executors/ThreadsQueue.h b/src/Processors/Executors/ThreadsQueue.h index 5aca8147706..9ccdf6bcc09 100644 --- a/src/Processors/Executors/ThreadsQueue.h +++ b/src/Processors/Executors/ThreadsQueue.h @@ -36,7 +36,7 @@ struct ThreadsQueue void push(size_t thread) { if (unlikely(has(thread))) - throw Exception("Can't push thread because it is already in threads queue", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't push thread because it is already in threads queue"); swapThreads(thread, stack[stack_size]); ++stack_size; @@ -45,7 +45,7 @@ struct ThreadsQueue void pop(size_t thread) { if (unlikely(!has(thread))) - throw Exception("Can't pop thread because it is not in threads queue", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't pop thread because it is not in threads queue"); --stack_size; swapThreads(thread, stack[stack_size]); @@ -54,7 +54,7 @@ struct ThreadsQueue size_t popAny() { if (unlikely(stack_size == 0)) - throw Exception("Can't pop from empty queue", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't pop from empty queue"); --stack_size; return stack[stack_size]; diff --git a/src/Processors/Formats/IRowInputFormat.cpp b/src/Processors/Formats/IRowInputFormat.cpp index f7374111a30..81c818e3334 100644 --- a/src/Processors/Formats/IRowInputFormat.cpp +++ b/src/Processors/Formats/IRowInputFormat.cpp @@ -114,7 +114,7 @@ Chunk IRowInputFormat::generate() { size_t column_size = columns[column_idx]->size(); if (column_size == 0) - throw Exception("Unexpected empty column", ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS); + throw Exception(ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS, "Unexpected empty column"); block_missing_values.setBit(column_idx, column_size - 1); } } @@ -245,7 +245,7 @@ Chunk IRowInputFormat::generate() void IRowInputFormat::syncAfterError() { - throw Exception("Method syncAfterError is not implemented for input format", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method syncAfterError is not implemented for input format"); } void IRowInputFormat::resetParser() diff --git a/src/Processors/Formats/IRowOutputFormat.cpp b/src/Processors/Formats/IRowOutputFormat.cpp index ac44dbc0157..3c0692945d4 100644 --- a/src/Processors/Formats/IRowOutputFormat.cpp +++ b/src/Processors/Formats/IRowOutputFormat.cpp @@ -40,7 +40,7 @@ void IRowOutputFormat::consumeTotals(DB::Chunk chunk) auto num_rows = chunk.getNumRows(); if (num_rows != 1) - throw Exception("Got " + toString(num_rows) + " in totals chunk, expected 1", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got {} in totals chunk, 
expected 1", num_rows); const auto & columns = chunk.getColumns(); @@ -57,7 +57,7 @@ void IRowOutputFormat::consumeExtremes(DB::Chunk chunk) auto num_rows = chunk.getNumRows(); const auto & columns = chunk.getColumns(); if (num_rows != 2) - throw Exception("Got " + toString(num_rows) + " in extremes chunk, expected 2", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got {} in extremes chunk, expected 2", num_rows); writeBeforeExtremes(); writeMinExtreme(columns, 0); diff --git a/src/Processors/Formats/ISchemaReader.cpp b/src/Processors/Formats/ISchemaReader.cpp index 4599cdb8748..48cb093f0ab 100644 --- a/src/Processors/Formats/ISchemaReader.cpp +++ b/src/Processors/Formats/ISchemaReader.cpp @@ -21,12 +21,14 @@ void checkFinalInferredType(DataTypePtr & type, const String & name, const Forma { if (!default_type) throw Exception( - ErrorCodes::ONLY_NULLS_WHILE_READING_SCHEMA, - "Cannot determine type for column '{}' by first {} rows of data, most likely this column contains only Nulls or empty " - "Arrays/Maps. You can specify the type for this column using setting schema_inference_hints. " - "If your data contains complex JSON objects, try enabling one of the settings allow_experimental_object_type/input_format_json_read_objects_as_strings", - name, - rows_read); + ErrorCodes::ONLY_NULLS_WHILE_READING_SCHEMA, + "Cannot determine type for column '{}' by first {} rows " + "of data, most likely this column contains only Nulls or empty " + "Arrays/Maps. You can specify the type for this column using setting schema_inference_hints. " + "If your data contains complex JSON objects, try enabling one " + "of the settings allow_experimental_object_type/input_format_json_read_objects_as_strings", + name, + rows_read); type = default_type; } diff --git a/src/Processors/Formats/ISchemaReader.h b/src/Processors/Formats/ISchemaReader.h index 9f3f4d880ef..edc5c6068c3 100644 --- a/src/Processors/Formats/ISchemaReader.h +++ b/src/Processors/Formats/ISchemaReader.h @@ -12,6 +12,7 @@ namespace DB namespace ErrorCodes { extern const int TYPE_MISMATCH; + extern const int INCORRECT_DATA; } /// Base class for schema inference for the data in some specific format. 
@@ -176,6 +177,25 @@ void chooseResultColumnType( } } +template +void chooseResultColumnTypes( + SchemaReader & schema_reader, + DataTypes & types, + DataTypes & new_types, + const DataTypePtr & default_type, + const std::vector & column_names, + size_t row) +{ + if (types.size() != new_types.size()) + throw Exception(ErrorCodes::INCORRECT_DATA, "Rows have different amount of values"); + + if (types.size() != column_names.size()) + throw Exception(ErrorCodes::INCORRECT_DATA, "The number of column names {} differs from the number of types {}", column_names.size(), types.size()); + + for (size_t i = 0; i != types.size(); ++i) + chooseResultColumnType(schema_reader, types[i], new_types[i], default_type, column_names[i], row); +} + void checkFinalInferredType(DataTypePtr & type, const String & name, const FormatSettings & settings, const DataTypePtr & default_type, size_t rows_read); Strings splitColumnNames(const String & column_names_str); diff --git a/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp b/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp index ebd9783b4fd..96ed2a7021e 100644 --- a/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp +++ b/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -95,6 +96,12 @@ arrow::Result> RandomAccessFileFromSeekableReadBu return buffer; } +arrow::Future> RandomAccessFileFromSeekableReadBuffer::ReadAsync(const arrow::io::IOContext &, int64_t position, int64_t nbytes) +{ + /// Just a stub to to avoid using internal arrow thread pool + return arrow::Future>::MakeFinished(ReadAt(position, nbytes)); +} + arrow::Status RandomAccessFileFromSeekableReadBuffer::Seek(int64_t position) { seekable_in.seek(position, SEEK_SET); diff --git a/src/Processors/Formats/Impl/ArrowBufferedStreams.h b/src/Processors/Formats/Impl/ArrowBufferedStreams.h index dc69b5a50fa..325975a7cfe 100644 --- a/src/Processors/Formats/Impl/ArrowBufferedStreams.h +++ b/src/Processors/Formats/Impl/ArrowBufferedStreams.h @@ -62,6 +62,11 @@ public: arrow::Result> Read(int64_t nbytes) override; + /// Override async reading to avoid using internal arrow thread pool. + /// In our code we don't use async reading, so implementation is sync, + /// we just call ReadAt and return future with ready value. 
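The `ReadAsync` override above is a synchronous implementation behind an asynchronous interface: it performs the read immediately and returns an `arrow::Future` that is already finished, so Arrow has no reason to schedule the read on its internal thread pool. The same idea expressed with the standard library only, as a rough analogy (`std::future` instead of `arrow::Future`, a byte-vector `readAt` instead of the real `ReadAt`; the names are illustrative assumptions):

```cpp
#include <algorithm>
#include <cstddef>
#include <future>
#include <vector>

/// Hypothetical reader with a synchronous ReadAt-style call.
struct SyncReader
{
    std::vector<char> data;

    std::vector<char> readAt(size_t position, size_t nbytes) const
    {
        size_t start = std::min(position, data.size());
        size_t end = std::min(start + nbytes, data.size());
        return {data.begin() + static_cast<std::ptrdiff_t>(start),
                data.begin() + static_cast<std::ptrdiff_t>(end)};
    }

    /// "Async" read that is actually synchronous: do the work now and
    /// hand back a future that is already in the ready state, so no
    /// worker thread or thread pool is ever involved.
    std::future<std::vector<char>> readAsync(size_t position, size_t nbytes) const
    {
        std::promise<std::vector<char>> promise;
        promise.set_value(readAt(position, nbytes));
        return promise.get_future();
    }
};
```

A caller written against the asynchronous interface can still call `.get()` on the returned future; it simply never blocks, because the value was produced before the future was handed out.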
+ arrow::Future> ReadAsync(const arrow::io::IOContext&, int64_t position, int64_t nbytes) override; + arrow::Status Seek(int64_t position) override; private: diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 52c868d4e0c..68c40527097 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -44,6 +44,7 @@ M(arrow::Type::INT32, DB::Int32) \ M(arrow::Type::UINT64, DB::UInt64) \ M(arrow::Type::INT64, DB::Int64) \ + M(arrow::Type::DURATION, DB::Int64) \ M(arrow::Type::HALF_FLOAT, DB::Float32) \ M(arrow::Type::FLOAT, DB::Float32) \ M(arrow::Type::DOUBLE, DB::Float64) @@ -386,7 +387,9 @@ static ColumnWithTypeAndName readColumnWithIndexesDataImpl(std::shared_ptrlength(); ++i) { if (data[i] < 0 || data[i] >= dict_size) - throw Exception(ErrorCodes::INCORRECT_DATA, "Index {} in Dictionary column is out of bounds, dictionary size is {}", Int64(data[i]), UInt64(dict_size)); + throw Exception(ErrorCodes::INCORRECT_DATA, + "Index {} in Dictionary column is out of bounds, dictionary size is {}", + Int64(data[i]), UInt64(dict_size)); } /// If dictionary type is not nullable and arrow dictionary contains default type @@ -734,13 +737,15 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( } throw Exception( - ErrorCodes::UNKNOWN_TYPE, - "Unsupported {} type '{}' of an input column '{}'. If it happens during schema inference and you want to skip columns with " - "unsupported types, you can enable setting input_format_{}_skip_columns_with_unsupported_types_in_schema_inference", - format_name, - arrow_column->type()->name(), - column_name, - boost::algorithm::to_lower_copy(format_name)); + ErrorCodes::UNKNOWN_TYPE, + "Unsupported {} type '{}' of an input column '{}'. 
" + "If it happens during schema inference and you want to skip columns with " + "unsupported types, you can enable setting input_format_{}" + "_skip_columns_with_unsupported_types_in_schema_inference", + format_name, + arrow_column->type()->name(), + column_name, + boost::algorithm::to_lower_copy(format_name)); } } } diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index 8c6cd8bd91b..9a475efa195 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -146,7 +146,7 @@ static void insertNumber(IColumn & column, WhichDataType type, T value) assert_cast &>(column).insertValue(static_cast(value)); break; default: - throw Exception("Type is not compatible with Avro", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Type is not compatible with Avro"); } } @@ -193,7 +193,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node { decoder.decodeString(tmp); if (tmp.length() != 36) - throw ParsingException(std::string("Cannot parse uuid ") + tmp, ErrorCodes::CANNOT_PARSE_UUID); + throw ParsingException(ErrorCodes::CANNOT_PARSE_UUID, "Cannot parse uuid {}", tmp); UUID uuid; parseUUID(reinterpret_cast(tmp.data()), std::reverse_iterator(reinterpret_cast(&uuid) + 16)); @@ -468,10 +468,9 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node }; } - throw Exception( - "Type " + target_type->getName() + " is not compatible with Avro " + avro::toString(root_node->type()) + ":\n" - + nodeToJson(root_node), - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Type {} is not compatible with Avro {}:\n{}", + target_type->getName(), avro::toString(root_node->type()), nodeToJson(root_node)); } AvroDeserializer::SkipFn AvroDeserializer::createSkipFn(avro::NodePtr root_node) @@ -519,7 +518,7 @@ AvroDeserializer::SkipFn AvroDeserializer::createSkipFn(avro::NodePtr root_node) auto index = decoder.decodeUnionIndex(); if (index >= union_skip_fns.size()) { - throw Exception("Union index out of boundary", ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Union index out of boundary"); } union_skip_fns[index](decoder); }; @@ -575,7 +574,7 @@ AvroDeserializer::SkipFn AvroDeserializer::createSkipFn(avro::NodePtr root_node) }; } default: - throw Exception("Unsupported Avro type " + root_node->name().fullname() + " (" + toString(int(root_node->type())) + ")", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unsupported Avro type {} ({})", root_node->name().fullname(), int(root_node->type())); } } @@ -719,7 +718,7 @@ AvroDeserializer::AvroDeserializer(const Block & header, avro::ValidSchema schem const auto & schema_root = schema.root(); if (schema_root->type() != avro::AVRO_RECORD) { - throw Exception("Root schema must be a record", ErrorCodes::TYPE_MISMATCH); + throw Exception(ErrorCodes::TYPE_MISMATCH, "Root schema must be a record"); } column_found.resize(header.columns()); @@ -731,7 +730,7 @@ AvroDeserializer::AvroDeserializer(const Block & header, avro::ValidSchema schem { if (!column_found[i]) { - throw Exception("Field " + header.getByPosition(i).name + " not found in Avro schema", ErrorCodes::THERE_IS_NO_COLUMN); + throw Exception(ErrorCodes::THERE_IS_NO_COLUMN, "Field {} not found in Avro schema", header.getByPosition(i).name); } } } @@ -782,7 +781,7 @@ public: : base_url(base_url_), schema_cache(schema_cache_max_size) { 
if (base_url.empty()) - throw Exception("Empty Schema Registry URL", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Empty Schema Registry URL"); } avro::ValidSchema getSchema(uint32_t id) @@ -842,7 +841,7 @@ private: } catch (const avro::Exception & e) { - throw Exception(e.what(), ErrorCodes::INCORRECT_DATA); + throw Exception::createDeprecated(e.what(), ErrorCodes::INCORRECT_DATA); } } catch (Exception & e) @@ -889,7 +888,7 @@ static uint32_t readConfluentSchemaId(ReadBuffer & in) if (e.code() == ErrorCodes::CANNOT_READ_ALL_DATA) { /* empty or incomplete message without Avro Confluent magic number or schema id */ - throw Exception("Missing AvroConfluent magic byte or schema identifier.", ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Missing AvroConfluent magic byte or schema identifier."); } else throw; @@ -897,8 +896,8 @@ static uint32_t readConfluentSchemaId(ReadBuffer & in) if (magic != 0x00) { - throw Exception("Invalid magic byte before AvroConfluent schema identifier." - " Must be zero byte, found " + std::to_string(int(magic)) + " instead", ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid magic byte before AvroConfluent schema identifier. " + "Must be zero byte, found {} instead", int(magic)); } return schema_id; @@ -977,7 +976,7 @@ NamesAndTypesList AvroSchemaReader::readSchema() } if (root_node->type() != avro::Type::AVRO_RECORD) - throw Exception("Root schema must be a record", ErrorCodes::TYPE_MISMATCH); + throw Exception(ErrorCodes::TYPE_MISMATCH, "Root schema must be a record"); NamesAndTypesList names_and_types; for (int i = 0; i != static_cast(root_node->leaves()); ++i) diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.h b/src/Processors/Formats/Impl/AvroRowInputFormat.h index 4525d7d33b0..96370b8c4c7 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.h +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.h @@ -103,7 +103,7 @@ private: auto index = decoder.decodeUnionIndex(); if (index >= actions.size()) { - throw Exception("Union index out of boundary", ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Union index out of boundary"); } actions[index].execute(columns, decoder, ext); break; diff --git a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp index 7efe2a999b4..8483a91df62 100644 --- a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp @@ -49,10 +49,9 @@ public: : string_to_string_regexp(settings_.avro.string_column_pattern) { if (!string_to_string_regexp.ok()) - throw DB::Exception( - "Avro: cannot compile re2: " + settings_.avro.string_column_pattern + ", error: " + string_to_string_regexp.error() - + ". Look at https://github.com/google/re2/wiki/Syntax for reference.", - DB::ErrorCodes::CANNOT_COMPILE_REGEXP); + throw DB::Exception(DB::ErrorCodes::CANNOT_COMPILE_REGEXP, "Avro: cannot compile re2: {}, error: {}. 
" + "Look at https://github.com/google/re2/wiki/Syntax for reference.", + settings_.avro.string_column_pattern, string_to_string_regexp.error()); } bool isStringAsString(const String & column_name) @@ -384,7 +383,7 @@ AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeF default: break; } - throw Exception("Type " + data_type->getName() + " is not supported for Avro output", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Type {} is not supported for Avro output", data_type->getName()); } @@ -438,7 +437,7 @@ static avro::Codec getCodec(const std::string & codec_name) if (codec_name == "snappy") return avro::Codec::SNAPPY_CODEC; #endif - throw Exception("Avro codec " + codec_name + " is not available", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Avro codec {} is not available", codec_name); } AvroRowOutputFormat::AvroRowOutputFormat( diff --git a/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp index 84d84756bd0..c4ffce2bc65 100644 --- a/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp @@ -146,7 +146,8 @@ static void readAndInsertInteger(ReadBuffer & in, IColumn & column, const DataTy } else { - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert BSON {} into column with type {}", getBSONTypeName(bson_type), data_type->getName()); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert BSON {} into column with type {}", + getBSONTypeName(bson_type), data_type->getName()); } } @@ -154,7 +155,8 @@ template static void readAndInsertDouble(ReadBuffer & in, IColumn & column, const DataTypePtr & data_type, BSONType bson_type) { if (bson_type != BSONType::DOUBLE) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert BSON {} into column with type {}", getBSONTypeName(bson_type), data_type->getName()); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert BSON {} into column with type {}", + getBSONTypeName(bson_type), data_type->getName()); Float64 value; readBinary(value, in); @@ -165,7 +167,8 @@ template static void readAndInsertSmallDecimal(ReadBuffer & in, IColumn & column, const DataTypePtr & data_type, BSONType bson_type) { if (bson_type != expected_bson_type) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert BSON {} into column with type {}", getBSONTypeName(bson_type), data_type->getName()); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert BSON {} into column with type {}", + getBSONTypeName(bson_type), data_type->getName()); DecimalType value; readBinary(value, in); @@ -186,12 +189,14 @@ template static void readAndInsertBigInteger(ReadBuffer & in, IColumn & column, const DataTypePtr & data_type, BSONType bson_type) { if (bson_type != BSONType::BINARY) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert BSON {} into column with type {}", getBSONTypeName(bson_type), data_type->getName()); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert BSON {} into column with type {}", + getBSONTypeName(bson_type), data_type->getName()); auto size = readBSONSize(in); auto subtype = getBSONBinarySubtype(readBSONType(in)); if (subtype != BSONBinarySubtype::BINARY) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert BSON Binary subtype {} into column with type {}", getBSONBinarySubtypeName(subtype), data_type->getName()); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert BSON Binary subtype {} 
into column with type {}", + getBSONBinarySubtypeName(subtype), data_type->getName()); using ValueType = typename ColumnType::ValueType; @@ -216,7 +221,7 @@ static void readAndInsertStringImpl(ReadBuffer & in, IColumn & column, size_t si auto & fixed_string_column = assert_cast(column); size_t n = fixed_string_column.getN(); if (size > n) - throw Exception("Too large string for FixedString column", ErrorCodes::TOO_LARGE_STRING_SIZE); + throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Too large string for FixedString column"); auto & data = fixed_string_column.getChars(); @@ -375,18 +380,20 @@ void BSONEachRowRowInputFormat::readTuple(IColumn & column, const DataTypePtr & auto try_get_index = data_type_tuple->tryGetPositionByName(name.toString()); if (!try_get_index) throw Exception( - ErrorCodes::INCORRECT_DATA, - "Cannot parse tuple column with type {} from BSON array/embedded document field: tuple doesn't have element with name \"{}\"", - data_type->getName(), - name); + ErrorCodes::INCORRECT_DATA, + "Cannot parse tuple column with type {} from BSON array/embedded document field: " + "tuple doesn't have element with name \"{}\"", + data_type->getName(), + name); index = *try_get_index; } if (index >= data_type_tuple->getElements().size()) throw Exception( - ErrorCodes::INCORRECT_DATA, - "Cannot parse tuple column with type {} from BSON array/embedded document field: the number of fields BSON document exceeds the number of fields in tuple", - data_type->getName()); + ErrorCodes::INCORRECT_DATA, + "Cannot parse tuple column with type {} from BSON array/embedded document field: " + "the number of fields BSON document exceeds the number of fields in tuple", + data_type->getName()); readField(tuple_column.getColumn(index), data_type_tuple->getElement(index), nested_bson_type); ++read_nested_columns; @@ -396,11 +403,12 @@ void BSONEachRowRowInputFormat::readTuple(IColumn & column, const DataTypePtr & if (read_nested_columns != data_type_tuple->getElements().size()) throw Exception( - ErrorCodes::INCORRECT_DATA, - "Cannot parse tuple column with type {} from BSON array/embedded document field, the number of fields in tuple and BSON document doesn't match: {} != {}", - data_type->getName(), - data_type_tuple->getElements().size(), - read_nested_columns); + ErrorCodes::INCORRECT_DATA, + "Cannot parse tuple column with type {} from BSON array/embedded document field, " + "the number of fields in tuple and BSON document doesn't match: {} != {}", + data_type->getName(), + data_type_tuple->getElements().size(), + read_nested_columns); } void BSONEachRowRowInputFormat::readMap(IColumn & column, const DataTypePtr & data_type, BSONType bson_type) @@ -411,7 +419,9 @@ void BSONEachRowRowInputFormat::readMap(IColumn & column, const DataTypePtr & da const auto * data_type_map = assert_cast(data_type.get()); const auto & key_data_type = data_type_map->getKeyType(); if (!isStringOrFixedString(key_data_type)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Only maps with String key type are supported in BSON, got key type: {}", key_data_type->getName()); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Only maps with String key type are supported in BSON, got key type: {}", + key_data_type->getName()); const auto & value_data_type = data_type_map->getValueType(); auto & column_map = assert_cast(column); @@ -446,7 +456,9 @@ bool BSONEachRowRowInputFormat::readField(IColumn & column, const DataTypePtr & } if (!format_settings.null_as_default) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert BSON 
Null value into non-nullable column with type {}", getBSONTypeName(bson_type), data_type->getName()); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Cannot insert BSON Null value into non-nullable column with type {}", + data_type->getName()); column.insertDefault(); return false; diff --git a/src/Processors/Formats/Impl/BSONEachRowRowOutputFormat.cpp b/src/Processors/Formats/Impl/BSONEachRowRowOutputFormat.cpp index c9530d4ba81..211021b0d78 100644 --- a/src/Processors/Formats/Impl/BSONEachRowRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/BSONEachRowRowOutputFormat.cpp @@ -229,7 +229,9 @@ size_t BSONEachRowRowOutputFormat::countBSONFieldSize(const IColumn & column, co const auto & map_type = assert_cast(*data_type); if (!isStringOrFixedString(map_type.getKeyType())) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Only maps with String key type are supported in BSON, got key type: {}", map_type.getKeyType()->getName()); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Only maps with String key type are supported in BSON, got key type: {}", + map_type.getKeyType()->getName()); const auto & value_type = map_type.getValueType(); const auto & map_column = assert_cast(column); @@ -452,7 +454,9 @@ void BSONEachRowRowOutputFormat::serializeField(const IColumn & column, const Da { const auto & map_type = assert_cast(*data_type); if (!isStringOrFixedString(map_type.getKeyType())) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Only maps with String key type are supported in BSON, got key type: {}", map_type.getKeyType()->getName()); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Only maps with String key type are supported in BSON, got key type: {}", + map_type.getKeyType()->getName()); const auto & value_type = map_type.getValueType(); const auto & map_column = assert_cast(column); diff --git a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp index a41cf687b39..a4f779076eb 100644 --- a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp @@ -90,7 +90,8 @@ void BinaryFormatReader::skipTypes() void BinaryFormatReader::skipField(size_t file_column) { if (file_column >= read_data_types.size()) - throw Exception(ErrorCodes::CANNOT_SKIP_UNKNOWN_FIELD, "Cannot skip unknown field in RowBinaryWithNames format, because it's type is unknown"); + throw Exception(ErrorCodes::CANNOT_SKIP_UNKNOWN_FIELD, + "Cannot skip unknown field in RowBinaryWithNames format, because it's type is unknown"); Field field; read_data_types[file_column]->getDefaultSerialization()->deserializeBinary(field, *in, format_settings); } diff --git a/src/Processors/Formats/Impl/BinaryRowInputFormat.h b/src/Processors/Formats/Impl/BinaryRowInputFormat.h index 7e600c5b3dd..3d3d80f1043 100644 --- a/src/Processors/Formats/Impl/BinaryRowInputFormat.h +++ b/src/Processors/Formats/Impl/BinaryRowInputFormat.h @@ -8,11 +8,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int NOT_IMPLEMENTED; -} - class ReadBuffer; /** A stream for inputting data in a binary line-by-line format. 
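Several of the BSON hunks above repeat one guard: BSON embedded documents are keyed by strings, so a ClickHouse `Map` column can only be read from or written to BSON when its key type is `String` or `FixedString`. A compact sketch of that validation step, with a hypothetical reduced type enum standing in for the real `IDataType` hierarchy:

```cpp
#include <stdexcept>
#include <string>

/// Hypothetical, heavily reduced stand-in for ClickHouse type identification.
enum class KeyType { String, FixedString, UInt64, DateTime };

inline bool isStringOrFixedString(KeyType t)
{
    return t == KeyType::String || t == KeyType::FixedString;
}

/// Mirrors the guard used by the BSONEachRow input and output formats:
/// reject Map columns whose key type cannot become a BSON document key.
void checkBsonMapKeyType(KeyType key_type, const std::string & type_name)
{
    if (!isStringOrFixedString(key_type))
        throw std::invalid_argument(
            "Only maps with String key type are supported in BSON, got key type: " + type_name);
}
```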
@@ -59,11 +54,6 @@ public: BinaryWithNamesAndTypesSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_); private: - DataTypes readRowAndGetDataTypes() override - { - throw Exception{ErrorCodes::NOT_IMPLEMENTED, "Method readRowAndGetDataTypes is not implemented"}; - } - BinaryFormatReader reader; }; diff --git a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp index 8319ef65e17..8ef57ad4917 100644 --- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp +++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp @@ -85,7 +85,7 @@ namespace DB static void checkStatus(const arrow::Status & status, const String & column_name, const String & format_name) { if (!status.ok()) - throw Exception{fmt::format("Error with a {} column \"{}\": {}.", format_name, column_name, status.ToString()), ErrorCodes::UNKNOWN_EXCEPTION}; + throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Error with a {} column \"{}\": {}.", format_name, column_name, status.ToString()); } template @@ -179,7 +179,7 @@ namespace DB if (need_rescale) { if (common::mulOverflow(value, rescale_multiplier, value)) - throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW); + throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "Decimal math overflow"); } status = builder.Append(value); } @@ -295,8 +295,7 @@ namespace DB case TypeIndex::UInt64: return extractIndexesImpl(column, start, end, shift); default: - throw Exception(fmt::format("Indexes column must be ColumnUInt, got {}.", column->getName()), - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Indexes column must be ColumnUInt, got {}.", column->getName()); } } @@ -590,11 +589,8 @@ namespace DB #undef DISPATCH else { - throw Exception - { - fmt::format("Internal type '{}' of a column '{}' is not supported for conversion into {} data format.", column_type_name, column_name, format_name), - ErrorCodes::UNKNOWN_TYPE - }; + throw Exception(ErrorCodes::UNKNOWN_TYPE, + "Internal type '{}' of a column '{}' is not supported for conversion into {} data format.", column_type_name, column_name, format_name); } } @@ -641,8 +637,7 @@ namespace DB case TypeIndex::UInt64: return arrow::int64(); default: - throw Exception(fmt::format("Indexes column for getUniqueIndex must be ColumnUInt, got {}.", indexes_column->getName()), - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Indexes column for getUniqueIndex must be ColumnUInt, got {}.", indexes_column->getName()); } } diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index 1f1bf99739a..18e5b4dfc13 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -14,6 +14,7 @@ namespace DB { + namespace ErrorCodes { extern const int BAD_ARGUMENTS; @@ -21,6 +22,19 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +namespace +{ + void checkBadDelimiter(char delimiter) + { + constexpr std::string_view bad_delimiters = " \t\"'.UL"; + if (bad_delimiters.find(delimiter) != std::string_view::npos) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "CSV format may not work correctly with delimiter '{}'. 
Try use CustomSeparated format instead", + delimiter); + } +} + CSVRowInputFormat::CSVRowInputFormat( const Block & header_, ReadBuffer & in_, @@ -29,13 +43,13 @@ CSVRowInputFormat::CSVRowInputFormat( bool with_types_, const FormatSettings & format_settings_) : CSVRowInputFormat( - header_, in_, params_, with_names_, with_types_, format_settings_, std::make_unique(in_, format_settings_)) + header_, std::make_shared(in_), params_, with_names_, with_types_, format_settings_) { } CSVRowInputFormat::CSVRowInputFormat( const Block & header_, - ReadBuffer & in_, + std::shared_ptr in_, const Params & params_, bool with_names_, bool with_types_, @@ -43,25 +57,49 @@ CSVRowInputFormat::CSVRowInputFormat( std::unique_ptr format_reader_) : RowInputFormatWithNamesAndTypes( header_, - in_, + *in_, params_, false, with_names_, with_types_, format_settings_, - std::move(format_reader_)) + std::move(format_reader_), + format_settings_.csv.try_detect_header), + buf(std::move(in_)) { - const String bad_delimiters = " \t\"'.UL"; - if (bad_delimiters.find(format_settings.csv.delimiter) != String::npos) - throw Exception( - String("CSV format may not work correctly with delimiter '") + format_settings.csv.delimiter - + "'. Try use CustomSeparated format instead.", - ErrorCodes::BAD_ARGUMENTS); + checkBadDelimiter(format_settings_.csv.delimiter); +} + +CSVRowInputFormat::CSVRowInputFormat( + const Block & header_, + std::shared_ptr in_, + const Params & params_, + bool with_names_, + bool with_types_, + const FormatSettings & format_settings_) + : RowInputFormatWithNamesAndTypes( + header_, + *in_, + params_, + false, + with_names_, + with_types_, + format_settings_, + std::make_unique(*in_, format_settings_), + format_settings_.csv.try_detect_header), + buf(std::move(in_)) +{ + checkBadDelimiter(format_settings_.csv.delimiter); } void CSVRowInputFormat::syncAfterError() { - skipToNextLineOrEOF(*in); + skipToNextLineOrEOF(*buf); +} + +void CSVRowInputFormat::setReadBuffer(ReadBuffer & in_) +{ + buf->setSubBuffer(in_); } static void skipEndOfLine(ReadBuffer & in) @@ -80,13 +118,12 @@ static void skipEndOfLine(ReadBuffer & in) if (!in.eof() && *in.position() == '\n') ++in.position(); else - throw Exception( + throw Exception(ErrorCodes::INCORRECT_DATA, "Cannot parse CSV format: found \\r (CR) not followed by \\n (LF)." - " Line must end by \\n (LF) or \\r\\n (CR LF) or \\n\\r.", - ErrorCodes::INCORRECT_DATA); + " Line must end by \\n (LF) or \\r\\n (CR LF) or \\n\\r."); } else if (!in.eof()) - throw Exception("Expected end of line", ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Expected end of line"); } /// Skip `whitespace` symbols allowed in CSV. 
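The CSV changes above route all reading through a `PeekableReadBuffer` so that header auto-detection (`format_settings.csv.try_detect_header`) can inspect the first rows and roll back before normal parsing begins. The detection rule itself is not part of this hunk; the sketch below only illustrates one plausible heuristic over already-split rows (treat the first row as a header when none of its fields look numeric but the next row's fields do). This is an assumption for illustration, not the exact ClickHouse rule:

```cpp
#include <cstdlib>
#include <string>
#include <vector>

/// Returns true if the whole field parses as a floating point number.
static bool looksNumeric(const std::string & field)
{
    if (field.empty())
        return false;
    char * end = nullptr;
    std::strtod(field.c_str(), &end);
    return end == field.c_str() + field.size();
}

/// Hypothetical header heuristic over two already-split CSV rows.
static bool firstRowLooksLikeHeader(const std::vector<std::string> & first_row,
                                    const std::vector<std::string> & second_row)
{
    if (first_row.empty() || first_row.size() != second_row.size())
        return false;

    bool any_numeric_in_first = false;
    bool any_numeric_in_second = false;
    for (size_t i = 0; i < first_row.size(); ++i)
    {
        any_numeric_in_first = any_numeric_in_first || looksNumeric(first_row[i]);
        any_numeric_in_second = any_numeric_in_second || looksNumeric(second_row[i]);
    }

    /// A header candidate is an all-text row followed by a row containing data-like values.
    return !any_numeric_in_first && any_numeric_in_second;
}
```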
@@ -96,51 +133,51 @@ static inline void skipWhitespacesAndTabs(ReadBuffer & in) ++in.position(); } -CSVFormatReader::CSVFormatReader(ReadBuffer & in_, const FormatSettings & format_settings_) : FormatWithNamesAndTypesReader(in_, format_settings_) +CSVFormatReader::CSVFormatReader(PeekableReadBuffer & buf_, const FormatSettings & format_settings_) : FormatWithNamesAndTypesReader(buf_, format_settings_), buf(&buf_) { } void CSVFormatReader::skipFieldDelimiter() { - skipWhitespacesAndTabs(*in); - assertChar(format_settings.csv.delimiter, *in); + skipWhitespacesAndTabs(*buf); + assertChar(format_settings.csv.delimiter, *buf); } template String CSVFormatReader::readCSVFieldIntoString() { - skipWhitespacesAndTabs(*in); + skipWhitespacesAndTabs(*buf); String field; if constexpr (read_string) - readCSVString(field, *in, format_settings.csv); + readCSVString(field, *buf, format_settings.csv); else - readCSVField(field, *in, format_settings.csv); + readCSVField(field, *buf, format_settings.csv); return field; } void CSVFormatReader::skipField() { - skipWhitespacesAndTabs(*in); + skipWhitespacesAndTabs(*buf); NullOutput out; - readCSVStringInto(out, *in, format_settings.csv); + readCSVStringInto(out, *buf, format_settings.csv); } void CSVFormatReader::skipRowEndDelimiter() { - skipWhitespacesAndTabs(*in); + skipWhitespacesAndTabs(*buf); - if (in->eof()) + if (buf->eof()) return; /// we support the extra delimiter at the end of the line - if (*in->position() == format_settings.csv.delimiter) - ++in->position(); + if (*buf->position() == format_settings.csv.delimiter) + ++buf->position(); - skipWhitespacesAndTabs(*in); - if (in->eof()) + skipWhitespacesAndTabs(*buf); + if (buf->eof()) return; - skipEndOfLine(*in); + skipEndOfLine(*buf); } void CSVFormatReader::skipHeaderRow() @@ -148,8 +185,8 @@ void CSVFormatReader::skipHeaderRow() do { skipField(); - skipWhitespacesAndTabs(*in); - } while (checkChar(format_settings.csv.delimiter, *in)); + skipWhitespacesAndTabs(*buf); + } while (checkChar(format_settings.csv.delimiter, *buf)); skipRowEndDelimiter(); } @@ -157,12 +194,13 @@ void CSVFormatReader::skipHeaderRow() template std::vector CSVFormatReader::readRowImpl() { + std::vector fields; do { fields.push_back(readCSVFieldIntoString()); - skipWhitespacesAndTabs(*in); - } while (checkChar(format_settings.csv.delimiter, *in)); + skipWhitespacesAndTabs(*buf); + } while (checkChar(format_settings.csv.delimiter, *buf)); skipRowEndDelimiter(); return fields; @@ -174,12 +212,12 @@ bool CSVFormatReader::parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out) try { - skipWhitespacesAndTabs(*in); - assertChar(delimiter, *in); + skipWhitespacesAndTabs(*buf); + assertChar(delimiter, *buf); } catch (const DB::Exception &) { - if (*in->position() == '\n' || *in->position() == '\r') + if (*buf->position() == '\n' || *buf->position() == '\r') { out << "ERROR: Line feed found where delimiter (" << delimiter << ") is expected." @@ -189,7 +227,7 @@ bool CSVFormatReader::parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out) else { out << "ERROR: There is no delimiter (" << delimiter << "). 
"; - verbosePrintString(in->position(), in->position() + 1, out); + verbosePrintString(buf->position(), buf->position() + 1, out); out << " found instead.\n"; } return false; @@ -200,24 +238,24 @@ bool CSVFormatReader::parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out) bool CSVFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer & out) { - skipWhitespacesAndTabs(*in); + skipWhitespacesAndTabs(*buf); - if (in->eof()) + if (buf->eof()) return true; /// we support the extra delimiter at the end of the line - if (*in->position() == format_settings.csv.delimiter) + if (*buf->position() == format_settings.csv.delimiter) { - ++in->position(); - skipWhitespacesAndTabs(*in); - if (in->eof()) + ++buf->position(); + skipWhitespacesAndTabs(*buf); + if (buf->eof()) return true; } - if (!in->eof() && *in->position() != '\n' && *in->position() != '\r') + if (!buf->eof() && *buf->position() != '\n' && *buf->position() != '\r') { out << "ERROR: There is no line feed. "; - verbosePrintString(in->position(), in->position() + 1, out); + verbosePrintString(buf->position(), buf->position() + 1, out); out << " found instead.\n" " It's like your file has more columns than expected.\n" "And if your file has the right number of columns, maybe it has an unquoted string value with a comma.\n"; @@ -225,7 +263,7 @@ bool CSVFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer & out) return false; } - skipEndOfLine(*in); + skipEndOfLine(*buf); return true; } @@ -236,10 +274,10 @@ bool CSVFormatReader::readField( bool is_last_file_column, const String & /*column_name*/) { - skipWhitespacesAndTabs(*in); + skipWhitespacesAndTabs(*buf); - const bool at_delimiter = !in->eof() && *in->position() == format_settings.csv.delimiter; - const bool at_last_column_line_end = is_last_file_column && (in->eof() || *in->position() == '\n' || *in->position() == '\r'); + const bool at_delimiter = !buf->eof() && *buf->position() == format_settings.csv.delimiter; + const bool at_last_column_line_end = is_last_file_column && (buf->eof() || *buf->position() == '\n' || *buf->position() == '\r'); /// Note: Tuples are serialized in CSV as separate columns, but with empty_as_default or null_as_default /// only one empty or NULL column will be expected @@ -254,17 +292,16 @@ bool CSVFormatReader::readField( column.insertDefault(); return false; } - else if (format_settings.null_as_default && !type->isNullable() && !type->isLowCardinalityNullable()) + + if (format_settings.null_as_default && !type->isNullable() && !type->isLowCardinalityNullable()) { /// If value is null but type is not nullable then use default value instead. - return SerializationNullable::deserializeTextCSVImpl(column, *in, format_settings, serialization); - } - else - { - /// Read the column normally. - serialization->deserializeTextCSV(column, *in, format_settings); - return true; + return SerializationNullable::deserializeTextCSVImpl(column, *buf, format_settings, serialization); } + + /// Read the column normally. 
+ serialization->deserializeTextCSV(column, *buf, format_settings); + return true; } void CSVFormatReader::skipPrefixBeforeHeader() @@ -273,27 +310,39 @@ void CSVFormatReader::skipPrefixBeforeHeader() readRow(); } +void CSVFormatReader::setReadBuffer(ReadBuffer & in_) +{ + buf = assert_cast(&in_); + FormatWithNamesAndTypesReader::setReadBuffer(*buf); +} CSVSchemaReader::CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, const FormatSettings & format_settings_) : FormatWithNamesAndTypesSchemaReader( - in_, + buf, format_settings_, with_names_, with_types_, &reader, - getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule::CSV)) - , reader(in_, format_settings_) + getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule::CSV), + format_settings_.csv.try_detect_header) + , buf(in_) + , reader(buf, format_settings_) { } - -DataTypes CSVSchemaReader::readRowAndGetDataTypes() +std::pair, DataTypes> CSVSchemaReader::readRowAndGetFieldsAndDataTypes() { - if (in.eof()) + if (buf.eof()) return {}; auto fields = reader.readRow(); - return tryInferDataTypesByEscapingRule(fields, reader.getFormatSettings(), FormatSettings::EscapingRule::CSV); + auto data_types = tryInferDataTypesByEscapingRule(fields, format_settings, FormatSettings::EscapingRule::CSV); + return {fields, data_types}; +} + +DataTypes CSVSchemaReader::readRowAndGetDataTypesImpl() +{ + return std::move(readRowAndGetFieldsAndDataTypes().second); } @@ -330,7 +379,7 @@ std::pair fileSegmentationEngineCSVImpl(ReadBuffer & in, DB::Memor { pos = find_first_symbols<'"'>(pos, in.buffer().end()); if (pos > in.buffer().end()) - throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Position in buffer is out of bounds. There must be a bug."); else if (pos == in.buffer().end()) continue; else if (*pos == '"') @@ -346,7 +395,7 @@ std::pair fileSegmentationEngineCSVImpl(ReadBuffer & in, DB::Memor { pos = find_first_symbols<'"', '\r', '\n'>(pos, in.buffer().end()); if (pos > in.buffer().end()) - throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Position in buffer is out of bounds. There must be a bug."); else if (pos == in.buffer().end()) continue; @@ -383,10 +432,10 @@ std::pair fileSegmentationEngineCSVImpl(ReadBuffer & in, DB::Memor void registerFileSegmentationEngineCSV(FormatFactory & factory) { - auto register_func = [&](const String & format_name, bool with_names, bool with_types) + auto register_func = [&](const String & format_name, bool, bool) { - size_t min_rows = 1 + int(with_names) + int(with_types); - factory.registerFileSegmentationEngine(format_name, [min_rows](ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows) + static constexpr size_t min_rows = 3; /// Make it 3 for header auto detection (first 3 rows must be always in the same segment). 
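The registration change below pins `min_rows` to 3 so that, when header auto-detection is enabled, the first three rows (presumably a names row, a types row, and the first data row) always land in the same parsing segment. A simplified, hypothetical segmentation loop that honours both a byte threshold and a minimum row count is sketched here; unlike the real `fileSegmentationEngineCSVImpl`, it ignores CSV quoting and simply keeps a trailing partial row in the current segment:

```cpp
#include <cstddef>
#include <string_view>
#include <utility>

/// Returns the number of bytes to keep in the current segment and the number of rows it contains.
/// A cut is only allowed once both min_bytes and min_rows are satisfied (or the input ends).
static std::pair<size_t, size_t> segmentSimpleCsv(std::string_view data, size_t min_bytes, size_t min_rows)
{
    size_t rows = 0;
    size_t pos = 0;
    while (pos < data.size())
    {
        size_t newline = data.find('\n', pos);
        if (newline == std::string_view::npos)
        {
            /// Keep the trailing partial row in this segment for simplicity.
            pos = data.size();
            break;
        }
        pos = newline + 1;
        ++rows;
        if (pos >= min_bytes && rows >= min_rows)
            break; /// Safe to cut: the segment holds complete rows and enough of them.
    }
    return {pos, rows};
}
```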
+ factory.registerFileSegmentationEngine(format_name, [](ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows) { return fileSegmentationEngineCSVImpl(in, memory, min_bytes, min_rows, max_rows); }); @@ -410,7 +459,7 @@ void registerCSVSchemaReader(FormatFactory & factory) { String result = getAdditionalFormatInfoByEscapingRule(settings, FormatSettings::EscapingRule::CSV); if (!with_names) - result += fmt::format(", column_names_for_schema_inference={}", settings.column_names_for_schema_inference); + result += fmt::format(", column_names_for_schema_inference={}, try_detect_header={}", settings.column_names_for_schema_inference, settings.csv.try_detect_header); return result; }); } diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.h b/src/Processors/Formats/Impl/CSVRowInputFormat.h index 1d79265c22b..86f7fe3466c 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.h +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.h @@ -26,19 +26,27 @@ public: String getName() const override { return "CSVRowInputFormat"; } + void setReadBuffer(ReadBuffer & in_) override; + protected: - explicit CSVRowInputFormat(const Block & header_, ReadBuffer & in_, const Params & params_, - bool with_names_, bool with_types_, const FormatSettings & format_settings_, std::unique_ptr format_reader_); + CSVRowInputFormat(const Block & header_, std::shared_ptr in_, const Params & params_, + bool with_names_, bool with_types_, const FormatSettings & format_settings_, std::unique_ptr format_reader_); + + CSVRowInputFormat(const Block & header_, std::shared_ptr in_buf_, const Params & params_, + bool with_names_, bool with_types_, const FormatSettings & format_settings_); private: bool allowSyncAfterError() const override { return true; } void syncAfterError() override; + +protected: + std::shared_ptr buf; }; class CSVFormatReader : public FormatWithNamesAndTypesReader { public: - CSVFormatReader(ReadBuffer & in_, const FormatSettings & format_settings_); + CSVFormatReader(PeekableReadBuffer & buf_, const FormatSettings & format_settings_); bool parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out) override; bool parseRowEndWithDiagnosticInfo(WriteBuffer & out) override; @@ -64,12 +72,21 @@ public: std::vector readTypes() override { return readHeaderRow(); } std::vector readHeaderRow() { return readRowImpl(); } std::vector readRow() { return readRowImpl(); } + std::vector readRowForHeaderDetection() override { return readHeaderRow(); } + template std::vector readRowImpl(); template String readCSVFieldIntoString(); + + void setReadBuffer(ReadBuffer & in_) override; + + FormatSettings::EscapingRule getEscapingRule() const override { return FormatSettings::EscapingRule::CSV; } + +protected: + PeekableReadBuffer * buf; }; class CSVSchemaReader : public FormatWithNamesAndTypesSchemaReader @@ -78,9 +95,12 @@ public: CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, const FormatSettings & format_settings_); private: - DataTypes readRowAndGetDataTypes() override; + DataTypes readRowAndGetDataTypesImpl() override; + std::pair, DataTypes> readRowAndGetFieldsAndDataTypes() override; + PeekableReadBuffer buf; CSVFormatReader reader; + DataTypes buffered_types; }; std::pair fileSegmentationEngineCSVImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t min_rows, size_t max_rows); diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp index 9e7a5a9aa8f..5d438d47de6 100644 --- 
a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp +++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp @@ -130,8 +130,8 @@ static void fillLiteralInfo(DataTypes & nested_types, LiteralInfo & info) field_type = Field::Types::Map; } else - throw Exception("Unexpected literal type inside Array: " + nested_type->getName() + ". It's a bug", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected literal type inside Array: {}. It's a bug", + nested_type->getName()); if (is_nullable) nested_type = std::make_shared(nested_type); @@ -159,9 +159,9 @@ public: else if (ast->as()) return; else if (ast->as()) - throw DB::Exception("Identifier in constant expression", ErrorCodes::SYNTAX_ERROR); + throw DB::Exception(ErrorCodes::SYNTAX_ERROR, "Identifier in constant expression"); else - throw DB::Exception("Syntax error in constant expression", ErrorCodes::SYNTAX_ERROR); + throw DB::Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error in constant expression"); } private: @@ -315,7 +315,7 @@ ConstantExpressionTemplate::TemplateStructure::TemplateStructure(LiteralsInfo & { const LiteralInfo & info = replaced_literals[i]; if (info.literal->begin.value() < prev_end) - throw Exception("Cannot replace literals", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot replace literals"); while (prev_end < info.literal->begin.value()) { @@ -616,13 +616,12 @@ ColumnPtr ConstantExpressionTemplate::evaluateAll(BlockMissingValues & nulls, si structure->actions_on_literals->execute(evaluated); if (!evaluated || evaluated.rows() != rows_count) - throw Exception("Number of rows mismatch after evaluation of batch of constant expressions: got " + - std::to_string(evaluated.rows()) + " rows for " + std::to_string(rows_count) + " expressions", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Number of rows mismatch after evaluation of batch of constant expressions: " + "got {} rows for {} expressions", evaluated.rows(), rows_count); if (!evaluated.has(structure->result_column_name)) - throw Exception("Cannot evaluate template " + structure->result_column_name + ", block structure:\n" + evaluated.dumpStructure(), - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot evaluate template {}, block structure:\n{}", + structure->result_column_name, evaluated.dumpStructure()); rows_count = 0; auto res = evaluated.getByName(structure->result_column_name); @@ -633,7 +632,7 @@ ColumnPtr ConstantExpressionTemplate::evaluateAll(BlockMissingValues & nulls, si /// Extract column with evaluated expression and mask for NULLs const auto & tuple = assert_cast(*res.column); if (tuple.tupleSize() != 2) - throw Exception("Invalid tuple size, it'a a bug", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid tuple size, it'a a bug"); const auto & is_null = assert_cast(tuple.getColumn(1)); for (size_t i = 0; i < is_null.size(); ++i) diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp index a2f2e59ef16..6f73ede5d4d 100644 --- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp @@ -42,7 +42,8 @@ CustomSeparatedRowInputFormat::CustomSeparatedRowInputFormat( with_names_, with_types_, format_settings_, - std::make_unique(*buf_, ignore_spaces_, format_settings_)) + std::make_unique(*buf_, ignore_spaces_, format_settings_), + 
format_settings_.custom.try_detect_header) , buf(std::move(buf_)) { /// In case of CustomSeparatedWithNames(AndTypes) formats and enabled setting input_format_with_names_use_header we don't know @@ -53,8 +54,10 @@ CustomSeparatedRowInputFormat::CustomSeparatedRowInputFormat( && format_settings_.custom.row_between_delimiter.empty()) { throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Input format CustomSeparatedWithNames(AndTypes) cannot work properly with enabled setting input_format_with_names_use_header, " - "when format_custom_field_delimiter and format_custom_row_after_delimiter are the same and format_custom_row_between_delimiter is empty."); + "Input format CustomSeparatedWithNames(AndTypes) cannot work properly with enabled setting " + "input_format_with_names_use_header, when format_custom_field_delimiter and " + "format_custom_row_after_delimiter are the same " + "and format_custom_row_between_delimiter is empty."); } } @@ -160,14 +163,14 @@ bool CustomSeparatedFormatReader::checkEndOfRow() return checkForSuffixImpl(true); } -template +template String CustomSeparatedFormatReader::readFieldIntoString(bool is_first, bool is_last, bool is_unknown) { if (!is_first) skipFieldDelimiter(); skipSpaces(); updateFormatSettings(is_last); - if constexpr (is_header) + if constexpr (mode != ReadFieldMode::AS_FIELD) { /// If the number of columns is unknown and we use CSV escaping rule, /// we don't know what delimiter to expect after the value, @@ -176,7 +179,10 @@ String CustomSeparatedFormatReader::readFieldIntoString(bool is_first, bool is_l return readCSVStringWithTwoPossibleDelimiters( *buf, format_settings.csv, format_settings.custom.field_delimiter, format_settings.custom.row_after_delimiter); - return readStringByEscapingRule(*buf, format_settings.custom.escaping_rule, format_settings); + if constexpr (mode == ReadFieldMode::AS_STRING) + return readStringByEscapingRule(*buf, format_settings.custom.escaping_rule, format_settings); + else + return readStringOrFieldByEscapingRule(*buf, format_settings.custom.escaping_rule, format_settings); } else { @@ -188,7 +194,7 @@ String CustomSeparatedFormatReader::readFieldIntoString(bool is_first, bool is_l } } -template +template std::vector CustomSeparatedFormatReader::readRowImpl() { std::vector values; @@ -198,14 +204,14 @@ std::vector CustomSeparatedFormatReader::readRowImpl() { do { - values.push_back(readFieldIntoString(values.empty(), false, true)); + values.push_back(readFieldIntoString(values.empty(), false, true)); } while (!checkEndOfRow()); columns = values.size(); } else { for (size_t i = 0; i != columns; ++i) - values.push_back(readFieldIntoString(i == 0, i + 1 == columns, false)); + values.push_back(readFieldIntoString(i == 0, i + 1 == columns, false)); } skipRowEndDelimiter(); @@ -341,7 +347,7 @@ bool CustomSeparatedFormatReader::parseRowBetweenDelimiterWithDiagnosticInfo(Wri void CustomSeparatedFormatReader::setReadBuffer(ReadBuffer & in_) { buf = assert_cast(&in_); - FormatWithNamesAndTypesReader::setReadBuffer(in_); + FormatWithNamesAndTypesReader::setReadBuffer(*buf); } CustomSeparatedSchemaReader::CustomSeparatedSchemaReader( @@ -352,16 +358,20 @@ CustomSeparatedSchemaReader::CustomSeparatedSchemaReader( with_names_, with_types_, &reader, - getDefaultDataTypeForEscapingRule(format_setting_.custom.escaping_rule)) + getDefaultDataTypeForEscapingRule(format_setting_.custom.escaping_rule), + format_setting_.custom.try_detect_header) , buf(in_) , reader(buf, ignore_spaces_, format_setting_) { } -DataTypes 
CustomSeparatedSchemaReader::readRowAndGetDataTypes() +std::pair, DataTypes> CustomSeparatedSchemaReader::readRowAndGetFieldsAndDataTypes() { - if (reader.checkForSuffix()) + if (no_more_data || reader.checkForSuffix()) + { + no_more_data = true; return {}; + } if (!first_row || with_names || with_types) reader.skipRowBetweenDelimiter(); @@ -370,7 +380,13 @@ DataTypes CustomSeparatedSchemaReader::readRowAndGetDataTypes() first_row = false; auto fields = reader.readRow(); - return tryInferDataTypesByEscapingRule(fields, reader.getFormatSettings(), reader.getEscapingRule(), &json_inference_info); + auto data_types = tryInferDataTypesByEscapingRule(fields, reader.getFormatSettings(), reader.getEscapingRule(), &json_inference_info); + return {fields, data_types}; +} + +DataTypes CustomSeparatedSchemaReader::readRowAndGetDataTypesImpl() +{ + return readRowAndGetFieldsAndDataTypes().second; } void CustomSeparatedSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) @@ -414,7 +430,7 @@ void registerCustomSeparatedSchemaReader(FormatFactory & factory) { String result = getAdditionalFormatInfoByEscapingRule(settings, settings.custom.escaping_rule); if (!with_names) - result += fmt::format(", column_names_for_schema_inference={}", settings.column_names_for_schema_inference); + result += fmt::format(", column_names_for_schema_inference={}, try_detect_header={}", settings.column_names_for_schema_inference, settings.custom.try_detect_header); return result + fmt::format( ", result_before_delimiter={}, row_before_delimiter={}, field_delimiter={}," " row_after_delimiter={}, row_between_delimiter={}, result_after_delimiter={}", diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h index 8a3112eb9c1..26ee32be370 100644 --- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h +++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h @@ -68,22 +68,32 @@ public: std::vector readNames() override { return readHeaderRow(); } std::vector readTypes() override { return readHeaderRow(); } - std::vector readHeaderRow() {return readRowImpl(); } + std::vector readHeaderRow() {return readRowImpl(); } - std::vector readRow() { return readRowImpl(); } + std::vector readRow() { return readRowImpl(); } + + std::vector readRowForHeaderDetection() override { return readRowImpl(); } bool checkEndOfRow(); bool checkForSuffixImpl(bool check_eof); inline void skipSpaces() { if (ignore_spaces) skipWhitespaceIfAny(*buf); } - EscapingRule getEscapingRule() { return format_settings.custom.escaping_rule; } + EscapingRule getEscapingRule() const override { return format_settings.custom.escaping_rule; } void setReadBuffer(ReadBuffer & in_) override; + private: - template + enum class ReadFieldMode : uint8_t + { + AS_STRING, + AS_FIELD, + AS_POSSIBLE_STRING, + }; + + template std::vector readRowImpl(); - template + template String readFieldIntoString(bool is_first, bool is_last, bool is_unknown); void updateFormatSettings(bool is_last_column); @@ -99,7 +109,9 @@ public: CustomSeparatedSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, bool ignore_spaces_, const FormatSettings & format_setting_); private: - DataTypes readRowAndGetDataTypes() override; + DataTypes readRowAndGetDataTypesImpl() override; + + std::pair, DataTypes> readRowAndGetFieldsAndDataTypes() override; void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override; @@ -107,6 +119,7 @@ private: 
CustomSeparatedFormatReader reader; bool first_row = true; JSONInferenceInfo json_inference_info; + bool no_more_data = false; }; } diff --git a/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp b/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp index ec5612ae30b..1b73e0131f6 100644 --- a/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp @@ -30,19 +30,14 @@ HiveTextRowInputFormat::HiveTextRowInputFormat( } HiveTextRowInputFormat::HiveTextRowInputFormat( - const Block & header_, std::unique_ptr buf_, const Params & params_, const FormatSettings & format_settings_) + const Block & header_, std::shared_ptr buf_, const Params & params_, const FormatSettings & format_settings_) : CSVRowInputFormat( - header_, *buf_, params_, true, false, format_settings_, std::make_unique(*buf_, format_settings_)), buf(std::move(buf_)) + header_, buf_, params_, true, false, format_settings_, std::make_unique(*buf_, format_settings_)) { } -void HiveTextRowInputFormat::setReadBuffer(ReadBuffer & in_) -{ - buf->setSubBuffer(in_); -} - HiveTextFormatReader::HiveTextFormatReader(PeekableReadBuffer & buf_, const FormatSettings & format_settings_) - : CSVFormatReader(buf_, format_settings_), buf(&buf_), input_field_names(format_settings_.hive_text.input_field_names) + : CSVFormatReader(buf_, format_settings_), input_field_names(format_settings_.hive_text.input_field_names) { } @@ -59,12 +54,6 @@ std::vector HiveTextFormatReader::readTypes() throw Exception(ErrorCodes::NOT_IMPLEMENTED, "HiveTextRowInputFormat::readTypes is not implemented"); } -void HiveTextFormatReader::setReadBuffer(ReadBuffer & buf_) -{ - buf = assert_cast(&buf_); - CSVFormatReader::setReadBuffer(buf_); -} - void registerInputFormatHiveText(FormatFactory & factory) { factory.registerInputFormat( diff --git a/src/Processors/Formats/Impl/HiveTextRowInputFormat.h b/src/Processors/Formats/Impl/HiveTextRowInputFormat.h index 251486b247c..313aad0d40d 100644 --- a/src/Processors/Formats/Impl/HiveTextRowInputFormat.h +++ b/src/Processors/Formats/Impl/HiveTextRowInputFormat.h @@ -18,13 +18,9 @@ public: String getName() const override { return "HiveTextRowInputFormat"; } - void setReadBuffer(ReadBuffer & in_) override; - private: HiveTextRowInputFormat( - const Block & header_, std::unique_ptr buf_, const Params & params_, const FormatSettings & format_settings_); - - std::unique_ptr buf; + const Block & header_, std::shared_ptr buf_, const Params & params_, const FormatSettings & format_settings_); }; class HiveTextFormatReader final : public CSVFormatReader @@ -35,10 +31,7 @@ public: std::vector readNames() override; std::vector readTypes() override; - void setReadBuffer(ReadBuffer & buf_) override; - private: - PeekableReadBuffer * buf; std::vector input_field_names; }; diff --git a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp index 8bf0ecc5d7e..17bade02a58 100644 --- a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp @@ -120,7 +120,7 @@ void JSONAsStringRowInputFormat::readJSONObject(IColumn & column) bool quotes = false; if (*buf->position() != '{') - throw Exception("JSON object must begin with '{'.", ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "JSON object must begin with '{'."); ++buf->position(); ++balance; @@ -130,7 +130,7 @@ void JSONAsStringRowInputFormat::readJSONObject(IColumn & column) while 
(balance) { if (buf->eof()) - throw Exception("Unexpected end of file while parsing JSON object.", ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected end of file while parsing JSON object."); if (quotes) { diff --git a/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp b/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp index d23c16c1437..1e8f57aa9a6 100644 --- a/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp @@ -44,7 +44,7 @@ void JSONColumnsWithMetadataBlockOutputFormat::consumeExtremes(Chunk chunk) { auto num_rows = chunk.getNumRows(); if (num_rows != 2) - throw Exception("Got " + toString(num_rows) + " in extremes chunk, expected 2", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got {} in extremes chunk, expected 2", num_rows); const auto & columns = chunk.getColumns(); JSONUtils::writeFieldDelimiter(*ostr, 2); @@ -66,7 +66,7 @@ void JSONColumnsWithMetadataBlockOutputFormat::consumeTotals(Chunk chunk) { auto num_rows = chunk.getNumRows(); if (num_rows != 1) - throw Exception("Got " + toString(num_rows) + " in totals chunk, expected 1", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got {} in totals chunk, expected 1", num_rows); const auto & columns = chunk.getColumns(); JSONUtils::writeFieldDelimiter(*ostr, 2); diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp index e9c28099c5f..b91345bebe3 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -186,7 +187,7 @@ JSONCompactEachRowRowSchemaReader::JSONCompactEachRowRowSchemaReader( { } -DataTypes JSONCompactEachRowRowSchemaReader::readRowAndGetDataTypes() +DataTypes JSONCompactEachRowRowSchemaReader::readRowAndGetDataTypesImpl() { if (first_row) first_row = false; diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h index 2151967517a..bb699f0ca2e 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h @@ -79,7 +79,7 @@ public: JSONCompactEachRowRowSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, bool yield_strings_, const FormatSettings & format_settings_); private: - DataTypes readRowAndGetDataTypes() override; + DataTypes readRowAndGetDataTypesImpl() override; void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override; void transformFinalTypeIfNeeded(DataTypePtr & type) override; diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index a8881c5f398..22ac31c7824 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -122,7 +122,7 @@ StringRef JSONEachRowRowInputFormat::readColumnName(ReadBuffer & buf) void JSONEachRowRowInputFormat::skipUnknownField(StringRef name_ref) { if (!format_settings.skip_unknown_fields) - throw Exception("Unknown field found while parsing JSONEachRow format: " + name_ref.toString(), ErrorCodes::INCORRECT_DATA); + throw 
Exception(ErrorCodes::INCORRECT_DATA, "Unknown field found while parsing JSONEachRow format: {}", name_ref.toString()); skipJSONField(*in, name_ref); } @@ -130,7 +130,7 @@ void JSONEachRowRowInputFormat::skipUnknownField(StringRef name_ref) void JSONEachRowRowInputFormat::readField(size_t index, MutableColumns & columns) { if (seen_columns[index]) - throw Exception("Duplicate field found while parsing JSONEachRow format: " + columnName(index), ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Duplicate field found while parsing JSONEachRow format: {}", columnName(index)); seen_columns[index] = true; const auto & type = getPort().getHeader().getByPosition(index).type; @@ -143,7 +143,7 @@ inline bool JSONEachRowRowInputFormat::advanceToNextKey(size_t key_index) skipWhitespaceIfAny(*in); if (in->eof()) - throw ParsingException("Unexpected end of stream while parsing JSONEachRow format", ErrorCodes::CANNOT_READ_ALL_DATA); + throw ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA, "Unexpected end of stream while parsing JSONEachRow format"); else if (*in->position() == '}') { ++in->position(); @@ -180,7 +180,7 @@ void JSONEachRowRowInputFormat::readJSONObject(MutableColumns & columns) else if (column_index == NESTED_FIELD) readNestedData(name_ref.toString(), columns); else - throw Exception("Logical error: illegal value of column_index", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: illegal value of column_index"); } else { diff --git a/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp b/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp index 677f8bb28ec..a86d68c17ff 100644 --- a/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp @@ -19,7 +19,7 @@ LineAsStringRowInputFormat::LineAsStringRowInputFormat(const Block & header_, Re if (header_.columns() != 1 || !typeid_cast(header_.getByPosition(0).column.get())) { - throw Exception("This input format is only suitable for tables with a single column of type String.", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "This input format is only suitable for tables with a single column of type String."); } } diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp index 0e8421566ab..f337eedbb05 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp @@ -384,7 +384,7 @@ bool MsgPackVisitor::visit_ext(const char * value, uint32_t size) void MsgPackVisitor::parse_error(size_t, size_t) // NOLINT { - throw Exception("Error occurred while parsing msgpack data.", ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Error occurred while parsing msgpack data."); } bool MsgPackRowInputFormat::readObject() @@ -398,7 +398,7 @@ bool MsgPackRowInputFormat::readObject() { buf->position() = buf->buffer().end(); if (buf->eof()) - throw Exception("Unexpected end of file while parsing msgpack object.", ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected end of file while parsing msgpack object."); buf->position() = buf->buffer().end(); buf->makeContinuousMemoryFromCheckpointToPos(); buf->rollbackToCheckpoint(); @@ -421,7 +421,7 @@ bool MsgPackRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & if (!has_more_data) { if (column_index != 0) - throw Exception("Not enough values to 
complete the row.", ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Not enough values to complete the row."); return false; } return true; @@ -436,7 +436,9 @@ MsgPackSchemaReader::MsgPackSchemaReader(ReadBuffer & in_, const FormatSettings : IRowSchemaReader(buf, format_settings_), buf(in_), number_of_columns(format_settings_.msgpack.number_of_columns) { if (!number_of_columns) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "You must specify setting input_format_msgpack_number_of_columns to extract table schema from MsgPack data"); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "You must specify setting input_format_msgpack_number_of_columns " + "to extract table schema from MsgPack data"); } @@ -461,7 +463,7 @@ msgpack::object_handle MsgPackSchemaReader::readObject() { buf.position() = buf.buffer().end(); if (buf.eof()) - throw Exception("Unexpected end of file while parsing msgpack object", ErrorCodes::UNEXPECTED_END_OF_FILE); + throw Exception(ErrorCodes::UNEXPECTED_END_OF_FILE, "Unexpected end of file while parsing msgpack object"); buf.position() = buf.buffer().end(); buf.makeContinuousMemoryFromCheckpointToPos(); buf.rollbackToCheckpoint(); diff --git a/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp index c9b41ee10bb..a1ed45ec40f 100644 --- a/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp @@ -208,7 +208,7 @@ void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr default: break; } - throw Exception("Type " + data_type->getName() + " is not supported for MsgPack output format", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Type {} is not supported for MsgPack output format", data_type->getName()); } void MsgPackRowOutputFormat::write(const Columns & columns, size_t row_num) diff --git a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp index c3f7b4e0ad7..bf55fe88469 100644 --- a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp @@ -337,7 +337,8 @@ static void readFirstCreateAndInsertQueries(ReadBuffer & in, String & table_name } if (!insert_query_present) - throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "There is no INSERT queries{} in MySQL dump file", table_name.empty() ? "" : " for table " + table_name); + throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "There is no INSERT queries{} in MySQL dump file", + table_name.empty() ? "" : " for table " + table_name); skipToDataInInsertQuery(in, column_names.empty() ? 
&column_names : nullptr); } diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index 5855b6e1ce0..2e45d817506 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -104,12 +104,12 @@ static void getFileReaderAndSchema( auto result = arrow::adapters::orc::ORCFileReader::Open(arrow_file, arrow::default_memory_pool()); if (!result.ok()) - throw Exception(result.status().ToString(), ErrorCodes::BAD_ARGUMENTS); + throw Exception::createDeprecated(result.status().ToString(), ErrorCodes::BAD_ARGUMENTS); file_reader = std::move(result).ValueOrDie(); auto read_schema_result = file_reader->ReadSchema(); if (!read_schema_result.ok()) - throw Exception(read_schema_result.status().ToString(), ErrorCodes::BAD_ARGUMENTS); + throw Exception::createDeprecated(read_schema_result.status().ToString(), ErrorCodes::BAD_ARGUMENTS); schema = std::move(read_schema_result).ValueOrDie(); } diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp index 2605719d1ec..42c3e178436 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp @@ -153,7 +153,7 @@ std::unique_ptr ORCBlockOutputFormat::getORCType(const DataTypePtr & } default: { - throw Exception("Type " + type->getName() + " is not supported for ORC output format", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Type {} is not supported for ORC output format", type->getName()); } } } @@ -462,7 +462,7 @@ void ORCBlockOutputFormat::writeColumn( break; } default: - throw Exception("Type " + type->getName() + " is not supported for ORC output format", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Type {} is not supported for ORC output format", type->getName()); } } diff --git a/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp b/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp index 19ec3772da0..293bf4f73f3 100644 --- a/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp @@ -161,6 +161,12 @@ Chunk ParallelParsingInputFormat::generate() /// Delayed launching of segmentator thread if (unlikely(!parsing_started.exchange(true))) { + /// Lock 'finish_and_wait_mutex' to avoid recreation of + /// 'segmentator_thread' after it was joined. 
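// The exception rewrites in the hunks above all move to the same pattern: error code first,
// then a format string with {} placeholders, then the arguments. A minimal sketch of such a
// constructor, assuming the {fmt} library directly rather than ClickHouse's real Exception
// machinery (class and member names here are illustrative only):
#include <fmt/format.h>
#include <stdexcept>
#include <utility>

class FormattedException : public std::runtime_error
{
public:
    template <typename... Args>
    FormattedException(int code_, fmt::format_string<Args...> format, Args &&... args)
        : std::runtime_error(fmt::format(format, std::forward<Args>(args)...)), code(code_)
    {
    }

    int code;
};

// Usage sketch: placeholder/argument mismatches are rejected at compile time, which is the
// main benefit over building messages with ad-hoc string concatenation.
// throw FormattedException(117 /* e.g. INCORRECT_DATA */, "Got {} rows in totals chunk, expected 1", num_rows);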
+ std::lock_guard finish_and_wait_lock(finish_and_wait_mutex); + if (finish_and_wait_called) + return {}; + segmentator_thread = ThreadFromGlobalPool( &ParallelParsingInputFormat::segmentatorThreadFunction, this, CurrentThread::getGroup()); } diff --git a/src/Processors/Formats/Impl/ParallelParsingInputFormat.h b/src/Processors/Formats/Impl/ParallelParsingInputFormat.h index a16471ecc42..252b6ec3f81 100644 --- a/src/Processors/Formats/Impl/ParallelParsingInputFormat.h +++ b/src/Processors/Formats/Impl/ParallelParsingInputFormat.h @@ -111,7 +111,7 @@ public: void resetParser() override final { - throw Exception("resetParser() is not allowed for " + getName(), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "resetParser() is not allowed for {}", getName()); } const BlockMissingValues & getMissingValues() const override final @@ -172,8 +172,8 @@ private: case IProcessor::Status::NeedData: break; case IProcessor::Status::Async: break; case IProcessor::Status::ExpandPipeline: - throw Exception("One of the parsers returned status " + IProcessor::statusToName(status) + - " during parallel parsing", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "One of the parsers returned status {} during parallel parsing", + IProcessor::statusToName(status)); } } } diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index abe262d17bd..a3b34d30ed6 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -30,7 +30,7 @@ namespace ErrorCodes do \ { \ if (::arrow::Status _s = (status); !_s.ok()) \ - throw Exception(_s.ToString(), ErrorCodes::BAD_ARGUMENTS); \ + throw Exception::createDeprecated(_s.ToString(), ErrorCodes::BAD_ARGUMENTS); \ } while (false) ParquetBlockInputFormat::ParquetBlockInputFormat(ReadBuffer & in_, Block header_, const FormatSettings & format_settings_) @@ -49,8 +49,8 @@ Chunk ParquetBlockInputFormat::generate() if (is_stopped) return {}; - for (; row_group_current < row_group_total && skip_row_groups.contains(row_group_current); ++row_group_current) - ; + while (row_group_current < row_group_total && skip_row_groups.contains(row_group_current)) + ++row_group_current; if (row_group_current >= row_group_total) return res; @@ -62,12 +62,13 @@ Chunk ParquetBlockInputFormat::generate() arrow::Status get_batch_reader_status = file_reader->GetRecordBatchReader(row_group_indices, column_indices, &rbr); if (!get_batch_reader_status.ok()) - throw ParsingException{"Error while reading Parquet data: " + get_batch_reader_status.ToString(), ErrorCodes::CANNOT_READ_ALL_DATA}; + throw ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA, "Error while reading Parquet data: {}", + get_batch_reader_status.ToString()); arrow::Status read_status = rbr->ReadAll(&table); if (!read_status.ok()) - throw ParsingException{"Error while reading Parquet data: " + read_status.ToString(), ErrorCodes::CANNOT_READ_ALL_DATA}; + throw ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA, "Error while reading Parquet data: {}", read_status.ToString()); ++row_group_current; @@ -78,6 +79,7 @@ Chunk ParquetBlockInputFormat::generate() if (format_settings.defaults_for_omitted_fields) for (const auto & column_idx : missing_columns) block_missing_values.setBits(column_idx, res.getNumRows()); + return res; } diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp index 
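// The ParallelParsingInputFormat hunk above guards lazy creation of `segmentator_thread`
// with `finish_and_wait_mutex` so the thread can never be created again after
// finishAndWait() has already joined it. A standalone sketch of that pattern with
// std::thread (all names here are illustrative, not the actual ClickHouse classes):
#include <atomic>
#include <mutex>
#include <thread>

class LazyWorker
{
public:
    void ensureStarted()
    {
        if (!parsing_started.exchange(true))
        {
            std::lock_guard lock(finish_and_wait_mutex);
            if (finish_and_wait_called)
                return;                        /// shutting down: do not (re)create the thread
            worker = std::thread([] { /* segment and parse the input */ });
        }
    }

    void finishAndWait()
    {
        {
            std::lock_guard lock(finish_and_wait_mutex);
            finish_and_wait_called = true;     /// any later ensureStarted() becomes a no-op
        }
        if (worker.joinable())
            worker.join();
    }

private:
    std::atomic<bool> parsing_started{false};
    bool finish_and_wait_called = false;
    std::mutex finish_and_wait_mutex;
    std::thread worker;
};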
e99b308b87b..d889c156e18 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp @@ -50,14 +50,14 @@ void ParquetBlockOutputFormat::consume(Chunk chunk) props, /*parquet::default_writer_properties(),*/ &file_writer); if (!status.ok()) - throw Exception{"Error while opening a table: " + status.ToString(), ErrorCodes::UNKNOWN_EXCEPTION}; + throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Error while opening a table: {}", status.ToString()); } // TODO: calculate row_group_size depending on a number of rows and table size auto status = file_writer->WriteTable(*arrow_table, format_settings.parquet.row_group_size); if (!status.ok()) - throw Exception{"Error while writing a table: " + status.ToString(), ErrorCodes::UNKNOWN_EXCEPTION}; + throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Error while writing a table: {}", status.ToString()); } void ParquetBlockOutputFormat::finalizeImpl() @@ -71,7 +71,7 @@ void ParquetBlockOutputFormat::finalizeImpl() auto status = file_writer->Close(); if (!status.ok()) - throw Exception{"Error while closing a table: " + status.ToString(), ErrorCodes::UNKNOWN_EXCEPTION}; + throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Error while closing a table: {}", status.ToString()); } void ParquetBlockOutputFormat::resetFormatterImpl() diff --git a/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp b/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp index 3faeefbaabd..129c9ca3156 100644 --- a/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp @@ -40,9 +40,9 @@ ProtobufRowOutputFormat::ProtobufRowOutputFormat( void ProtobufRowOutputFormat::write(const Columns & columns, size_t row_num) { if (!allow_multiple_rows && !first_row) - throw Exception( - "The ProtobufSingle format can't be used to write multiple rows because this format doesn't have any row delimiter.", - ErrorCodes::NO_ROW_DELIMITER); + throw Exception(ErrorCodes::NO_ROW_DELIMITER, + "The ProtobufSingle format can't be used " + "to write multiple rows because this format doesn't have any row delimiter."); if (row_num == 0) serializer->setColumns(columns.data(), columns.size()); diff --git a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp index ef577395d8f..74ce7d7f2ac 100644 --- a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp @@ -59,11 +59,12 @@ bool RegexpFieldExtractor::parseRow(PeekableReadBuffer & buf) static_cast(re2_arguments_ptrs.size())); if (!match && !skip_unmatched) - throw Exception("Line \"" + std::string(buf.position(), line_to_match) + "\" doesn't match the regexp.", ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Line \"{}\" doesn't match the regexp.", + std::string(buf.position(), line_to_match)); buf.position() += line_size; if (!buf.eof() && !checkChar('\n', buf)) - throw Exception("No \\n at the end of line.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "No \\n at the end of line."); return match; } @@ -109,7 +110,7 @@ bool RegexpRowInputFormat::readField(size_t index, MutableColumns & columns) void RegexpRowInputFormat::readFieldsFromMatch(MutableColumns & columns, RowReadExtension & ext) { if (field_extractor.getMatchedFieldsSize() != columns.size()) - throw Exception("The number of matched fields in line doesn't match the number of columns.", 
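// The RegexpRowInputFormat hunk above maps every capturing group of the user-supplied
// regular expression to one column. A sketch of that extraction with the re2 library;
// the real code works on a PeekableReadBuffer and re2::StringPiece rather than std::string:
#include <re2/re2.h>
#include <string>
#include <vector>

bool extractFields(const std::string & line, const RE2 & regexp, std::vector<std::string> & fields)
{
    int groups = regexp.NumberOfCapturingGroups();
    fields.assign(static_cast<size_t>(groups), {});

    std::vector<RE2::Arg> args(static_cast<size_t>(groups));
    std::vector<RE2::Arg *> arg_ptrs(static_cast<size_t>(groups));
    for (int i = 0; i < groups; ++i)
    {
        args[i] = RE2::Arg(&fields[i]);
        arg_ptrs[i] = &args[i];
    }

    /// A non-matching line is then either skipped or reported,
    /// depending on the format_regexp_skip_unmatched setting.
    return RE2::FullMatchN(line, regexp, arg_ptrs.data(), groups);
}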
ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "The number of matched fields in line doesn't match the number of columns."); ext.read_columns.assign(columns.size(), false); for (size_t columns_index = 0; columns_index < columns.size(); ++columns_index) diff --git a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp index f5f05453f25..bf6d0ab88d2 100644 --- a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp @@ -84,7 +84,7 @@ static bool readName(ReadBuffer & buf, StringRef & ref, String & tmp) tmp.append(buf.position(), next_pos - buf.position()); buf.position() += next_pos + 1 - buf.position(); if (buf.eof()) - throw Exception("Cannot parse escape sequence", ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE); + throw Exception(ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE, "Cannot parse escape sequence"); tmp.push_back(parseEscapeSequence(*buf.position())); ++buf.position(); @@ -92,7 +92,7 @@ static bool readName(ReadBuffer & buf, StringRef & ref, String & tmp) } } - throw ParsingException("Unexpected end of stream while reading key name from TSKV format", ErrorCodes::CANNOT_READ_ALL_DATA); + throw ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA, "Unexpected end of stream while reading key name from TSKV format"); } @@ -130,7 +130,7 @@ bool TSKVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ex if (!it) { if (!format_settings.skip_unknown_fields) - throw Exception("Unknown field found while parsing TSKV format: " + name_ref.toString(), ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Unknown field found while parsing TSKV format: {}", name_ref.toString()); /// If the key is not found, skip the value. NullOutput sink; @@ -141,7 +141,7 @@ bool TSKVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ex index = it->getMapped(); if (seen_columns[index]) - throw Exception("Duplicate field found while parsing TSKV format: " + name_ref.toString(), ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Duplicate field found while parsing TSKV format: {}", name_ref.toString()); seen_columns[index] = read_columns[index] = true; const auto & type = getPort().getHeader().getByPosition(index).type; @@ -156,12 +156,12 @@ bool TSKVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ex { /// The only thing that can go without value is `tskv` fragment that is ignored. 
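// The TSKV and JSONEachRow hunks above share one key-handling policy: an unknown key is
// skipped or rejected depending on the skip_unknown_fields setting, and a key seen twice in
// the same row is always an error. A sketch with a plain map standing in for the real
// column mapping:
#include <optional>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <vector>

std::optional<size_t> resolveKey(
    const std::unordered_map<std::string, size_t> & name_to_index,
    std::vector<bool> & seen_columns,
    const std::string & key,
    bool skip_unknown_fields)
{
    auto it = name_to_index.find(key);
    if (it == name_to_index.end())
    {
        if (!skip_unknown_fields)
            throw std::runtime_error("Unknown field found: " + key);
        return std::nullopt;                 /// caller skips the value
    }

    if (seen_columns[it->second])
        throw std::runtime_error("Duplicate field found: " + key);

    seen_columns[it->second] = true;
    return it->second;
}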
if (!(name_ref.size == 4 && 0 == memcmp(name_ref.data, "tskv", 4))) - throw Exception("Found field without value while parsing TSKV format: " + name_ref.toString(), ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Found field without value while parsing TSKV format: {}", name_ref.toString()); } if (in->eof()) { - throw ParsingException("Unexpected end of stream after field in TSKV format: " + name_ref.toString(), ErrorCodes::CANNOT_READ_ALL_DATA); + throw ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA, "Unexpected end of stream after field in TSKV format: {}", name_ref.toString()); } else if (*in->position() == '\t') { @@ -182,7 +182,7 @@ bool TSKVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ex seen_columns[index] = read_columns[index] = false; } - throw Exception("Found garbage after field in TSKV format: " + name_ref.toString(), ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + throw Exception(ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED, "Found garbage after field in TSKV format: {}", name_ref.toString()); } } } @@ -255,7 +255,7 @@ NamesAndTypesList TSKVSchemaReader::readRowAndGetNamesAndDataTypes(bool & eof) { /// The only thing that can go without value is `tskv` fragment that is ignored. if (!(name_ref.size == 4 && 0 == memcmp(name_ref.data, "tskv", 4))) - throw Exception("Found field without value while parsing TSKV format: " + name_ref.toString(), ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Found field without value while parsing TSKV format: {}", name_ref.toString()); } } diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp index 174a41a8a59..868639e66c2 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp @@ -25,11 +25,10 @@ namespace ErrorCodes static void checkForCarriageReturn(ReadBuffer & in) { if (!in.eof() && (in.position()[0] == '\r' || (in.position() != in.buffer().begin() && in.position()[-1] == '\r'))) - throw Exception("\nYou have carriage return (\\r, 0x0D, ASCII 13) at end of first row." + throw Exception(ErrorCodes::INCORRECT_DATA, "\nYou have carriage return (\\r, 0x0D, ASCII 13) at end of first row." "\nIt's like your input data has DOS/Windows style line separators, that are illegal in TabSeparated format." " You must transform your file to Unix format." 
- "\nBut if you really need carriage return at end of string value of last column, you need to escape it as \\r.", - ErrorCodes::INCORRECT_DATA); + "\nBut if you really need carriage return at end of string value of last column, you need to escape it as \\r."); } TabSeparatedRowInputFormat::TabSeparatedRowInputFormat( @@ -40,49 +39,74 @@ TabSeparatedRowInputFormat::TabSeparatedRowInputFormat( bool with_types_, bool is_raw_, const FormatSettings & format_settings_) + : TabSeparatedRowInputFormat(header_, std::make_unique(in_), params_, with_names_, with_types_, is_raw_, format_settings_) +{ +} + +TabSeparatedRowInputFormat::TabSeparatedRowInputFormat( + const Block & header_, + std::unique_ptr in_, + const Params & params_, + bool with_names_, + bool with_types_, + bool is_raw, + const FormatSettings & format_settings_) : RowInputFormatWithNamesAndTypes( header_, - in_, + *in_, params_, false, with_names_, with_types_, format_settings_, - std::make_unique(in_, format_settings_, is_raw_)) + std::make_unique(*in_, format_settings_, is_raw), + format_settings_.tsv.try_detect_header) + , buf(std::move(in_)) { } -TabSeparatedFormatReader::TabSeparatedFormatReader(ReadBuffer & in_, const FormatSettings & format_settings_, bool is_raw_) - : FormatWithNamesAndTypesReader(in_, format_settings_), is_raw(is_raw_) +void TabSeparatedRowInputFormat::setReadBuffer(ReadBuffer & in_) +{ + buf->setSubBuffer(in_); +} + +TabSeparatedFormatReader::TabSeparatedFormatReader(PeekableReadBuffer & in_, const FormatSettings & format_settings_, bool is_raw_) + : FormatWithNamesAndTypesReader(in_, format_settings_), buf(&in_), is_raw(is_raw_) { } void TabSeparatedFormatReader::skipFieldDelimiter() { - assertChar('\t', *in); + assertChar('\t', *buf); } void TabSeparatedFormatReader::skipRowEndDelimiter() { - if (in->eof()) + if (buf->eof()) return; if (unlikely(first_row)) { - checkForCarriageReturn(*in); + checkForCarriageReturn(*buf); first_row = false; } - assertChar('\n', *in); + assertChar('\n', *buf); } +template String TabSeparatedFormatReader::readFieldIntoString() { String field; if (is_raw) - readString(field, *in); + readString(field, *buf); else - readEscapedString(field, *in); + { + if constexpr (read_string) + readEscapedString(field, *buf); + else + readTSVField(field, *buf); + } return field; } @@ -90,9 +114,9 @@ void TabSeparatedFormatReader::skipField() { NullOutput out; if (is_raw) - readStringInto(out, *in); + readStringInto(out, *buf); else - readEscapedStringInto(out, *in); + readEscapedStringInto(out, *buf); } void TabSeparatedFormatReader::skipHeaderRow() @@ -101,19 +125,20 @@ void TabSeparatedFormatReader::skipHeaderRow() { skipField(); } - while (checkChar('\t', *in)); + while (checkChar('\t', *buf)); skipRowEndDelimiter(); } -std::vector TabSeparatedFormatReader::readRow() +template +std::vector TabSeparatedFormatReader::readRowImpl() { std::vector fields; do { - fields.push_back(readFieldIntoString()); + fields.push_back(readFieldIntoString()); } - while (checkChar('\t', *in)); + while (checkChar('\t', *buf)); skipRowEndDelimiter(); return fields; @@ -122,8 +147,8 @@ std::vector TabSeparatedFormatReader::readRow() bool TabSeparatedFormatReader::readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool is_last_file_column, const String & /*column_name*/) { - const bool at_delimiter = !is_last_file_column && !in->eof() && *in->position() == '\t'; - const bool at_last_column_line_end = is_last_file_column && (in->eof() || *in->position() == '\n'); + const 
bool at_delimiter = !is_last_file_column && !buf->eof() && *buf->position() == '\t'; + const bool at_last_column_line_end = is_last_file_column && (buf->eof() || *buf->position() == '\n'); if (format_settings.tsv.empty_as_default && (at_delimiter || at_last_column_line_end)) { @@ -136,17 +161,17 @@ bool TabSeparatedFormatReader::readField(IColumn & column, const DataTypePtr & t if (is_raw) { if (as_nullable) - return SerializationNullable::deserializeTextRawImpl(column, *in, format_settings, serialization); + return SerializationNullable::deserializeTextRawImpl(column, *buf, format_settings, serialization); - serialization->deserializeTextRaw(column, *in, format_settings); + serialization->deserializeTextRaw(column, *buf, format_settings); return true; } if (as_nullable) - return SerializationNullable::deserializeTextEscapedImpl(column, *in, format_settings, serialization); + return SerializationNullable::deserializeTextEscapedImpl(column, *buf, format_settings, serialization); - serialization->deserializeTextEscaped(column, *in, format_settings); + serialization->deserializeTextEscaped(column, *buf, format_settings); return true; } @@ -154,25 +179,25 @@ bool TabSeparatedFormatReader::parseFieldDelimiterWithDiagnosticInfo(WriteBuffer { try { - assertChar('\t', *in); + assertChar('\t', *buf); } catch (const DB::Exception &) { - if (*in->position() == '\n') + if (*buf->position() == '\n') { out << "ERROR: Line feed found where tab is expected." " It's like your file has less columns than expected.\n" "And if your file has the right number of columns, " "maybe it has an unescaped backslash in value before tab, which causes the tab to be escaped.\n"; } - else if (*in->position() == '\r') + else if (*buf->position() == '\r') { out << "ERROR: Carriage return found where tab is expected.\n"; } else { out << "ERROR: There is no tab. "; - verbosePrintString(in->position(), in->position() + 1, out); + verbosePrintString(buf->position(), buf->position() + 1, out); out << " found instead.\n"; } return false; @@ -183,22 +208,22 @@ bool TabSeparatedFormatReader::parseFieldDelimiterWithDiagnosticInfo(WriteBuffer bool TabSeparatedFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer & out) { - if (in->eof()) + if (buf->eof()) return true; try { - assertChar('\n', *in); + assertChar('\n', *buf); } catch (const DB::Exception &) { - if (*in->position() == '\t') + if (*buf->position() == '\t') { out << "ERROR: Tab found where line feed is expected." " It's like your file has more columns than expected.\n" "And if your file has the right number of columns, maybe it has an unescaped tab in a value.\n"; } - else if (*in->position() == '\r') + else if (*buf->position() == '\r') { out << "ERROR: Carriage return found where line feed is expected." " It's like your file has DOS/Windows style line separators, that is illegal in TabSeparated format.\n"; @@ -206,7 +231,7 @@ bool TabSeparatedFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer & out) else { out << "ERROR: There is no line feed. "; - verbosePrintString(in->position(), in->position() + 1, out); + verbosePrintString(buf->position(), buf->position() + 1, out); out << " found instead.\n"; } return false; @@ -220,19 +245,19 @@ void TabSeparatedFormatReader::checkNullValueForNonNullable(DataTypePtr type) bool can_be_parsed_as_null = type->isNullable() || type->isLowCardinalityNullable() || format_settings.null_as_default; // check null value for type is not nullable. 
don't cross buffer bound for simplicity, so maybe missing some case - if (!can_be_parsed_as_null && !in->eof()) + if (!can_be_parsed_as_null && !buf->eof()) { - if (*in->position() == '\\' && in->available() >= 2) + if (*buf->position() == '\\' && buf->available() >= 2) { - ++in->position(); - if (*in->position() == 'N') + ++buf->position(); + if (*buf->position() == 'N') { - ++in->position(); + ++buf->position(); throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected NULL value of not Nullable type {}", type->getName()); } else { - --in->position(); + --buf->position(); } } } @@ -246,29 +271,43 @@ void TabSeparatedFormatReader::skipPrefixBeforeHeader() void TabSeparatedRowInputFormat::syncAfterError() { - skipToUnescapedNextLineOrEOF(*in); + skipToUnescapedNextLineOrEOF(*buf); +} + +void TabSeparatedFormatReader::setReadBuffer(ReadBuffer & in_) +{ + buf = assert_cast(&in_); + FormatWithNamesAndTypesReader::setReadBuffer(*buf); } TabSeparatedSchemaReader::TabSeparatedSchemaReader( ReadBuffer & in_, bool with_names_, bool with_types_, bool is_raw_, const FormatSettings & format_settings_) : FormatWithNamesAndTypesSchemaReader( - in_, + buf, format_settings_, with_names_, with_types_, &reader, - getDefaultDataTypeForEscapingRule(is_raw_ ? FormatSettings::EscapingRule::Raw : FormatSettings::EscapingRule::Escaped)) - , reader(in_, format_settings_, is_raw_) + getDefaultDataTypeForEscapingRule(is_raw_ ? FormatSettings::EscapingRule::Raw : FormatSettings::EscapingRule::Escaped), + format_settings_.tsv.try_detect_header) + , buf(in_) + , reader(buf, format_settings_, is_raw_) { } -DataTypes TabSeparatedSchemaReader::readRowAndGetDataTypes() +std::pair, DataTypes> TabSeparatedSchemaReader::readRowAndGetFieldsAndDataTypes() { - if (in.eof()) + if (buf.eof()) return {}; auto fields = reader.readRow(); - return tryInferDataTypesByEscapingRule(fields, reader.getFormatSettings(), reader.getEscapingRule()); + auto data_types = tryInferDataTypesByEscapingRule(fields, reader.getFormatSettings(), reader.getEscapingRule()); + return {fields, data_types}; +} + +DataTypes TabSeparatedSchemaReader::readRowAndGetDataTypesImpl() +{ + return readRowAndGetFieldsAndDataTypes().second; } void registerInputFormatTabSeparated(FormatFactory & factory) @@ -309,7 +348,10 @@ void registerTSVSchemaReader(FormatFactory & factory) String result = getAdditionalFormatInfoByEscapingRule( settings, is_raw ? FormatSettings::EscapingRule::Raw : FormatSettings::EscapingRule::Escaped); if (!with_names) - result += fmt::format(", column_names_for_schema_inference={}", settings.column_names_for_schema_inference); + result += fmt::format( + ", column_names_for_schema_inference={}, try_detect_header={}", + settings.column_names_for_schema_inference, + settings.tsv.try_detect_header); return result; }); } @@ -337,7 +379,7 @@ static std::pair fileSegmentationEngineTabSeparatedImpl(ReadBuffer pos = find_first_symbols<'\\', '\r', '\n'>(pos, in.buffer().end()); if (pos > in.buffer().end()) - throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Position in buffer is out of bounds. 
There must be a bug."); else if (pos == in.buffer().end()) continue; @@ -377,10 +419,10 @@ void registerFileSegmentationEngineTabSeparated(FormatFactory & factory) { for (bool is_raw : {false, true}) { - auto register_func = [&](const String & format_name, bool with_names, bool with_types) + auto register_func = [&](const String & format_name, bool, bool) { - size_t min_rows = 1 + static_cast(with_names) + static_cast(with_types); - factory.registerFileSegmentationEngine(format_name, [is_raw, min_rows](ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows) + static constexpr size_t min_rows = 3; /// Make it 3 for header auto detection (first 3 rows must be always in the same segment). + factory.registerFileSegmentationEngine(format_name, [is_raw](ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows) { return fileSegmentationEngineTabSeparatedImpl(in, memory, is_raw, min_bytes, min_rows, max_rows); }); diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h index 3476b974c3b..9edcf86b5de 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h @@ -22,16 +22,24 @@ public: String getName() const override { return "TabSeparatedRowInputFormat"; } + void setReadBuffer(ReadBuffer & in_) override; + private: + TabSeparatedRowInputFormat(const Block & header_, std::unique_ptr in_, const Params & params_, + bool with_names_, bool with_types_, bool is_raw, const FormatSettings & format_settings_); + bool allowSyncAfterError() const override { return true; } void syncAfterError() override; bool isGarbageAfterField(size_t, ReadBuffer::Position pos) override { return *pos != '\n' && *pos != '\t'; } + + + std::unique_ptr buf; }; class TabSeparatedFormatReader final : public FormatWithNamesAndTypesReader { public: - TabSeparatedFormatReader(ReadBuffer & in_, const FormatSettings & format_settings, bool is_raw_); + TabSeparatedFormatReader(PeekableReadBuffer & in_, const FormatSettings & format_settings, bool is_raw_); bool readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool is_last_file_column, const String & column_name) override; @@ -45,21 +53,32 @@ public: void skipRowEndDelimiter() override; void skipPrefixBeforeHeader() override; - std::vector readRow(); - std::vector readNames() override { return readRow(); } - std::vector readTypes() override { return readRow(); } + std::vector readRow() { return readRowImpl(); } + std::vector readNames() override { return readHeaderRow(); } + std::vector readTypes() override { return readHeaderRow(); } + std::vector readHeaderRow() { return readRowImpl(); } + + template String readFieldIntoString(); + std::vector readRowForHeaderDetection() override { return readHeaderRow(); } + void checkNullValueForNonNullable(DataTypePtr type) override; bool parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out) override; bool parseRowEndWithDiagnosticInfo(WriteBuffer & out) override; - FormatSettings::EscapingRule getEscapingRule() const + FormatSettings::EscapingRule getEscapingRule() const override { return is_raw ? 
FormatSettings::EscapingRule::Raw : FormatSettings::EscapingRule::Escaped; } + void setReadBuffer(ReadBuffer & in_) override; + private: + template + std::vector readRowImpl(); + + PeekableReadBuffer * buf; bool is_raw; bool first_row = true; }; @@ -70,8 +89,10 @@ public: TabSeparatedSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, bool is_raw_, const FormatSettings & format_settings); private: - DataTypes readRowAndGetDataTypes() override; + DataTypes readRowAndGetDataTypesImpl() override; + std::pair, DataTypes> readRowAndGetFieldsAndDataTypes() override; + PeekableReadBuffer buf; TabSeparatedFormatReader reader; }; diff --git a/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp b/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp index 1b2af4e631c..6d8fe1e5a2c 100644 --- a/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp @@ -88,7 +88,7 @@ TemplateBlockOutputFormat::ResultsetPart TemplateBlockOutputFormat::stringToResu else if (part == "bytes_read") return ResultsetPart::BytesRead; else - throw Exception("Unknown output part " + part, ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Unknown output part {}", part); } void TemplateBlockOutputFormat::writeRow(const Chunk & chunk, size_t row_num) diff --git a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp index 05fa17c7a17..1a519fa977f 100644 --- a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp @@ -21,9 +21,9 @@ namespace ErrorCodes [[noreturn]] static void throwUnexpectedEof(size_t row_num) { - throw ParsingException("Unexpected EOF while parsing row " + std::to_string(row_num) + ". " + throw ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA, "Unexpected EOF while parsing row {}. " "Maybe last row has wrong format or input doesn't contain specified suffix before EOF.", - ErrorCodes::CANNOT_READ_ALL_DATA); + std::to_string(row_num)); } static void updateFormatSettingsIfNeeded(FormatSettings::EscapingRule escaping_rule, FormatSettings & settings, const ParsedTemplateFormatString & row_format, char default_csv_delimiter, size_t file_column) @@ -544,8 +544,7 @@ static ParsedTemplateFormatString fillResultSetFormat(const FormatSettings & set { if (partName == "data") return 0; - throw Exception("Unknown input part " + partName, - ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Unknown input part {}", partName); }); } return resultset_format; diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp index 7b9cb23ddf0..9511b37ff15 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp @@ -318,8 +318,8 @@ namespace size_t dst_tuple_size = type_tuple.getElements().size(); if (src_tuple_size != dst_tuple_size) - throw Exception(fmt::format("Bad size of tuple. Expected size: {}, actual size: {}.", - std::to_string(src_tuple_size), std::to_string(dst_tuple_size)), ErrorCodes::TYPE_MISMATCH); + throw Exception(ErrorCodes::TYPE_MISMATCH, "Bad size of tuple. 
Expected size: {}, actual size: {}.", + src_tuple_size, dst_tuple_size); for (size_t i = 0; i < src_tuple_size; ++i) { @@ -454,8 +454,8 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx if (shouldDeduceNewTemplate(column_idx)) { if (templates[column_idx]) - throw DB::Exception("Template for column " + std::to_string(column_idx) + " already exists and it was not evaluated yet", - ErrorCodes::LOGICAL_ERROR); + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Template for column {} already exists and it was not evaluated yet", + std::to_string(column_idx)); std::exception_ptr exception; try { @@ -497,7 +497,7 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx { buf->rollbackToCheckpoint(); size_t len = const_cast((*token_iterator)->begin) - buf->position(); - throw Exception("Cannot deduce template of expression: " + std::string(buf->position(), len), ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Cannot deduce template of expression: {}", std::string(buf->position(), len)); } } /// Continue parsing without template @@ -505,7 +505,7 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx } if (!format_settings.values.interpret_expressions) - throw Exception("Interpreting expressions is disabled", ErrorCodes::SUPPORT_IS_DISABLED); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Interpreting expressions is disabled"); /// Try to evaluate single expression if other parsers don't work buf->position() = const_cast((*token_iterator)->begin); @@ -528,10 +528,9 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx return false; } buf->rollbackToCheckpoint(); - throw Exception{"Cannot insert NULL value into a column of type '" + type.getName() + "'" - + " at: " + - String(buf->position(), std::min(SHOW_CHARS_ON_SYNTAX_ERROR, buf->buffer().end() - buf->position())), - ErrorCodes::TYPE_MISMATCH}; + throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot insert NULL value into a column of type '{}' at: {}", + type.getName(), String(buf->position(), + std::min(SHOW_CHARS_ON_SYNTAX_ERROR, buf->buffer().end() - buf->position()))); } column.insert(value); @@ -593,12 +592,12 @@ void ValuesBlockInputFormat::readSuffix() ++buf->position(); skipWhitespaceIfAny(*buf); if (buf->hasUnreadData()) - throw Exception("Cannot read data after semicolon", ErrorCodes::CANNOT_READ_ALL_DATA); + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Cannot read data after semicolon"); return; } if (buf->hasUnreadData()) - throw Exception("Unread data in PeekableReadBuffer will be lost. Most likely it's a bug.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unread data in PeekableReadBuffer will be lost. 
Most likely it's a bug."); } void ValuesBlockInputFormat::resetParser() diff --git a/src/Processors/Formats/InputFormatErrorsLogger.cpp b/src/Processors/Formats/InputFormatErrorsLogger.cpp index 88d27abf610..71d51f0e04a 100644 --- a/src/Processors/Formats/InputFormatErrorsLogger.cpp +++ b/src/Processors/Formats/InputFormatErrorsLogger.cpp @@ -37,7 +37,9 @@ InputFormatErrorsLogger::InputFormatErrorsLogger(const ContextPtr & context) auto user_files_path = context->getUserFilesPath(); errors_file_path = fs::path(user_files_path) / path_in_setting; if (!fileOrSymlinkPathStartsWith(errors_file_path, user_files_path)) - throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "Cannot log errors in path `{}`, because it is not inside `{}`", errors_file_path, user_files_path); + throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, + "Cannot log errors in path `{}`, because it is not inside `{}`", + errors_file_path, user_files_path); } else { diff --git a/src/Processors/Formats/PullingOutputFormat.cpp b/src/Processors/Formats/PullingOutputFormat.cpp index af237037a72..c2036ce37c9 100644 --- a/src/Processors/Formats/PullingOutputFormat.cpp +++ b/src/Processors/Formats/PullingOutputFormat.cpp @@ -14,8 +14,7 @@ WriteBuffer PullingOutputFormat::out(nullptr, 0); void PullingOutputFormat::consume(Chunk chunk) { if (data) - throw Exception("PullingOutputFormat cannot consume chunk because it already has data", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "PullingOutputFormat cannot consume chunk because it already has data"); if (chunk) info.update(chunk.getNumRows(), chunk.allocatedBytes()); diff --git a/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.cpp b/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.cpp index 35a86bc476d..6358a99d6b4 100644 --- a/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.cpp +++ b/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.cpp @@ -138,7 +138,7 @@ bool RowInputFormatWithDiagnosticInfo::deserializeFieldAndPrintDiagnosticInfo(co auto * curr_position = in->position(); if (curr_position < prev_position) - throw Exception("Logical error: parsing is non-deterministic.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: parsing is non-deterministic."); if (isNativeNumber(type) || isDate(type) || isDateTime(type) || isDateTime64(type)) { diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp index e0e8ea47a7b..eaedbbb4a1e 100644 --- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp +++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp @@ -1,9 +1,13 @@ #include #include #include +#include #include +#include #include #include +#include +#include namespace DB @@ -12,6 +16,30 @@ namespace DB namespace ErrorCodes { extern const int INCORRECT_DATA; + extern const int LOGICAL_ERROR; +} + +namespace +{ + bool checkIfAllValuesAreTypeNames(const std::vector & names) + { + for (const auto & name : names) + { + if (!DataTypeFactory::instance().tryGet(name)) + return false; + } + return true; + } + + bool isSubsetOf(const std::unordered_set & subset, const std::unordered_set & set) + { + for (const auto & element : subset) + { + if (!set.contains(element)) + return false; + } + return true; + } } RowInputFormatWithNamesAndTypes::RowInputFormatWithNamesAndTypes( @@ -22,7 +50,8 @@ RowInputFormatWithNamesAndTypes::RowInputFormatWithNamesAndTypes( bool with_names_, bool with_types_, const FormatSettings 
& format_settings_, - std::unique_ptr format_reader_) + std::unique_ptr format_reader_, + bool try_detect_header_) : RowInputFormatWithDiagnosticInfo(header_, in_, params_) , format_settings(format_settings_) , data_types(header_.getDataTypes()) @@ -30,6 +59,7 @@ RowInputFormatWithNamesAndTypes::RowInputFormatWithNamesAndTypes( , with_names(with_names_) , with_types(with_types_) , format_reader(std::move(format_reader_)) + , try_detect_header(try_detect_header_) { column_indexes_by_names = getPort().getHeader().getNamesToIndexesMap(); } @@ -52,53 +82,117 @@ void RowInputFormatWithNamesAndTypes::readPrefix() /// Skip prefix before names and types. format_reader->skipPrefixBeforeHeader(); + std::vector column_names; + std::vector type_names; if (with_names) - { - if (format_settings.with_names_use_header) - { - auto column_names = format_reader->readNames(); - column_mapping->addColumns(column_names, column_indexes_by_names, format_settings); - } - else - { - column_mapping->setupByHeader(getPort().getHeader()); - format_reader->skipNames(); - } - } - else if (!column_mapping->is_set) - column_mapping->setupByHeader(getPort().getHeader()); + column_names = format_reader->readNames(); if (with_types) { /// Skip delimiter between names and types. format_reader->skipRowBetweenDelimiter(); - if (format_settings.with_types_use_header) + type_names = format_reader->readTypes(); + } + + if (!with_names && !with_types && try_detect_header) + tryDetectHeader(column_names, type_names); + + if (!column_names.empty()) + { + if (format_settings.with_names_use_header) + column_mapping->addColumns(column_names, column_indexes_by_names, format_settings); + else + column_mapping->setupByHeader(getPort().getHeader()); + } + else if (!column_mapping->is_set) + column_mapping->setupByHeader(getPort().getHeader()); + + if (!type_names.empty() && format_settings.with_types_use_header) + { + if (type_names.size() != column_mapping->column_indexes_for_input_fields.size()) + throw Exception( + ErrorCodes::INCORRECT_DATA, + "The number of data types differs from the number of column names in input data"); + + /// Check that types from input matches types from header. + for (size_t i = 0; i < type_names.size(); ++i) { - auto types = format_reader->readTypes(); - if (types.size() != column_mapping->column_indexes_for_input_fields.size()) + if (column_mapping->column_indexes_for_input_fields[i] && + data_types[*column_mapping->column_indexes_for_input_fields[i]]->getName() != type_names[i]) + { throw Exception( ErrorCodes::INCORRECT_DATA, - "The number of data types differs from the number of column names in input data"); - - /// Check that types from input matches types from header. 
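// When a types row is present and with_types_use_header is enabled, each type name from the
// input must match the type of the corresponding header column (the check above). A distilled
// sketch over plain strings; the real code compares names of DataTypePtr and reports the
// offending column by name:
#include <stdexcept>
#include <string>
#include <vector>

void validateTypesRow(const std::vector<std::string> & header_type_names,
                      const std::vector<std::string> & input_type_names)
{
    if (input_type_names.size() != header_type_names.size())
        throw std::runtime_error("The number of data types differs from the number of column names in input data");

    for (size_t i = 0; i < input_type_names.size(); ++i)
        if (header_type_names[i] != input_type_names[i])
            throw std::runtime_error(
                "Type of column " + std::to_string(i) + " must be " + header_type_names[i] + ", not " + input_type_names[i]);
}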
- for (size_t i = 0; i < types.size(); ++i) - { - if (column_mapping->column_indexes_for_input_fields[i] && - data_types[*column_mapping->column_indexes_for_input_fields[i]]->getName() != types[i]) - { - throw Exception( - ErrorCodes::INCORRECT_DATA, - "Type of '{}' must be {}, not {}", - getPort().getHeader().getByPosition(*column_mapping->column_indexes_for_input_fields[i]).name, - data_types[*column_mapping->column_indexes_for_input_fields[i]]->getName(), types[i]); - } + "Type of '{}' must be {}, not {}", + getPort().getHeader().getByPosition(*column_mapping->column_indexes_for_input_fields[i]).name, + data_types[*column_mapping->column_indexes_for_input_fields[i]]->getName(), type_names[i]); } } - else - format_reader->skipTypes(); } } +void RowInputFormatWithNamesAndTypes::tryDetectHeader(std::vector & column_names_out, std::vector & type_names_out) +{ + auto & read_buf = getReadBuffer(); + PeekableReadBuffer * peekable_buf = dynamic_cast(&read_buf); + if (!peekable_buf) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Header detection is supported only for formats that use PeekableReadBuffer"); + + /// Empty data. + if (unlikely(format_reader->checkForSuffix())) + { + end_of_stream = true; + return; + } + + /// Make a checkpoint before reading the first row. + peekable_buf->setCheckpoint(); + auto first_row_values = format_reader->readRowForHeaderDetection(); + + /// To understand if the first row is a header with column names, we check + /// that all values from this row is a subset of column names from provided header + /// or column names from provided header is a subset of values from this row + auto column_names = getPort().getHeader().getNames(); + std::unordered_set column_names_set(column_names.begin(), column_names.end()); + std::unordered_set first_row_values_set(first_row_values.begin(), first_row_values.end()); + if (!isSubsetOf(first_row_values_set, column_names_set) && !isSubsetOf(column_names_set, first_row_values_set)) + { + /// Rollback to the beginning of the first row to parse it as data later. + peekable_buf->rollbackToCheckpoint(true); + return; + } + + /// First row is a header with column names. + column_names_out = std::move(first_row_values); + peekable_buf->dropCheckpoint(); + is_header_detected = true; + + /// Data contains only 1 row and it's just names. + if (unlikely(format_reader->checkForSuffix())) + { + end_of_stream = true; + return; + } + + /// Make a checkpoint before reading the second row. + peekable_buf->setCheckpoint(); + + /// Skip delimiter between the first and the second rows. + format_reader->skipRowBetweenDelimiter(); + auto second_row_values = format_reader->readRowForHeaderDetection(); + + /// The second row can be a header with type names if it contains only valid type names. + if (!checkIfAllValuesAreTypeNames(second_row_values)) + { + /// Rollback to the beginning of the second row to parse it as data later. + peekable_buf->rollbackToCheckpoint(true); + return; + } + + /// The second row is a header with type names. 
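// tryDetectHeader above accepts the first row as a header only when its values and the
// expected column names are in a subset relation in at least one direction, which tolerates
// both extra and missing columns. A distilled sketch of that decision over plain strings:
#include <string>
#include <unordered_set>
#include <vector>

bool firstRowLooksLikeHeader(const std::vector<std::string> & first_row_values,
                             const std::vector<std::string> & header_column_names)
{
    std::unordered_set<std::string> values(first_row_values.begin(), first_row_values.end());
    std::unordered_set<std::string> names(header_column_names.begin(), header_column_names.end());

    auto is_subset_of = [](const auto & subset, const auto & set)
    {
        for (const auto & element : subset)
            if (!set.contains(element))
                return false;
        return true;
    };

    return is_subset_of(values, names) || is_subset_of(names, values);
}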
+ type_names_out = std::move(second_row_values); + peekable_buf->dropCheckpoint(); +} + bool RowInputFormatWithNamesAndTypes::readRow(MutableColumns & columns, RowReadExtension & ext) { if (unlikely(end_of_stream)) @@ -112,7 +206,7 @@ bool RowInputFormatWithNamesAndTypes::readRow(MutableColumns & columns, RowReadE updateDiagnosticInfo(); - if (likely(row_num != 1 || (getCurrentUnitNumber() == 0 && (with_names || with_types)))) + if (likely(row_num != 1 || (getCurrentUnitNumber() == 0 && (with_names || with_types || is_header_detected)))) format_reader->skipRowBetweenDelimiter(); format_reader->skipRowStartDelimiter(); @@ -234,8 +328,9 @@ FormatWithNamesAndTypesSchemaReader::FormatWithNamesAndTypesSchemaReader( bool with_names_, bool with_types_, FormatWithNamesAndTypesReader * format_reader_, - DataTypePtr default_type_) - : IRowSchemaReader(in_, format_settings_, default_type_), with_names(with_names_), with_types(with_types_), format_reader(format_reader_) + DataTypePtr default_type_, + bool try_detect_header_) + : IRowSchemaReader(in_, format_settings_, default_type_), with_names(with_names_), with_types(with_types_), format_reader(format_reader_), try_detect_header(try_detect_header_) { } @@ -246,31 +341,173 @@ NamesAndTypesList FormatWithNamesAndTypesSchemaReader::readSchema() format_reader->skipPrefixBeforeHeader(); - Names names; + std::vector column_names; if (with_names) - names = format_reader->readNames(); + column_names = format_reader->readNames(); + std::vector data_type_names; if (with_types) { format_reader->skipRowBetweenDelimiter(); - std::vector data_type_names = format_reader->readTypes(); - if (data_type_names.size() != names.size()) + data_type_names = format_reader->readTypes(); + } + + if (!with_names && !with_types && try_detect_header) + tryDetectHeader(column_names, data_type_names); + + if (!data_type_names.empty()) + { + if (data_type_names.size() != column_names.size()) throw Exception( ErrorCodes::INCORRECT_DATA, - "The number of column names {} differs with the number of types {}", names.size(), data_type_names.size()); + "The number of column names {} differs with the number of types {}", column_names.size(), data_type_names.size()); NamesAndTypesList result; for (size_t i = 0; i != data_type_names.size(); ++i) - result.emplace_back(names[i], DataTypeFactory::instance().get(data_type_names[i])); + result.emplace_back(column_names[i], DataTypeFactory::instance().get(data_type_names[i])); return result; } - if (!names.empty()) - setColumnNames(names); + if (!column_names.empty()) + setColumnNames(column_names); /// We should determine types by reading rows with data. Use the implementation from IRowSchemaReader. return IRowSchemaReader::readSchema(); } +namespace +{ + bool checkIfAllTypesAreString(const DataTypes & types) + { + for (const auto & type : types) + if (!type || !isString(removeNullable(removeLowCardinality(type)))) + return false; + return true; + } + + bool haveNotStringAndNotNullType(const DataTypes & types) + { + for (const auto & type : types) + if (type && !isString(removeNullable(removeLowCardinality(type))) && !type->onlyNull()) + return true; + return false; + } +} + +void FormatWithNamesAndTypesSchemaReader::tryDetectHeader(std::vector & column_names, std::vector & type_names) +{ + auto [first_row_values, first_row_types] = readRowAndGetFieldsAndDataTypes(); + + /// No data. + if (first_row_values.empty()) + return; + + /// The first row contains non String elements, it cannot be a header. 
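// Both header-detection paths rely on the same probing idiom: set a checkpoint, read a
// trial row, and either keep the consumption (it was a header) or rewind so the row is
// later parsed as ordinary data. A generic sketch assuming a PeekableReadBuffer-like
// interface (setCheckpoint / dropCheckpoint / rollbackToCheckpoint):
template <typename PeekableBuffer, typename ReadRowFn, typename IsHeaderFn>
bool probeHeaderRow(PeekableBuffer & buf, ReadRowFn read_row, IsHeaderFn is_header)
{
    buf.setCheckpoint();
    auto row = read_row(buf);
    if (is_header(row))
    {
        buf.dropCheckpoint();                   /// keep the consumed header row
        return true;
    }
    buf.rollbackToCheckpoint(/*drop=*/ true);   /// rewind: the row is data, not a header
    return false;
}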
+ if (!checkIfAllTypesAreString(first_row_types)) + { + buffered_types = first_row_types; + return; + } + + auto [second_row_values, second_row_types] = readRowAndGetFieldsAndDataTypes(); + + /// Data contains only 1 row, don't treat it as a header. + if (second_row_values.empty()) + { + buffered_types = first_row_types; + return; + } + + DataTypes data_types; + bool second_row_can_be_type_names = checkIfAllTypesAreString(second_row_types) && checkIfAllValuesAreTypeNames(readNamesFromFields(second_row_values)); + size_t row = 2; + if (!second_row_can_be_type_names) + { + data_types = second_row_types; + } + else + { + data_types = readRowAndGetDataTypes(); + /// Data contains only 2 rows. + if (data_types.empty()) + { + second_row_can_be_type_names = false; + data_types = second_row_types; + } + else + { + ++row; + } + } + + /// Create default names c1,c2,... for better exception messages. + std::vector default_colum_names; + default_colum_names.reserve(first_row_types.size()); + for (size_t i = 0; i != first_row_types.size(); ++i) + default_colum_names.push_back("c" + std::to_string(i + 1)); + + while (true) + { + /// Check if we have element that is not String and not Null. It means that the first two rows + /// with all String elements are most likely a header. + if (haveNotStringAndNotNullType(data_types)) + { + buffered_types = data_types; + column_names = readNamesFromFields(first_row_values); + if (second_row_can_be_type_names) + type_names = readNamesFromFields(second_row_values); + return; + } + + /// Check if we have all elements with type String. It means that the first two rows + /// with all String elements can be real data and we cannot use them as a header. + if (checkIfAllTypesAreString(data_types)) + { + buffered_types = std::move(data_types); + return; + } + + auto next_row_types = readRowAndGetDataTypes(); + /// Check if there are no more rows in data. It means that all rows contains only String values and Nulls, + /// so, the first two rows with all String elements can be real data and we cannot use them as a header. + if (next_row_types.empty()) + { + /// Buffer first data types from the first row, because it doesn't contain Nulls. + buffered_types = first_row_types; + return; + } + + ++row; + /// Combine types from current row and from previous rows. + chooseResultColumnTypes(*this, data_types, next_row_types, getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule::CSV), default_colum_names, row); + } +} + +DataTypes FormatWithNamesAndTypesSchemaReader::readRowAndGetDataTypes() +{ + /// Check if we tried to detect a header and have buffered types from read rows. 
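// The schema-inference variant above decides by inferred types instead of names: a candidate
// header row must consist of String-typed values only, and it is confirmed once a later row
// yields something that is neither String nor NULL. A sketch with types reduced to their
// names (the real code inspects DataTypePtr via removeNullable/removeLowCardinality):
#include <string>
#include <vector>

bool isStringLike(const std::string & type_name)
{
    return type_name == "String" || type_name == "Nullable(String)" || type_name == "LowCardinality(String)";
}

bool allValuesAreStringLike(const std::vector<std::string> & inferred_type_names)
{
    for (const auto & name : inferred_type_names)
        if (!isStringLike(name))
            return false;
    return true;
}

bool hasNonStringNonNullValue(const std::vector<std::string> & inferred_type_names)
{
    for (const auto & name : inferred_type_names)
        if (!isStringLike(name) && name != "Nullable(Nothing)")
            return true;
    return false;
}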
+ if (!buffered_types.empty()) + { + DataTypes res; + std::swap(res, buffered_types); + return res; + } + + return readRowAndGetDataTypesImpl(); +} + +std::vector FormatWithNamesAndTypesSchemaReader::readNamesFromFields(const std::vector & fields) +{ + std::vector names; + names.reserve(fields.size()); + auto escaping_rule = format_reader->getEscapingRule(); + for (const auto & field : fields) + { + ReadBufferFromString field_buf(field); + names.emplace_back(readStringByEscapingRule(field_buf, escaping_rule, format_settings)); + } + return names; +} + } diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h index b3066f0bdbb..5648acd392d 100644 --- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h +++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h @@ -8,6 +8,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + class FormatWithNamesAndTypesReader; /// Base class for input formats with -WithNames and -WithNamesAndTypes suffixes. @@ -36,7 +41,8 @@ protected: bool with_names_, bool with_types_, const FormatSettings & format_settings_, - std::unique_ptr format_reader_); + std::unique_ptr format_reader_, + bool try_detect_header_ = false); void resetParser() override; bool isGarbageAfterField(size_t index, ReadBuffer::Position pos) override; @@ -53,10 +59,14 @@ private: bool parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) override; void tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) override; + void tryDetectHeader(std::vector & column_names, std::vector & type_names); + bool is_binary; bool with_names; bool with_types; std::unique_ptr format_reader; + bool try_detect_header; + bool is_header_detected = false; protected: Block::NameMap column_indexes_by_names; @@ -86,6 +96,12 @@ public: /// Read row with types and return the list of them. virtual std::vector readTypes() = 0; + /// Read row with raw values. + virtual std::vector readRowForHeaderDetection() + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method readRowAndGetFieldsAndDataTypes is not implemented for format reader"); + } + /// Skip single field, it's used to skip unknown columns. virtual void skipField(size_t file_column) = 0; /// Skip the whole row with names. @@ -109,6 +125,11 @@ public: virtual ~FormatWithNamesAndTypesReader() = default; + virtual FormatSettings::EscapingRule getEscapingRule() const + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Format reader doesn't have an escaping rule"); + } + protected: ReadBuffer * in; FormatSettings format_settings; @@ -129,18 +150,35 @@ public: bool with_names_, bool with_types_, FormatWithNamesAndTypesReader * format_reader_, - DataTypePtr default_type_ = nullptr); + DataTypePtr default_type_ = nullptr, + bool try_detect_header_ = false); NamesAndTypesList readSchema() override; protected: - virtual DataTypes readRowAndGetDataTypes() override = 0; + virtual DataTypes readRowAndGetDataTypes() override; + + virtual DataTypes readRowAndGetDataTypesImpl() + { + throw Exception{ErrorCodes::NOT_IMPLEMENTED, "Method readRowAndGetDataTypesImpl is not implemented"}; + } + + /// Return column fields with inferred types. In case of no more rows, return empty vectors. 
+ virtual std::pair, DataTypes> readRowAndGetFieldsAndDataTypes() + { + throw Exception{ErrorCodes::NOT_IMPLEMENTED, "Method readRowAndGetFieldsAndDataTypes is not implemented"}; + } bool with_names; bool with_types; private: + void tryDetectHeader(std::vector & column_names_out, std::vector & type_names_out); + std::vector readNamesFromFields(const std::vector & fields); + FormatWithNamesAndTypesReader * format_reader; + bool try_detect_header; + DataTypes buffered_types; }; } diff --git a/src/Processors/IAccumulatingTransform.cpp b/src/Processors/IAccumulatingTransform.cpp index 35b45543151..00d5b2ee089 100644 --- a/src/Processors/IAccumulatingTransform.cpp +++ b/src/Processors/IAccumulatingTransform.cpp @@ -17,7 +17,7 @@ IAccumulatingTransform::IAccumulatingTransform(Block input_header, Block output_ InputPort * IAccumulatingTransform::addTotalsPort() { if (inputs.size() > 1) - throw Exception("Totals port was already added to IAccumulatingTransform", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Totals port was already added to IAccumulatingTransform"); return &inputs.emplace_back(getInputPort().getHeader(), this); } @@ -108,8 +108,9 @@ void IAccumulatingTransform::work() void IAccumulatingTransform::setReadyChunk(Chunk chunk) { if (current_output_chunk) - throw Exception("IAccumulatingTransform already has input. Cannot set another chunk. " - "Probably, setReadyChunk method was called twice per consume().", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "IAccumulatingTransform already has input. " + "Cannot set another chunk. Probably, setReadyChunk method was called twice per consume()."); current_output_chunk = std::move(chunk); } diff --git a/src/Processors/IInflatingTransform.cpp b/src/Processors/IInflatingTransform.cpp index f54d6458c10..ffa5b55dc76 100644 --- a/src/Processors/IInflatingTransform.cpp +++ b/src/Processors/IInflatingTransform.cpp @@ -67,7 +67,7 @@ void IInflatingTransform::work() if (can_generate) { if (generated) - throw Exception("IInflatingTransform cannot consume chunk because it already was generated", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "IInflatingTransform cannot consume chunk because it already was generated"); current_chunk = generate(); generated = true; @@ -76,7 +76,7 @@ void IInflatingTransform::work() else { if (!has_input) - throw Exception("IInflatingTransform cannot consume chunk because it wasn't read", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "IInflatingTransform cannot consume chunk because it wasn't read"); consume(std::move(current_chunk)); has_input = false; diff --git a/src/Processors/LimitTransform.cpp b/src/Processors/LimitTransform.cpp index ffff8c30904..2feee7e65b1 100644 --- a/src/Processors/LimitTransform.cpp +++ b/src/Processors/LimitTransform.cpp @@ -19,7 +19,7 @@ LimitTransform::LimitTransform( , with_ties(with_ties_), description(std::move(description_)) { if (num_streams != 1 && with_ties) - throw Exception("Cannot use LimitTransform with multiple ports and ties", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot use LimitTransform with multiple ports and ties"); ports_data.resize(num_streams); @@ -125,8 +125,7 @@ IProcessor::Status LimitTransform::prepare( LimitTransform::Status LimitTransform::prepare() { if (ports_data.size() != 1) - throw Exception("prepare without arguments is not supported for multi-port LimitTransform", - ErrorCodes::LOGICAL_ERROR); + throw 
Exception(ErrorCodes::LOGICAL_ERROR, "prepare without arguments is not supported for multi-port LimitTransform"); return prepare({0}, {0}); } diff --git a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp index db08f3ffbd3..560be60987b 100644 --- a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp @@ -221,7 +221,7 @@ void AggregatingSortedAlgorithm::AggregatingMergedData::finishGroup() void AggregatingSortedAlgorithm::AggregatingMergedData::addRow(SortCursor & cursor) { if (!is_group_started) - throw Exception("Can't add a row to the group because it was not started.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't add a row to the group because it was not started."); for (auto & desc : def.columns_to_aggregate) desc.column->insertMergeFrom(*cursor->all_columns[desc.column_number], cursor->getRow()); @@ -236,7 +236,7 @@ void AggregatingSortedAlgorithm::AggregatingMergedData::addRow(SortCursor & curs Chunk AggregatingSortedAlgorithm::AggregatingMergedData::pull() { if (is_group_started) - throw Exception("Can't pull chunk because group was not finished.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't pull chunk because group was not finished."); auto chunk = MergedData::pull(); postprocessChunk(chunk, def); diff --git a/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp index 1cca2510197..0c23dd51f3c 100644 --- a/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp @@ -194,8 +194,7 @@ IMergingAlgorithm::Status CollapsingSortedAlgorithm::merge() last_is_positive = false; } else - throw Exception("Incorrect data: Sign = " + toString(sign) + " (must be 1 or -1).", - ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect data: Sign = {} (must be 1 or -1).", toString(sign)); ++current_pos; diff --git a/src/Processors/Merges/Algorithms/FixedSizeDequeWithGaps.h b/src/Processors/Merges/Algorithms/FixedSizeDequeWithGaps.h index ff8f113d9a6..3f04d087861 100644 --- a/src/Processors/Merges/Algorithms/FixedSizeDequeWithGaps.h +++ b/src/Processors/Merges/Algorithms/FixedSizeDequeWithGaps.h @@ -144,20 +144,20 @@ private: void checkEnoughSpaceToInsert() const { if (size() + 1 == container.size()) - throw Exception("Not enough space to insert into FixedSizeDequeWithGaps with capacity " - + std::to_string(container.size() - 1), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Not enough space to insert into FixedSizeDequeWithGaps with capacity {}", + container.size() - 1); } void checkHasValuesToRemove() const { if (empty()) - throw Exception("Cannot remove from empty FixedSizeDequeWithGaps", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot remove from empty FixedSizeDequeWithGaps"); } void checkHasValuesToGet() const { if (empty()) - throw Exception("Cannot get value from empty FixedSizeDequeWithGaps", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot get value from empty FixedSizeDequeWithGaps"); } }; diff --git a/src/Processors/Merges/Algorithms/Graphite.cpp b/src/Processors/Merges/Algorithms/Graphite.cpp index 0616c4bd6e6..418bf5e3f13 100644 --- a/src/Processors/Merges/Algorithms/Graphite.cpp +++ 
b/src/Processors/Merges/Algorithms/Graphite.cpp @@ -43,7 +43,7 @@ const String & ruleTypeStr(RuleType rule_type) } catch (...) { - throw Exception("invalid rule type: " + std::to_string(rule_type), DB::ErrorCodes::BAD_ARGUMENTS); + throw Exception(DB::ErrorCodes::BAD_ARGUMENTS, "invalid rule type: {}", std::to_string(rule_type)); } } @@ -58,7 +58,7 @@ RuleType ruleType(const String & s) else if (s == "tag_list") return RuleTypeTagList; else - throw Exception("invalid rule type: " + s, DB::ErrorCodes::BAD_ARGUMENTS); + throw Exception(DB::ErrorCodes::BAD_ARGUMENTS, "invalid rule type: {}", s); } static const Graphite::Pattern undef_pattern = @@ -166,7 +166,7 @@ static bool compareRetentions(const Retention & a, const Retention & b) String error_msg = "age and precision should only grow up: " + std::to_string(a.age) + ":" + std::to_string(a.precision) + " vs " + std::to_string(b.age) + ":" + std::to_string(b.precision); - throw Exception( + throw Exception::createDeprecated( error_msg, DB::ErrorCodes::BAD_ARGUMENTS); } @@ -374,7 +374,7 @@ static const Pattern & appendGraphitePattern( .precision = config.getUInt(config_element + "." + key + ".precision")}); } else - throw Exception("Unknown element in config: " + key, DB::ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + throw Exception(DB::ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG, "Unknown element in config: {}", key); } if (!pattern.regexp_str.empty()) @@ -389,15 +389,13 @@ static const Pattern & appendGraphitePattern( } if (!pattern.function && pattern.retentions.empty()) - throw Exception( - "At least one of an aggregate function or retention rules is mandatory for rollup patterns in GraphiteMergeTree", - DB::ErrorCodes::NO_ELEMENTS_IN_CONFIG); + throw Exception(DB::ErrorCodes::NO_ELEMENTS_IN_CONFIG, + "At least one of an aggregate function or retention rules is mandatory for rollup patterns in GraphiteMergeTree"); if (default_rule && pattern.rule_type != RuleTypeAll) { - throw Exception( - "Default must have rule_type all for rollup patterns in GraphiteMergeTree", - DB::ErrorCodes::BAD_ARGUMENTS); + throw Exception(DB::ErrorCodes::BAD_ARGUMENTS, + "Default must have rule_type all for rollup patterns in GraphiteMergeTree"); } if (!pattern.function) @@ -415,8 +413,8 @@ static const Pattern & appendGraphitePattern( if (pattern.type & pattern.TypeAggregation) /// TypeAggregation or TypeAll if (pattern.function->allocatesMemoryInArena()) - throw Exception( - "Aggregate function " + pattern.function->getName() + " isn't supported in GraphiteMergeTree", DB::ErrorCodes::NOT_IMPLEMENTED); + throw Exception(DB::ErrorCodes::NOT_IMPLEMENTED, + "Aggregate function {} isn't supported in GraphiteMergeTree", pattern.function->getName()); /// retention should be in descending order of age. if (pattern.type & pattern.TypeRetention) /// TypeRetention or TypeAll @@ -431,7 +429,7 @@ void setGraphitePatternsFromConfig(ContextPtr context, const String & config_ele const auto & config = context->getConfigRef(); if (!config.has(config_element)) - throw Exception("No '" + config_element + "' element in configuration file", ErrorCodes::NO_ELEMENTS_IN_CONFIG); + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "No '{}' element in configuration file", config_element); params.config_name = config_element; params.path_column_name = config.getString(config_element + ".path_column_name", "Path"); @@ -460,7 +458,7 @@ void setGraphitePatternsFromConfig(ContextPtr context, const String & config_ele /// See above. 
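Most hunks in this patch follow one mechanical pattern: the error code moves to the first argument of Exception and string concatenation is replaced by a fmt-style format string, while pre-composed message strings (as in compareRetentions above) go through Exception::createDeprecated instead. A minimal self-contained sketch of the new style, using std::runtime_error and fmt::format as stand-ins for ClickHouse's internal DB::Exception:

    #include <fmt/format.h>
    #include <stdexcept>
    #include <string>

    // Old style (before this patch):
    //     throw Exception("Unknown element in config: " + key, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
    // New style (after this patch): error code first, then a format string with {} placeholders.
    [[noreturn]] void throwUnknownElement(const std::string & key)
    {
        throw std::runtime_error(fmt::format("Unknown element in config: {}", key));
    }

    int main()
    {
        try
        {
            throwUnknownElement("rollup");  // "rollup" is just an example key
        }
        catch (const std::exception & e)
        {
            fmt::print("{}\n", e.what());   // prints: Unknown element in config: rollup
        }
        return 0;
    }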
} else - throw Exception("Unknown element in config: " + key, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG, "Unknown element in config: {}", key); } if (config.has(config_element + ".default")) @@ -486,7 +484,7 @@ void setGraphitePatternsFromConfig(ContextPtr context, const String & config_ele } else { - throw Exception("Unhandled rule_type in config: " + ruleTypeStr(pattern.rule_type), ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG, "Unhandled rule_type in config: {}", ruleTypeStr(pattern.rule_type)); } } } diff --git a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp index c5937fe0bc5..123748f9b43 100644 --- a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp @@ -33,7 +33,7 @@ static GraphiteRollupSortedAlgorithm::ColumnsDefinition defineColumns( def.unmodified_column_numbers.push_back(i); if (!WhichDataType(header.getByPosition(def.value_column_num).type).isFloat64()) - throw Exception("Only `Float64` data type is allowed for the value column of GraphiteMergeTree", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Only `Float64` data type is allowed for the value column of GraphiteMergeTree"); return def; } diff --git a/src/Processors/Merges/Algorithms/MergedData.h b/src/Processors/Merges/Algorithms/MergedData.h index 6029809f0f2..24b83013aee 100644 --- a/src/Processors/Merges/Algorithms/MergedData.h +++ b/src/Processors/Merges/Algorithms/MergedData.h @@ -57,8 +57,7 @@ public: void insertChunk(Chunk && chunk, size_t rows_size) { if (merged_rows) - throw Exception("Cannot insert to MergedData from Chunk because MergedData is not empty.", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot insert to MergedData from Chunk because MergedData is not empty."); UInt64 num_rows = chunk.getNumRows(); columns = chunk.mutateColumns(); diff --git a/src/Processors/Merges/Algorithms/RowRef.h b/src/Processors/Merges/Algorithms/RowRef.h index a7646aa701f..cf741c1b53b 100644 --- a/src/Processors/Merges/Algorithms/RowRef.h +++ b/src/Processors/Merges/Algorithms/RowRef.h @@ -66,8 +66,8 @@ public: SharedChunkPtr alloc(Chunk & chunk) { if (free_chunks.empty()) - throw Exception("Not enough space in SharedChunkAllocator. " - "Chunks allocated: " + std::to_string(chunks.size()), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Not enough space in SharedChunkAllocator. Chunks allocated: {}", + chunks.size()); auto pos = free_chunks.back(); free_chunks.pop_back(); diff --git a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp index ee3177e132f..0f1775d4ac0 100644 --- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp @@ -486,9 +486,8 @@ static void setRow(Row & row, const ColumnRawPtrs & raw_columns, size_t row_num, if (i < column_names.size()) column_name = column_names[i]; - throw Exception("SummingSortedAlgorithm failed to read row " + toString(row_num) - + " of column " + toString(i) + (column_name.empty() ? 
"" : " (" + column_name + ")"), - ErrorCodes::CORRUPTED_DATA); + throw Exception(ErrorCodes::CORRUPTED_DATA, "SummingSortedAlgorithm failed to read row {} of column {})", + toString(row_num), toString(i) + (column_name.empty() ? "" : " (" + column_name)); } } } @@ -630,8 +629,7 @@ void SummingSortedAlgorithm::SummingMergedData::addRowImpl(ColumnRawPtrs & raw_c for (auto & desc : def.columns_to_aggregate) { if (!desc.created) - throw Exception("Logical error in SummingSortedAlgorithm, there are no description", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error in SummingSortedAlgorithm, there are no description"); if (desc.is_agg_func_type) { diff --git a/src/Processors/Merges/IMergingTransform.cpp b/src/Processors/Merges/IMergingTransform.cpp index 1b847069fea..4a6a1662f16 100644 --- a/src/Processors/Merges/IMergingTransform.cpp +++ b/src/Processors/Merges/IMergingTransform.cpp @@ -42,13 +42,13 @@ IMergingTransformBase::IMergingTransformBase( void IMergingTransformBase::onNewInput() { - throw Exception("onNewInput is not implemented for " + getName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "onNewInput is not implemented for {}", getName()); } void IMergingTransformBase::addInput() { if (have_all_inputs) - throw Exception("IMergingTransform already have all inputs.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "IMergingTransform already have all inputs."); inputs.emplace_back(outputs.front().getHeader(), this); onNewInput(); @@ -57,7 +57,7 @@ void IMergingTransformBase::addInput() void IMergingTransformBase::setHaveAllInputs() { if (have_all_inputs) - throw Exception("IMergingTransform already have all inputs.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "IMergingTransform already have all inputs."); have_all_inputs = true; } diff --git a/src/Processors/OffsetTransform.cpp b/src/Processors/OffsetTransform.cpp index 40a0833ffb4..dbb8bebfce6 100644 --- a/src/Processors/OffsetTransform.cpp +++ b/src/Processors/OffsetTransform.cpp @@ -61,7 +61,8 @@ IProcessor::Status OffsetTransform::prepare(const PortNumbers & updated_input_po return; default: throw Exception( - ErrorCodes::LOGICAL_ERROR, "Unexpected status for OffsetTransform::preparePair : {}", IProcessor::statusToName(status)); + ErrorCodes::LOGICAL_ERROR, "Unexpected status for OffsetTransform::preparePair : {}", + IProcessor::statusToName(status)); } }; @@ -84,8 +85,7 @@ IProcessor::Status OffsetTransform::prepare(const PortNumbers & updated_input_po OffsetTransform::Status OffsetTransform::prepare() { if (ports_data.size() != 1) - throw Exception("prepare without arguments is not supported for multi-port OffsetTransform", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "prepare without arguments is not supported for multi-port OffsetTransform"); return prepare({0}, {0}); } diff --git a/src/Processors/Port.h b/src/Processors/Port.h index 9163402f600..67af2f041aa 100644 --- a/src/Processors/Port.h +++ b/src/Processors/Port.h @@ -89,7 +89,7 @@ protected: DataPtr() : data(new Data()) { if (unlikely((getUInt(data) & FLAGS_MASK) != 0)) - throw Exception("Not alignment memory for Port", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Not alignment memory for Port"); } /// Pointer can store flags in case of exception in swap. 
~DataPtr() { delete getPtr(getUInt(data) & PTR_MASK); } @@ -133,7 +133,7 @@ protected: State() : data(new Data()) { if (unlikely((getUInt(data) & FLAGS_MASK) != 0)) - throw Exception("Not alignment memory for Port", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Not alignment memory for Port"); } ~State() @@ -153,14 +153,14 @@ protected: /// It's possible to push data into finished port. Will just ignore it. /// if (flags & IS_FINISHED) - /// throw Exception("Cannot push block to finished port.", ErrorCodes::LOGICAL_ERROR); + /// throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot push block to finished port."); /// It's possible to push data into port which is not needed now. /// if ((flags & IS_NEEDED) == 0) - /// throw Exception("Cannot push block to port which is not needed.", ErrorCodes::LOGICAL_ERROR); + /// throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot push block to port which is not needed."); if (unlikely(flags & HAS_DATA)) - throw Exception("Cannot push block to port which already has data", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot push block to port which already has data"); } void ALWAYS_INLINE pull(DataPtr & data_, std::uintptr_t & flags, bool set_not_needed = false) @@ -174,10 +174,10 @@ protected: /// It's ok to check because this flag can be changed only by pulling thread. if (unlikely((flags & IS_NEEDED) == 0) && !set_not_needed) - throw Exception("Cannot pull block from port which is not needed", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot pull block from port which is not needed"); if (unlikely((flags & HAS_DATA) == 0)) - throw Exception("Cannot pull block from port which has no data", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot pull block from port which has no data"); } std::uintptr_t ALWAYS_INLINE setFlags(std::uintptr_t flags, std::uintptr_t mask) @@ -225,7 +225,7 @@ public: void ALWAYS_INLINE assumeConnected() const { if (unlikely(!isConnected())) - throw Exception("Port is not connected", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Port is not connected"); } bool ALWAYS_INLINE hasData() const @@ -237,14 +237,14 @@ public: IProcessor & getProcessor() { if (!processor) - throw Exception("Port does not belong to Processor", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Port does not belong to Processor"); return *processor; } const IProcessor & getProcessor() const { if (!processor) - throw Exception("Port does not belong to Processor", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Port does not belong to Processor"); return *processor; } diff --git a/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp b/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp index e42642ceff8..86b64d1519c 100644 --- a/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp +++ b/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp @@ -198,7 +198,7 @@ void CreateSetAndFilterOnTheFlyStep::updateOutputStream() own_set->setHeader(getColumnSubset(input_streams[0].header, column_names)); - output_stream = input_streams[0]; + output_stream = createOutputStream(input_streams.front(), input_streams.front().header, getDataStreamTraits()); } diff --git a/src/Processors/QueryPlan/CreatingSetsStep.cpp b/src/Processors/QueryPlan/CreatingSetsStep.cpp index b52d86aa725..23e0a17a31b 100644 --- a/src/Processors/QueryPlan/CreatingSetsStep.cpp +++ 
b/src/Processors/QueryPlan/CreatingSetsStep.cpp @@ -77,7 +77,7 @@ void CreatingSetStep::describeActions(JSONBuilder::JSONMap & map) const CreatingSetsStep::CreatingSetsStep(DataStreams input_streams_) { if (input_streams_.empty()) - throw Exception("CreatingSetsStep cannot be created with no inputs", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "CreatingSetsStep cannot be created with no inputs"); input_streams = std::move(input_streams_); output_stream = input_streams.front(); @@ -91,7 +91,7 @@ CreatingSetsStep::CreatingSetsStep(DataStreams input_streams_) QueryPipelineBuilderPtr CreatingSetsStep::updatePipeline(QueryPipelineBuilders pipelines, const BuildQueryPipelineSettings &) { if (pipelines.empty()) - throw Exception("CreatingSetsStep cannot be created with no inputs", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "CreatingSetsStep cannot be created with no inputs"); auto main_pipeline = std::move(pipelines.front()); if (pipelines.size() == 1) diff --git a/src/Processors/QueryPlan/FillingStep.cpp b/src/Processors/QueryPlan/FillingStep.cpp index 8a370786820..dde3bdbf850 100644 --- a/src/Processors/QueryPlan/FillingStep.cpp +++ b/src/Processors/QueryPlan/FillingStep.cpp @@ -33,7 +33,7 @@ FillingStep::FillingStep(const DataStream & input_stream_, SortDescription sort_ , sort_description(std::move(sort_description_)), interpolate_description(interpolate_description_) { if (!input_stream_.has_single_port) - throw Exception("FillingStep expects single input", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "FillingStep expects single input"); } void FillingStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) @@ -60,7 +60,7 @@ void FillingStep::describeActions(JSONBuilder::JSONMap & map) const void FillingStep::updateOutputStream() { if (!input_streams.front().has_single_port) - throw Exception("FillingStep expects single input", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "FillingStep expects single input"); output_stream = createOutputStream( input_streams.front(), FillingTransform::transformHeader(input_streams.front().header, sort_description), getDataStreamTraits()); diff --git a/src/Processors/QueryPlan/IQueryPlanStep.cpp b/src/Processors/QueryPlan/IQueryPlanStep.cpp index a0035089c29..10352b330af 100644 --- a/src/Processors/QueryPlan/IQueryPlanStep.cpp +++ b/src/Processors/QueryPlan/IQueryPlanStep.cpp @@ -13,7 +13,7 @@ namespace ErrorCodes const DataStream & IQueryPlanStep::getOutputStream() const { if (!hasOutputStream()) - throw Exception("QueryPlanStep " + getName() + " does not have output stream.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "QueryPlanStep {} does not have output stream.", getName()); return *output_stream; } diff --git a/src/Processors/QueryPlan/Optimizations/Optimizations.h b/src/Processors/QueryPlan/Optimizations/Optimizations.h index 7f435463d64..4b587ada2c0 100644 --- a/src/Processors/QueryPlan/Optimizations/Optimizations.h +++ b/src/Processors/QueryPlan/Optimizations/Optimizations.h @@ -58,6 +58,9 @@ size_t tryReuseStorageOrderingForWindowFunctions(QueryPlan::Node * parent_node, /// Reading in order from MergeTree table if DISTINCT columns match or form a prefix of MergeTree sorting key size_t tryDistinctReadInOrder(QueryPlan::Node * node); +/// Remove redundant sorting +void tryRemoveRedundantSorting(QueryPlan::Node * root); + /// Put some steps under union, so that plan 
optimisation could be applied to union parts separately. /// For example, the plan can be rewritten like: /// - Something - - Expression - Something - diff --git a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp index 00abd803d2a..a2a69ae1f69 100644 --- a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp +++ b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp @@ -14,6 +14,7 @@ QueryPlanOptimizationSettings QueryPlanOptimizationSettings::fromSettings(const settings.distinct_in_order = from.optimize_distinct_in_order; settings.read_in_order = from.optimize_read_in_order && from.query_plan_read_in_order; settings.aggregation_in_order = from.optimize_aggregation_in_order && from.query_plan_aggregation_in_order; + settings.remove_redundant_sorting = from.query_plan_remove_redundant_sorting; return settings; } diff --git a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h index d4989b86b68..b894e5caf1d 100644 --- a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h +++ b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h @@ -30,6 +30,9 @@ struct QueryPlanOptimizationSettings /// If aggregation-in-order optimisation is enabled bool aggregation_in_order = false; + /// If removing redundant sorting is enabled, for example, ORDER BY clauses in subqueries + bool remove_redundant_sorting = true; + static QueryPlanOptimizationSettings fromSettings(const Settings & from); static QueryPlanOptimizationSettings fromContext(ContextPtr from); }; diff --git a/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp b/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp new file mode 100644 index 00000000000..20d964dcb4f --- /dev/null +++ b/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp @@ -0,0 +1,362 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB::QueryPlanOptimizations +{ +template +class QueryPlanVisitor +{ +protected: + struct FrameWithParent + { + QueryPlan::Node * node = nullptr; + QueryPlan::Node * parent_node = nullptr; + size_t next_child = 0; + }; + + using StackWithParent = std::vector; + + QueryPlan::Node * root = nullptr; + StackWithParent stack; + +public: + explicit QueryPlanVisitor(QueryPlan::Node * root_) : root(root_) { } + + void visit() + { + stack.push_back({.node = root}); + + while (!stack.empty()) + { + auto & frame = stack.back(); + + QueryPlan::Node * current_node = frame.node; + QueryPlan::Node * parent_node = frame.parent_node; + + logStep("back", current_node); + + /// top-down visit + if (0 == frame.next_child) + { + logStep("top-down", current_node); + if (!visitTopDown(current_node, parent_node)) + continue; + } + /// Traverse all children + if (frame.next_child < frame.node->children.size()) + { + auto next_frame = FrameWithParent{.node = current_node->children[frame.next_child], .parent_node = current_node}; + ++frame.next_child; + logStep("push", next_frame.node); + stack.push_back(next_frame); + continue; + } + + /// bottom-up visit + logStep("bottom-up", current_node); + visitBottomUp(current_node, parent_node); + + logStep("pop", current_node); + stack.pop_back(); + } + } + + bool visitTopDown(QueryPlan::Node * 
current_node, QueryPlan::Node * parent_node) + { + return getDerived().visitTopDownImpl(current_node, parent_node); + } + void visitBottomUp(QueryPlan::Node * current_node, QueryPlan::Node * parent_node) + { + getDerived().visitBottomUpImpl(current_node, parent_node); + } + +private: + Derived & getDerived() { return *static_cast(this); } + + const Derived & getDerived() const { return *static_cast(this); } + + std::unordered_map address2name; + std::unordered_map name_gen; + + std::string getStepId(const IQueryPlanStep* step) + { + const auto step_name = step->getName(); + auto it = address2name.find(step); + if (it != address2name.end()) + return it->second; + + const auto seq_num = name_gen[step_name]++; + return address2name.insert({step, fmt::format("{}{}", step_name, seq_num)}).first->second; + } + +protected: + void logStep(const char * prefix, const QueryPlan::Node * node) + { + if constexpr (debug_logging) + { + const IQueryPlanStep * current_step = node->step.get(); + LOG_DEBUG( + &Poco::Logger::get("QueryPlanVisitor"), + "{}: {}: {}", + prefix, + getStepId(current_step), + reinterpret_cast(current_step)); + } + } +}; + +constexpr bool debug_logging_enabled = false; + +class RemoveRedundantSorting : public QueryPlanVisitor +{ + /// stack with nodes which affect order + /// nodes added when traversing top-down + /// as soon as all children for the node on top of stack are traversed, the node is removed from stack + std::vector nodes_affect_order; + +public: + explicit RemoveRedundantSorting(QueryPlan::Node * root_) : QueryPlanVisitor(root_) { } + + bool visitTopDownImpl(QueryPlan::Node * current_node, QueryPlan::Node * parent_node) + { + IQueryPlanStep * current_step = current_node->step.get(); + + /// if there is parent node which can affect order and current step is sorting + /// then check if we can remove the sorting step (and corresponding expression step) + if (!nodes_affect_order.empty() && typeid_cast(current_step)) + { + if (tryRemoveSorting(current_node, parent_node)) + { + logStep("step affect sorting", nodes_affect_order.back()); + logStep("removed from plan", current_node); + + auto & frame = stack.back(); + /// mark removed node as visited + frame.next_child = frame.node->children.size(); + + /// current sorting step has been removed from plan, its parent has new children, need to visit them + auto next_frame = FrameWithParent{.node = parent_node->children[0], .parent_node = parent_node}; + stack.push_back(next_frame); + logStep("push", next_frame.node); + return false; + } + } + + if (typeid_cast(current_step) + || typeid_cast(current_step) /// (1) if there are LIMITs on top of ORDER BY, the ORDER BY is non-removable + || typeid_cast(current_step) /// (2) if ORDER BY is with FILL WITH, it is non-removable + || typeid_cast(current_step) /// (3) ORDER BY will change order of previous sorting + || typeid_cast(current_step)) /// (4) aggregation change order + { + logStep("nodes_affect_order/push", current_node); + nodes_affect_order.push_back(current_node); + } + + return true; + } + + void visitBottomUpImpl(QueryPlan::Node * current_node, QueryPlan::Node *) + { + /// we come here when all children of current_node are visited, + /// so, if it's a node which affect order, remove it from the corresponding stack + if (!nodes_affect_order.empty() && nodes_affect_order.back() == current_node) + { + logStep("nodes_affect_order/pop", current_node); + nodes_affect_order.pop_back(); + } + } + +private: + bool tryRemoveSorting(QueryPlan::Node * sorting_node, QueryPlan::Node * 
parent_node) + { + if (!canRemoveCurrentSorting()) + return false; + + /// remove sorting + parent_node->children.front() = sorting_node->children.front(); + + /// sorting removed, so need to update sorting traits for upstream steps + const DataStream * input_stream = &parent_node->children.front()->step->getOutputStream(); + chassert(parent_node == (stack.rbegin() + 1)->node); /// skip element on top of stack since it's sorting which was just removed + for (StackWithParent::const_reverse_iterator it = stack.rbegin() + 1; it != stack.rend(); ++it) + { + const QueryPlan::Node * node = it->node; + /// skip removed sorting steps + auto * step = node->step.get(); + if (typeid_cast(step) && node != nodes_affect_order.back()) + continue; + + logStep("update sorting traits", node); + + auto * trans = dynamic_cast(step); + if (!trans) + { + logStep("stop update sorting traits: node is not transforming step", node); + break; + } + + trans->updateInputStream(*input_stream); + input_stream = &trans->getOutputStream(); + + /// update sorting properties though stack until reach node which affects order (inclusive) + if (node == nodes_affect_order.back()) + { + logStep("stop update sorting traits: reached node which affect order", node); + break; + } + } + + return true; + } + + bool canRemoveCurrentSorting() + { + chassert(!stack.empty()); + chassert(typeid_cast(stack.back().node->step.get())); + + return checkNodeAffectingOrder(nodes_affect_order.back()) && checkPathFromCurrentSortingNode(nodes_affect_order.back()); + } + + static bool checkNodeAffectingOrder(QueryPlan::Node * node_affect_order) + { + IQueryPlanStep * step_affect_order = node_affect_order->step.get(); + + /// if there are LIMITs on top of ORDER BY, the ORDER BY is non-removable + /// if ORDER BY is with FILL WITH, it is non-removable + if (typeid_cast(step_affect_order) || typeid_cast(step_affect_order) + || typeid_cast(step_affect_order)) + return false; + + /// (1) aggregation + if (const AggregatingStep * parent_aggr = typeid_cast(step_affect_order); parent_aggr) + { + if (parent_aggr->inOrder()) + return false; + + auto const & aggregates = parent_aggr->getParams().aggregates; + for (const auto & aggregate : aggregates) + { + auto aggregate_function_properties = AggregateFunctionFactory::instance().tryGetProperties(aggregate.function->getName()); + if (aggregate_function_properties && aggregate_function_properties->is_order_dependent) + return false; + + /// sum*() with Floats depends on order + /// but currently, there is no way to specify property `is_order_dependent` for combination of aggregating function and data type as argument + /// so, we check explicitly for sum*() functions with Floats here + const auto aggregate_function = aggregate.function; + const String & func_name = aggregate_function->getName(); + if (func_name.starts_with("sum")) + { + DataTypePtr data_type = aggregate_function->getArgumentTypes().front(); + if (WhichDataType(removeNullable(data_type)).isFloat()) + return false; + } + } + return true; + } + /// (2) sorting + else if (const auto * next_sorting = typeid_cast(step_affect_order); next_sorting) + { + if (next_sorting->getType() == SortingStep::Type::Full) + return true; + } + + return false; + } + + bool checkPathFromCurrentSortingNode(const QueryPlan::Node * node_affect_order) + { + chassert(!stack.empty()); + chassert(typeid_cast(stack.back().node->step.get())); + + /// (1) if there is expression with stateful function between current step + /// and step which affects order, then we need to keep 
sorting since + /// stateful function output can depend on order + + /// skip element on top of stack since it's sorting + for (StackWithParent::const_reverse_iterator it = stack.rbegin() + 1; it != stack.rend(); ++it) + { + const QueryPlan::Node * node = it->node; + /// walking though stack until reach node which affects order + if (node == node_affect_order) + break; + + const auto * step = node->step.get(); + /// skip removed sorting steps + if (typeid_cast(step)) + continue; + + logStep("checking for stateful function", node); + if (const auto * expr = typeid_cast(step); expr) + { + if (expr->getExpression()->hasStatefulFunctions()) + return false; + } + else if (const auto * filter = typeid_cast(step); filter) + { + if (filter->getExpression()->hasStatefulFunctions()) + return false; + } + else + { + const auto * trans = dynamic_cast(step); + if (!trans) + break; + + if (!trans->getDataStreamTraits().preserves_sorting) + break; + } + } + + /// check steps on stack if there are some which can prevent from removing SortingStep + for (StackWithParent::const_reverse_iterator it = stack.rbegin() + 1; it != stack.rend(); ++it) + { + const QueryPlan::Node * node = it->node; + /// walking though stack until reach node which affects order + if (node == node_affect_order) + break; + + const auto * step = node->step.get(); + /// skip removed sorting steps + if (typeid_cast(step)) + continue; + + logStep("checking path from current sorting", node); + + /// (2) for window function we do ORDER BY in 2 Sorting steps, + /// so do not delete Sorting if window function step is on top + if (typeid_cast(step)) + return false; + + if (const auto * join_step = typeid_cast(step); join_step) + { + if (typeid_cast(join_step->getJoin().get())) + return false; + } + } + + return true; + } +}; + +void tryRemoveRedundantSorting(QueryPlan::Node * root) +{ + RemoveRedundantSorting(root).visit(); +} + +} diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index e1662d43015..e817a9ef8a9 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -34,13 +34,13 @@ QueryPlan & QueryPlan::operator=(QueryPlan &&) noexcept = default; void QueryPlan::checkInitialized() const { if (!isInitialized()) - throw Exception("QueryPlan was not initialized", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "QueryPlan was not initialized"); } void QueryPlan::checkNotCompleted() const { if (isCompleted()) - throw Exception("QueryPlan was already completed", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "QueryPlan was already completed"); } bool QueryPlan::isCompleted() const @@ -58,8 +58,7 @@ const DataStream & QueryPlan::getCurrentDataStream() const void QueryPlan::unitePlans(QueryPlanStepPtr step, std::vector> plans) { if (isInitialized()) - throw Exception("Cannot unite plans because current QueryPlan is already initialized", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot unite plans because current QueryPlan is already initialized"); const auto & inputs = step->getInputStreams(); size_t num_inputs = step->getInputStreams().size(); @@ -447,6 +446,12 @@ void QueryPlan::explainPipeline(WriteBuffer & buffer, const ExplainPipelineOptio void QueryPlan::optimize(const QueryPlanOptimizationSettings & optimization_settings) { + /// optimization need to be applied before "mergeExpressions" optimization + /// it removes redundant sorting steps, but keep underlying expressions, 
+ /// so "mergeExpressions" optimization handles them afterwards + if (optimization_settings.remove_redundant_sorting) + QueryPlanOptimizations::tryRemoveRedundantSorting(root); + QueryPlanOptimizations::optimizeTreeFirstPass(optimization_settings, *root, nodes); QueryPlanOptimizations::optimizeTreeSecondPass(optimization_settings, *root, nodes); } diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index f729e9e1383..22245b82966 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -581,8 +581,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder( while (need_marks > 0) { if (part.ranges.empty()) - throw Exception("Unexpected end of ranges while spreading marks among streams", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected end of ranges while spreading marks among streams"); MarkRange & range = part.ranges.front(); diff --git a/src/Processors/QueryPlan/SortingStep.cpp b/src/Processors/QueryPlan/SortingStep.cpp index 8766c0ba335..66cf94bfb55 100644 --- a/src/Processors/QueryPlan/SortingStep.cpp +++ b/src/Processors/QueryPlan/SortingStep.cpp @@ -70,7 +70,7 @@ SortingStep::SortingStep( , optimize_sorting_by_input_stream_properties(optimize_sorting_by_input_stream_properties_) { if (sort_settings.max_bytes_before_external_sort && sort_settings.tmp_data == nullptr) - throw Exception("Temporary data storage for external sorting is not provided", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Temporary data storage for external sorting is not provided"); /// TODO: check input_stream is partially sorted by the same description. output_stream->sort_description = result_description; @@ -282,7 +282,16 @@ void SortingStep::transformPipeline(QueryPipelineBuilder & pipeline, const Build { /// skip sorting if stream is already sorted if (input_sort_mode == DataStream::SortScope::Global && input_sort_desc.hasPrefix(result_description)) + { + if (pipeline.getNumStreams() != 1) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "If input stream is globally sorted then there should be only 1 input stream at this stage. 
Number of input streams: "
+                "{}",
+                pipeline.getNumStreams());
+
         return;
+    }
 
     /// merge sorted
     if (input_sort_mode == DataStream::SortScope::Stream && input_sort_desc.hasPrefix(result_description))
diff --git a/src/Processors/ResizeProcessor.cpp b/src/Processors/ResizeProcessor.cpp
index 6b37c10b3e7..8167fae9baf 100644
--- a/src/Processors/ResizeProcessor.cpp
+++ b/src/Processors/ResizeProcessor.cpp
@@ -343,12 +343,12 @@ IProcessor::Status StrictResizeProcessor::prepare(const PortNumbers & updated_in
         inputs_with_data.pop();
 
         if (input_with_data.waiting_output == -1)
-            throw Exception("No associated output for input with data", ErrorCodes::LOGICAL_ERROR);
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "No associated output for input with data");
 
         auto & waiting_output = output_ports[input_with_data.waiting_output];
 
         if (waiting_output.status == OutputStatus::NotActive)
-            throw Exception("Invalid status NotActive for associated output", ErrorCodes::LOGICAL_ERROR);
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid status NotActive for associated output");
 
         if (waiting_output.status != OutputStatus::Finished)
         {
diff --git a/src/Processors/Sources/MySQLSource.cpp b/src/Processors/Sources/MySQLSource.cpp
index ecc80bef40b..434d413a238 100644
--- a/src/Processors/Sources/MySQLSource.cpp
+++ b/src/Processors/Sources/MySQLSource.cpp
@@ -188,7 +188,7 @@ namespace
                 if (hhmmss.size() == 3)
                     v = static_cast((std::stoi(hhmmss[0]) * 3600 + std::stoi(hhmmss[1]) * 60 + std::stold(hhmmss[2])) * 1000000);
                 else
-                    throw Exception("Unsupported value format", ErrorCodes::NOT_IMPLEMENTED);
+                    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported value format");
                 if (negative)
                     v = -v;
                 assert_cast(column).insertValue(v);
@@ -260,7 +260,7 @@ namespace
                     read_bytes_size += column.sizeOfValueIfFixed();
                     break;
                 default:
-                    throw Exception("Unsupported value type", ErrorCodes::NOT_IMPLEMENTED);
+                    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported value type");
             }
         }
diff --git a/src/Processors/Sources/SourceFromChunks.cpp b/src/Processors/Sources/SourceFromChunks.cpp
new file mode 100644
index 00000000000..7b73b877d2e
--- /dev/null
+++ b/src/Processors/Sources/SourceFromChunks.cpp
@@ -0,0 +1,47 @@
+#include <Processors/Sources/SourceFromChunks.h>
+
+namespace DB
+{
+
+SourceFromChunks::SourceFromChunks(Block header, Chunks chunks_)
+    : SourceFromChunks(header, std::make_shared<Chunks>(std::move(chunks_)), true)
+{}
+
+SourceFromChunks::SourceFromChunks(Block header, std::shared_ptr<Chunks> chunks_)
+    : SourceFromChunks(header, chunks_, false)
+{}
+
+SourceFromChunks::SourceFromChunks(Block header, std::shared_ptr<Chunks> chunks_, bool move_from_chunks_)
+    : ISource(std::move(header))
+    , chunks(chunks_)
+    , it(chunks->begin())
+    , move_from_chunks(move_from_chunks_)
+{
+}
+
+String SourceFromChunks::getName() const
+{
+    return "SourceFromChunks";
+}
+
+Chunk SourceFromChunks::generate()
+{
+    if (it != chunks->end())
+        if (move_from_chunks)
+        {
+            Chunk && chunk = std::move(*it);
+            it++;
+            return chunk;
+        }
+        else
+        {
+            Chunk chunk = it->clone();
+            it++;
+            return chunk;
+        }
+    else
+        return {};
+}
+
+}
+
diff --git a/src/Processors/Sources/SourceFromChunks.h b/src/Processors/Sources/SourceFromChunks.h
new file mode 100644
index 00000000000..d41999208a0
--- /dev/null
+++ b/src/Processors/Sources/SourceFromChunks.h
@@ -0,0 +1,30 @@
+#pragma once
+
+#include
+#include
+
+
+namespace DB
+{
+
+class SourceFromChunks : public ISource
+{
+public:
+    SourceFromChunks(Block header, Chunks chunks_);
+    SourceFromChunks(Block header, std::shared_ptr<Chunks> chunks_);
+
+    String getName() const override;
+
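A short usage sketch of the two public constructors (illustrative only; the header and chunks would normally come from an existing pipeline): passing Chunks by value gives the source exclusive ownership, so generate() can move chunks out, while passing a shared_ptr keeps the chunks alive elsewhere and makes generate() clone them.

    #include <Processors/Sources/SourceFromChunks.h>
    #include <memory>

    using namespace DB;

    void example(Block header, Chunks chunks)
    {
        /// Exclusive ownership: generate() moves the chunks out, no copies are made.
        auto moving_source = std::make_shared<SourceFromChunks>(header, std::move(chunks));

        /// Shared ownership: the same chunks stay valid outside the source,
        /// so generate() has to clone each chunk it emits.
        auto shared_chunks = std::make_shared<Chunks>();
        auto cloning_source = std::make_shared<SourceFromChunks>(header, shared_chunks);
    }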
+protected: + Chunk generate() override; + +private: + SourceFromChunks(Block header, std::shared_ptr chunks_, bool move_from_chunks_); + + const std::shared_ptr chunks; + Chunks::iterator it; + /// Optimization: if the chunks are exclusively owned by SourceFromChunks, then generate() can move from them + const bool move_from_chunks; +}; + +} diff --git a/src/Processors/Sources/SourceFromSingleChunk.cpp b/src/Processors/Sources/SourceFromSingleChunk.cpp index 3e2b128acd2..00f40a34361 100644 --- a/src/Processors/Sources/SourceFromSingleChunk.cpp +++ b/src/Processors/Sources/SourceFromSingleChunk.cpp @@ -6,6 +6,7 @@ namespace DB { SourceFromSingleChunk::SourceFromSingleChunk(Block header, Chunk chunk_) : ISource(std::move(header)), chunk(std::move(chunk_)) {} + SourceFromSingleChunk::SourceFromSingleChunk(Block data) : ISource(data.cloneEmpty()), chunk(data.getColumns(), data.rows()) { const auto & sample = getPort().getHeader(); @@ -23,4 +24,14 @@ SourceFromSingleChunk::SourceFromSingleChunk(Block data) : ISource(data.cloneEmp } } +String SourceFromSingleChunk::getName() const +{ + return "SourceFromSingleChunk"; +} + +Chunk SourceFromSingleChunk::generate() +{ + return std::move(chunk); +} + } diff --git a/src/Processors/Sources/SourceFromSingleChunk.h b/src/Processors/Sources/SourceFromSingleChunk.h index fa85b94c231..fa6fa3856b5 100644 --- a/src/Processors/Sources/SourceFromSingleChunk.h +++ b/src/Processors/Sources/SourceFromSingleChunk.h @@ -1,4 +1,5 @@ #pragma once + #include @@ -7,13 +8,14 @@ namespace DB class SourceFromSingleChunk : public ISource { +/// If the source consists of multiple chunks you can instead use SourceFromChunks. public: - explicit SourceFromSingleChunk(Block header, Chunk chunk_); + SourceFromSingleChunk(Block header, Chunk chunk_); explicit SourceFromSingleChunk(Block data); - String getName() const override { return "SourceFromSingleChunk"; } + String getName() const override; protected: - Chunk generate() override { return std::move(chunk); } + Chunk generate() override; private: Chunk chunk; diff --git a/src/Processors/TTL/ITTLAlgorithm.cpp b/src/Processors/TTL/ITTLAlgorithm.cpp index c71ad740719..79140137df8 100644 --- a/src/Processors/TTL/ITTLAlgorithm.cpp +++ b/src/Processors/TTL/ITTLAlgorithm.cpp @@ -59,7 +59,7 @@ UInt32 ITTLAlgorithm::getTimestampByIndex(const IColumn * column, size_t index) return column_const->getValue(); } - throw Exception("Unexpected type of result TTL column", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected type of result TTL column"); } } diff --git a/src/Processors/Transforms/AddingDefaultsTransform.cpp b/src/Processors/Transforms/AddingDefaultsTransform.cpp index 64bdf663d0f..e6c2bcec2c8 100644 --- a/src/Processors/Transforms/AddingDefaultsTransform.cpp +++ b/src/Processors/Transforms/AddingDefaultsTransform.cpp @@ -42,13 +42,13 @@ static void checkCalculated(const ColumnWithTypeAndName & col_read, size_t column_size = col_read.column->size(); if (column_size != col_defaults.column->size()) - throw Exception("Mismatch column sizes while adding defaults", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); + throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Mismatch column sizes while adding defaults"); if (column_size < defaults_needed) - throw Exception("Unexpected defaults count", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); + throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Unexpected defaults count"); if (!col_read.type->equals(*col_defaults.type)) - throw 
Exception("Mismatch column types while adding defaults", ErrorCodes::TYPE_MISMATCH); + throw Exception(ErrorCodes::TYPE_MISMATCH, "Mismatch column types while adding defaults"); } static void mixNumberColumns( @@ -98,7 +98,7 @@ static void mixNumberColumns( }; if (!callOnIndexAndDataType(type_idx, call)) - throw Exception("Unexpected type on mixNumberColumns", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected type on mixNumberColumns"); } static MutableColumnPtr mixColumns(const ColumnWithTypeAndName & col_read, diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp index 653f1b20eb3..836458ef792 100644 --- a/src/Processors/Transforms/AggregatingTransform.cpp +++ b/src/Processors/Transforms/AggregatingTransform.cpp @@ -40,11 +40,11 @@ namespace { const auto & info = chunk.getChunkInfo(); if (!info) - throw Exception("Chunk info was not set for chunk.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk."); const auto * agg_info = typeid_cast(info.get()); if (!agg_info) - throw Exception("Chunk should have AggregatedChunkInfo.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo."); return agg_info; } @@ -356,7 +356,7 @@ private: APPLY_FOR_VARIANTS_SINGLE_LEVEL(M) #undef M else - throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); + throw Exception(ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT, "Unknown aggregated data variant."); auto blocks = params->aggregator.prepareBlockAndFillSingleLevel(*first, params->final); for (auto & block : blocks) @@ -497,7 +497,7 @@ void AggregatingTransform::work() Processors AggregatingTransform::expandPipeline() { if (processors.empty()) - throw Exception("Can not expandPipeline in AggregatingTransform. This is a bug.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can not expandPipeline in AggregatingTransform. 
This is a bug."); auto & out = processors.back()->getOutputs().front(); inputs.emplace_back(out.getHeader(), this); connect(out, inputs.back()); diff --git a/src/Processors/Transforms/ArrayJoinTransform.cpp b/src/Processors/Transforms/ArrayJoinTransform.cpp index eea1469c7a6..d9c940b8b05 100644 --- a/src/Processors/Transforms/ArrayJoinTransform.cpp +++ b/src/Processors/Transforms/ArrayJoinTransform.cpp @@ -19,7 +19,7 @@ ArrayJoinTransform::ArrayJoinTransform( { /// TODO // if (on_totals_) -// throw Exception("ARRAY JOIN is not supported for totals", ErrorCodes::LOGICAL_ERROR); +// throw Exception(ErrorCodes::LOGICAL_ERROR, "ARRAY JOIN is not supported for totals"); } void ArrayJoinTransform::transform(Chunk & chunk) diff --git a/src/Processors/Transforms/ColumnGathererTransform.cpp b/src/Processors/Transforms/ColumnGathererTransform.cpp index e7fdb6a3ce8..2628bf7d6db 100644 --- a/src/Processors/Transforms/ColumnGathererTransform.cpp +++ b/src/Processors/Transforms/ColumnGathererTransform.cpp @@ -22,7 +22,7 @@ ColumnGathererStream::ColumnGathererStream( , block_preferred_size(block_preferred_size_) { if (num_inputs == 0) - throw Exception("There are no streams to gather", ErrorCodes::EMPTY_DATA_PASSED); + throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "There are no streams to gather"); } void ColumnGathererStream::initialize(Inputs inputs) @@ -77,7 +77,7 @@ IMergingAlgorithm::Status ColumnGathererStream::merge() } if (next_required_source != -1 && sources[next_required_source].size == 0) - throw Exception("Cannot fetch required block. Source " + toString(next_required_source), ErrorCodes::RECEIVED_EMPTY_DATA); + throw Exception(ErrorCodes::RECEIVED_EMPTY_DATA, "Cannot fetch required block. Source {}", toString(next_required_source)); /// Surprisingly this call may directly change some internal state of ColumnGathererStream. /// output_column. See ColumnGathererStream::gather. @@ -116,8 +116,7 @@ void ColumnGathererStream::consume(Input & input, size_t source_num) if (0 == source.size) { - throw Exception("Fetched block is empty. Source " + toString(source_num), - ErrorCodes::RECEIVED_EMPTY_DATA); + throw Exception(ErrorCodes::RECEIVED_EMPTY_DATA, "Fetched block is empty. 
Source {}", source_num); } } @@ -132,9 +131,8 @@ ColumnGathererTransform::ColumnGathererTransform( , log(&Poco::Logger::get("ColumnGathererStream")) { if (header.columns() != 1) - throw Exception( - "Header should have 1 column, but contains " + toString(header.columns()), - ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS); + throw Exception(ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS, "Header should have 1 column, but contains {}", + toString(header.columns())); } void ColumnGathererTransform::work() diff --git a/src/Processors/Transforms/CopyTransform.cpp b/src/Processors/Transforms/CopyTransform.cpp index c9047c942d6..0af6c90dac9 100644 --- a/src/Processors/Transforms/CopyTransform.cpp +++ b/src/Processors/Transforms/CopyTransform.cpp @@ -12,7 +12,7 @@ CopyTransform::CopyTransform(const Block & header, size_t num_outputs) : IProcessor(InputPorts(1, header), OutputPorts(num_outputs, header)) { if (num_outputs <= 1) - throw Exception("CopyTransform expects more than 1 outputs, got " + std::to_string(num_outputs), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "CopyTransform expects more than 1 outputs, got {}", num_outputs); } IProcessor::Status CopyTransform::prepare() diff --git a/src/Processors/Transforms/CreatingSetsTransform.cpp b/src/Processors/Transforms/CreatingSetsTransform.cpp index fb3c8d6a87b..6a8d08cb661 100644 --- a/src/Processors/Transforms/CreatingSetsTransform.cpp +++ b/src/Processors/Transforms/CreatingSetsTransform.cpp @@ -56,7 +56,7 @@ void CreatingSetsTransform::startSubquery() done_with_table = !subquery.table; if (done_with_set /*&& done_with_join*/ && done_with_table) - throw Exception("Logical error: nothing to do with subquery", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: nothing to do with subquery"); if (table_out.initialized()) { diff --git a/src/Processors/Transforms/CubeTransform.cpp b/src/Processors/Transforms/CubeTransform.cpp index 669aaddd1df..afbb996f56e 100644 --- a/src/Processors/Transforms/CubeTransform.cpp +++ b/src/Processors/Transforms/CubeTransform.cpp @@ -15,7 +15,7 @@ CubeTransform::CubeTransform(Block header, AggregatingTransformParamsPtr params_ , aggregates_mask(getAggregatesMask(params->getHeader(), params->params.aggregates)) { if (keys.size() >= 8 * sizeof(mask)) - throw Exception("Too many keys are used for CubeTransform.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Too many keys are used for CubeTransform."); } Chunk CubeTransform::generate() diff --git a/src/Processors/Transforms/DistinctSortedTransform.cpp b/src/Processors/Transforms/DistinctSortedTransform.cpp index 4c6ca03950c..26ee4d0ad08 100644 --- a/src/Processors/Transforms/DistinctSortedTransform.cpp +++ b/src/Processors/Transforms/DistinctSortedTransform.cpp @@ -96,7 +96,8 @@ DistinctSortedTransform::DistinctSortedTransform( ColumnNumbers const_column_positions; calcColumnPositionsInHeader(header, column_names, column_positions, const_column_positions); if (column_positions.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "DistinctSortedTransform: all columns can't be const. DistinctTransform should be used instead"); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "DistinctSortedTransform: all columns can't be const. 
DistinctTransform should be used instead"); /// pre-calculate DISTINCT column positions which form sort prefix of sort description calcSortPrefixPositionsInHeader(header, sort_description, column_positions, const_column_positions, sort_prefix_positions); @@ -158,7 +159,8 @@ void DistinctSortedTransform::transform(Chunk & chunk) return; } - if (!set_size_limits.check(data.getTotalRowCount(), data.getTotalByteCount(), "DISTINCT", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED)) + size_t data_total_row_count = data.getTotalRowCount(); + if (!set_size_limits.check(data_total_row_count, data.getTotalByteCount(), "DISTINCT", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED)) { stopReading(); chunk.clear(); @@ -166,7 +168,7 @@ void DistinctSortedTransform::transform(Chunk & chunk) } /// Stop reading if we already reached the limit. - if (limit_hint && data.getTotalRowCount() >= limit_hint) + if (limit_hint && data_total_row_count >= limit_hint) stopReading(); prev_chunk.chunk = std::move(chunk); diff --git a/src/Processors/Transforms/DistinctTransform.cpp b/src/Processors/Transforms/DistinctTransform.cpp index 4d78adb6e22..3619fa51bf6 100644 --- a/src/Processors/Transforms/DistinctTransform.cpp +++ b/src/Processors/Transforms/DistinctTransform.cpp @@ -94,19 +94,20 @@ void DistinctTransform::transform(Chunk & chunk) } /// Just go to the next chunk if there isn't any new record in the current one. - if (data.getTotalRowCount() == old_set_size) + size_t new_set_size = data.getTotalRowCount(); + if (new_set_size == old_set_size) return; - if (!set_size_limits.check(data.getTotalRowCount(), data.getTotalByteCount(), "DISTINCT", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED)) + if (!set_size_limits.check(new_set_size, data.getTotalByteCount(), "DISTINCT", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED)) return; for (auto & column : columns) column = column->filter(filter, -1); - chunk.setColumns(std::move(columns), data.getTotalRowCount() - old_set_size); + chunk.setColumns(std::move(columns), new_set_size - old_set_size); /// Stop reading if we already reach the limit - if (limit_hint && data.getTotalRowCount() >= limit_hint) + if (limit_hint && new_set_size >= limit_hint) { stopReading(); return; diff --git a/src/Processors/Transforms/FillingTransform.cpp b/src/Processors/Transforms/FillingTransform.cpp index 78ae6b8771f..2c5c550ffe2 100644 --- a/src/Processors/Transforms/FillingTransform.cpp +++ b/src/Processors/Transforms/FillingTransform.cpp @@ -187,22 +187,22 @@ FillingTransform::FillingTransform( const auto & type = header_.getByPosition(block_position).type; if (!tryConvertFields(descr, type)) - throw Exception("Incompatible types of WITH FILL expression values with column type " - + type->getName(), ErrorCodes::INVALID_WITH_FILL_EXPRESSION); + throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, + "Incompatible types of WITH FILL expression values with column type {}", type->getName()); if (type->isValueRepresentedByUnsignedInteger() && ((!descr.fill_from.isNull() && less(descr.fill_from, Field{0}, 1)) || (!descr.fill_to.isNull() && less(descr.fill_to, Field{0}, 1)))) { - throw Exception("WITH FILL bound values cannot be negative for unsigned type " - + type->getName(), ErrorCodes::INVALID_WITH_FILL_EXPRESSION); + throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, + "WITH FILL bound values cannot be negative for unsigned type {}", type->getName()); } } std::set unique_positions; for (auto pos : fill_column_positions) if (!unique_positions.insert(pos).second) - throw Exception("Multiple WITH FILL for identical expressions is 
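A side note on the `DistinctSortedTransform`/`DistinctTransform` hunks above: the repeated `data.getTotalRowCount()` calls are hoisted into a local (`data_total_row_count` / `new_set_size`), so the size-limit check, the emitted chunk's row count, and the `limit_hint` comparison all observe the same value and the getter is evaluated once per chunk. A minimal sketch of the same hoisting pattern, using hypothetical stand-in types rather than the actual ClickHouse classes:

```cpp
#include <cstddef>
#include <set>
#include <vector>

// Hypothetical stand-in for the set backing DISTINCT; only the row count matters here.
struct DistinctState
{
    std::set<int> rows;
    size_t getTotalRowCount() const { return rows.size(); }
};

// After the change: read the counter once and reuse it for every decision in this step.
// Returns false when reading should stop (limit reached).
bool processChunk(DistinctState & data, const std::vector<int> & chunk, size_t limit_hint, size_t old_set_size)
{
    for (int value : chunk)
        data.rows.insert(value);

    const size_t new_set_size = data.getTotalRowCount();  // single read, consistent everywhere below
    if (new_set_size == old_set_size)
        return true;                                       // nothing new in this chunk

    // ... the size-limit check and the output chunk sizing would both use new_set_size here ...

    return !(limit_hint && new_set_size >= limit_hint);
}

int main()
{
    DistinctState state;
    processChunk(state, {1, 2, 2, 3}, /*limit_hint=*/0, /*old_set_size=*/0);
    return 0;
}
```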
not supported in ORDER BY", ErrorCodes::INVALID_WITH_FILL_EXPRESSION); + throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, "Multiple WITH FILL for identical expressions is not supported in ORDER BY"); size_t idx = 0; for (const ColumnWithTypeAndName & column : header_.getColumnsWithTypeAndName()) diff --git a/src/Processors/Transforms/FinishSortingTransform.cpp b/src/Processors/Transforms/FinishSortingTransform.cpp index 86b5c4c9a00..d8412eff588 100644 --- a/src/Processors/Transforms/FinishSortingTransform.cpp +++ b/src/Processors/Transforms/FinishSortingTransform.cpp @@ -31,8 +31,9 @@ FinishSortingTransform::FinishSortingTransform( { /// Check for sanity non-modified descriptions if (!isPrefix(description_sorted_, description_to_sort_)) - throw Exception("Can't finish sorting. SortDescription of already sorted stream is not prefix of " - "SortDescription needed to sort", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Can't finish sorting. SortDescription " + "of already sorted stream is not prefix of SortDescription needed to sort"); /// The target description is modified in SortingTransform constructor. /// To avoid doing the same actions with description_sorted just copy it from prefix of target description. diff --git a/src/Processors/Transforms/MemoryBoundMerging.h b/src/Processors/Transforms/MemoryBoundMerging.h index d4e2cd41e9d..3193a07a0bd 100644 --- a/src/Processors/Transforms/MemoryBoundMerging.h +++ b/src/Processors/Transforms/MemoryBoundMerging.h @@ -112,8 +112,7 @@ public: return Status::NeedData; if (!all_finished) - throw Exception( - "SortingAggregatedForMemoryBoundMergingTransform has read bucket, but couldn't push it.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "SortingAggregatedForMemoryBoundMergingTransform has read bucket, but couldn't push it."); if (overflow_chunk) { @@ -154,8 +153,7 @@ private: const auto & info = chunk.getChunkInfo(); if (!info) - throw Exception( - "Chunk info was not set for chunk in SortingAggregatedForMemoryBoundMergingTransform.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in SortingAggregatedForMemoryBoundMergingTransform."); const auto * agg_info = typeid_cast(info.get()); if (!agg_info) diff --git a/src/Processors/Transforms/MergeJoinTransform.cpp b/src/Processors/Transforms/MergeJoinTransform.cpp index 315fc4810ba..cf5b4be4239 100644 --- a/src/Processors/Transforms/MergeJoinTransform.cpp +++ b/src/Processors/Transforms/MergeJoinTransform.cpp @@ -276,7 +276,7 @@ MergeJoinAlgorithm::MergeJoinAlgorithm( , log(&Poco::Logger::get("MergeJoinAlgorithm")) { if (input_headers.size() != 2) - throw Exception("MergeJoinAlgorithm requires exactly two inputs", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeJoinAlgorithm requires exactly two inputs"); auto strictness = table_join->getTableJoin().strictness(); if (strictness != JoinStrictness::Any && strictness != JoinStrictness::All) @@ -329,10 +329,10 @@ void MergeJoinAlgorithm::initialize(Inputs inputs) void MergeJoinAlgorithm::consume(Input & input, size_t source_num) { if (input.skip_last_row) - throw Exception("skip_last_row is not supported", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "skip_last_row is not supported"); if (input.permutation) - throw DB::Exception("permutation is not supported", ErrorCodes::NOT_IMPLEMENTED); + throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "permutation is not 
supported"); if (input.chunk) { diff --git a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp index 4e90159aa11..9771c24f256 100644 --- a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp +++ b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp @@ -225,14 +225,14 @@ IProcessor::Status GroupingAggregatedTransform::prepare() /// Sanity check. If new bucket was read, we should be able to push it. /// This is always false, but we still keep this condition in case the code will be changed. if (!all_inputs_finished) // -V547 - throw Exception("GroupingAggregatedTransform has read new two-level bucket, but couldn't push it.", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "GroupingAggregatedTransform has read new two-level bucket, but couldn't push it."); } else { if (!all_inputs_finished) // -V547 - throw Exception("GroupingAggregatedTransform should have read all chunks for single level aggregation, " - "but not all of the inputs are finished.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "GroupingAggregatedTransform should have read all chunks for single level aggregation, " + "but not all of the inputs are finished."); if (tryPushSingleLevelData()) return Status::PortFull; @@ -253,7 +253,7 @@ void GroupingAggregatedTransform::addChunk(Chunk chunk, size_t input) const auto & info = chunk.getChunkInfo(); if (!info) - throw Exception("Chunk info was not set for chunk in GroupingAggregatedTransform.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in GroupingAggregatedTransform."); if (const auto * agg_info = typeid_cast(info.get())) { @@ -319,8 +319,7 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk) const auto * chunks_to_merge = typeid_cast(info.get()); if (!chunks_to_merge) - throw Exception("MergingAggregatedSimpleTransform chunk must have ChunkInfo with type ChunksToMerge.", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "MergingAggregatedSimpleTransform chunk must have ChunkInfo with type ChunksToMerge."); auto header = params->aggregator.getHeader(false); @@ -329,8 +328,7 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk) { const auto & cur_info = cur_chunk.getChunkInfo(); if (!cur_info) - throw Exception("Chunk info was not set for chunk in MergingAggregatedBucketTransform.", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in MergingAggregatedBucketTransform."); if (const auto * agg_info = typeid_cast(cur_info.get())) { @@ -405,7 +403,7 @@ void SortingAggregatedTransform::addChunk(Chunk chunk, size_t from_input) { const auto & info = chunk.getChunkInfo(); if (!info) - throw Exception("Chunk info was not set for chunk in SortingAggregatedTransform.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in SortingAggregatedTransform."); const auto * agg_info = typeid_cast(info.get()); if (!agg_info) @@ -512,8 +510,7 @@ IProcessor::Status SortingAggregatedTransform::prepare() return Status::NeedData; if (!all_finished) - throw Exception("SortingAggregatedTransform has read bucket, but couldn't push it.", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "SortingAggregatedTransform has read bucket, but couldn't push it."); if 
(overflow_chunk) { diff --git a/src/Processors/Transforms/MergingAggregatedTransform.cpp b/src/Processors/Transforms/MergingAggregatedTransform.cpp index 11d32278caf..9d0be86ff83 100644 --- a/src/Processors/Transforms/MergingAggregatedTransform.cpp +++ b/src/Processors/Transforms/MergingAggregatedTransform.cpp @@ -33,7 +33,7 @@ void MergingAggregatedTransform::consume(Chunk chunk) const auto & info = chunk.getChunkInfo(); if (!info) - throw Exception("Chunk info was not set for chunk in MergingAggregatedTransform.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in MergingAggregatedTransform."); if (const auto * agg_info = typeid_cast(info.get())) { @@ -58,7 +58,7 @@ void MergingAggregatedTransform::consume(Chunk chunk) bucket_to_blocks[block.info.bucket_num].emplace_back(std::move(block)); } else - throw Exception("Chunk should have AggregatedChunkInfo in MergingAggregatedTransform.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo in MergingAggregatedTransform."); } Chunk MergingAggregatedTransform::generate() diff --git a/src/Processors/Transforms/MongoDBSource.cpp b/src/Processors/Transforms/MongoDBSource.cpp index 88eddde0b3d..a8bfefdf8a6 100644 --- a/src/Processors/Transforms/MongoDBSource.cpp +++ b/src/Processors/Transforms/MongoDBSource.cpp @@ -98,9 +98,8 @@ namespace parse(static_cast &>(value).value())); break; default: - throw Exception( - "Type mismatch, expected a number, got type id = " + toString(value.type()) + " for column " + name, - ErrorCodes::TYPE_MISMATCH); + throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch, expected a number, got type id = {} for column {}", + toString(value.type()), name); } } @@ -156,15 +155,15 @@ namespace break; } - throw Exception{"Type mismatch, expected String, got type id = " + toString(value.type()) + " for column " + name, - ErrorCodes::TYPE_MISMATCH}; + throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch, expected String, got type id = {} for column {}", + toString(value.type()), name); } case ValueType::vtDate: { if (value.type() != Poco::MongoDB::ElementTraits::TypeId) - throw Exception{"Type mismatch, expected Timestamp, got type id = " + toString(value.type()) + " for column " + name, - ErrorCodes::TYPE_MISMATCH}; + throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch, expected Timestamp, got type id = {} for column {}", + toString(value.type()), name); assert_cast(column).getData().push_back(static_cast(DateLUT::instance().toDayNum( static_cast &>(value).value().epochTime()))); @@ -174,8 +173,8 @@ namespace case ValueType::vtDateTime: { if (value.type() != Poco::MongoDB::ElementTraits::TypeId) - throw Exception{"Type mismatch, expected Timestamp, got type id = " + toString(value.type()) + " for column " + name, - ErrorCodes::TYPE_MISMATCH}; + throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch, expected Timestamp, got type id = {} for column {}", + toString(value.type()), name); assert_cast(column).getData().push_back( static_cast(static_cast &>(value).value().epochTime())); @@ -189,13 +188,12 @@ namespace assert_cast(column).getData().push_back(parse(string)); } else - throw Exception{"Type mismatch, expected String (UUID), got type id = " + toString(value.type()) + " for column " - + name, - ErrorCodes::TYPE_MISMATCH}; + throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch, expected String (UUID), got type id = {} for column {}", + toString(value.type()), name); break; } default: 
- throw Exception("Value of unsupported type:" + column.getName(), ErrorCodes::UNKNOWN_TYPE); + throw Exception(ErrorCodes::UNKNOWN_TYPE, "Value of unsupported type:{}", column.getName()); } } diff --git a/src/Processors/Transforms/SortingTransform.cpp b/src/Processors/Transforms/SortingTransform.cpp index 603ee06b203..3d6bad6ed06 100644 --- a/src/Processors/Transforms/SortingTransform.cpp +++ b/src/Processors/Transforms/SortingTransform.cpp @@ -359,8 +359,8 @@ void SortingTransform::removeConstColumns(Chunk & chunk) size_t num_rows = chunk.getNumRows(); if (num_columns != const_columns_to_remove.size()) - throw Exception("Block has different number of columns with header: " + toString(num_columns) - + " vs " + toString(const_columns_to_remove.size()), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Block has different number of columns with header: {} vs {}", + num_columns, const_columns_to_remove.size()); auto columns = chunk.detachColumns(); Columns column_without_constants; @@ -394,8 +394,7 @@ void SortingTransform::enrichChunkWithConstants(Chunk & chunk) else { if (next_non_const_column >= columns.size()) - throw Exception("Can't enrich chunk with constants because run out of non-constant columns.", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't enrich chunk with constants because run out of non-constant columns."); column_with_constants.emplace_back(std::move(columns[next_non_const_column])); ++next_non_const_column; @@ -407,7 +406,7 @@ void SortingTransform::enrichChunkWithConstants(Chunk & chunk) void SortingTransform::serialize() { - throw Exception("Method 'serialize' is not implemented for " + getName() + " processor", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'serialize' is not implemented for {} processor", getName()); } } diff --git a/src/Processors/Transforms/StreamInQueryCacheTransform.cpp b/src/Processors/Transforms/StreamInQueryCacheTransform.cpp new file mode 100644 index 00000000000..1ba57ea8ed2 --- /dev/null +++ b/src/Processors/Transforms/StreamInQueryCacheTransform.cpp @@ -0,0 +1,24 @@ +#include + +namespace DB +{ + +StreamInQueryCacheTransform::StreamInQueryCacheTransform( + const Block & header_, QueryCachePtr cache, const QueryCache::Key & cache_key, std::chrono::milliseconds min_query_duration) + : ISimpleTransform(header_, header_, false) + , cache_writer(cache->createWriter(cache_key, min_query_duration)) +{ +} + +void StreamInQueryCacheTransform::transform(Chunk & chunk) +{ + cache_writer.buffer(chunk.clone()); +} + +void StreamInQueryCacheTransform::finalizeWriteInQueryCache() +{ + if (!isCancelled()) + cache_writer.finalizeWrite(); +} + +}; diff --git a/src/Processors/Transforms/StreamInQueryCacheTransform.h b/src/Processors/Transforms/StreamInQueryCacheTransform.h new file mode 100644 index 00000000000..15d977cd445 --- /dev/null +++ b/src/Processors/Transforms/StreamInQueryCacheTransform.h @@ -0,0 +1,26 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class StreamInQueryCacheTransform : public ISimpleTransform +{ +public: + StreamInQueryCacheTransform( + const Block & header_, QueryCachePtr cache, const QueryCache::Key & cache_key, std::chrono::milliseconds min_query_duration); + +protected: + void transform(Chunk & chunk) override; + +public: + void finalizeWriteInQueryCache(); + String getName() const override { return "StreamInQueryCacheTransform"; } + +private: + QueryCache::Writer cache_writer; +}; + +} diff --git 
a/src/Processors/Transforms/TotalsHavingTransform.cpp b/src/Processors/Transforms/TotalsHavingTransform.cpp index 29be0ceed23..578d8cb8374 100644 --- a/src/Processors/Transforms/TotalsHavingTransform.cpp +++ b/src/Processors/Transforms/TotalsHavingTransform.cpp @@ -152,11 +152,11 @@ void TotalsHavingTransform::transform(Chunk & chunk) { const auto & info = chunk.getChunkInfo(); if (!info) - throw Exception("Chunk info was not set for chunk in TotalsHavingTransform.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in TotalsHavingTransform."); const auto * agg_info = typeid_cast(info.get()); if (!agg_info) - throw Exception("Chunk should have AggregatedChunkInfo in TotalsHavingTransform.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo in TotalsHavingTransform."); if (agg_info->is_overflows) { @@ -189,7 +189,7 @@ void TotalsHavingTransform::transform(Chunk & chunk) for (const auto & action : expression->getActions()) { if (action.node->type == ActionsDAG::ActionType::ARRAY_JOIN) - throw Exception("Having clause cannot contain arrayJoin", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Having clause cannot contain arrayJoin"); } expression->execute(finalized_block, num_rows); @@ -260,7 +260,7 @@ void TotalsHavingTransform::addToTotals(const Chunk & chunk, const IColumn::Filt size_t size = vec.size(); if (filter && filter->size() != size) - throw Exception("Filter has size which differs from column size", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Filter has size which differs from column size"); if (filter) { diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 9bfaf1f375f..cb9ab95fba4 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -1522,7 +1522,8 @@ namespace recurrent_detail template void setValueToOutputColumn(const WindowTransform * /*transform*/, size_t /*function_index*/, T /*value*/) { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "recurrent_detail::setValueToOutputColumn() is not implemented for {} type", typeid(T).name()); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "recurrent_detail::setValueToOutputColumn() is not implemented for {} type", typeid(T).name()); } template<> void setValueToOutputColumn(const WindowTransform * transform, size_t function_index, Float64 value) diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 1be05135fe4..ade056629db 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -310,8 +310,8 @@ Chain buildPushingToViewsChain( if (lock == nullptr) { - // In case the materialized view is dropped at this point, we register a warning and ignore it - assert(materialized_view->is_dropped); + // In case the materialized view is dropped/detached at this point, we register a warning and ignore it + assert(materialized_view->is_dropped || materialized_view->is_detached); LOG_WARNING( &Poco::Logger::get("PushingToViews"), "Trying to access table {} but it doesn't exist", view_id.getFullTableName()); continue; diff --git a/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp b/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp index 4ee3f2d4b82..ab7cfca3de2 100644 --- 
a/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp +++ b/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp @@ -37,17 +37,17 @@ InputFormatPtr getInputFormatFromASTInsertQuery( const auto * ast_insert_query = ast->as(); if (!ast_insert_query) - throw Exception("Logical error: query requires data to insert, but it is not INSERT query", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: query requires data to insert, but it is not INSERT query"); if (ast_insert_query->infile && context->getApplicationType() == Context::ApplicationType::SERVER) - throw Exception("Query has infile and was send directly to server", ErrorCodes::UNKNOWN_TYPE_OF_QUERY); + throw Exception(ErrorCodes::UNKNOWN_TYPE_OF_QUERY, "Query has infile and was send directly to server"); if (ast_insert_query->format.empty()) { if (input_function) - throw Exception("FORMAT must be specified for function input()", ErrorCodes::INVALID_USAGE_OF_INPUT); + throw Exception(ErrorCodes::INVALID_USAGE_OF_INPUT, "FORMAT must be specified for function input()"); else - throw Exception("Logical error: INSERT query requires format to be set", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: INSERT query requires format to be set"); } /// Data could be in parsed (ast_insert_query.data) and in not parsed yet (input_buffer_tail_part) part of query. diff --git a/src/QueryPipeline/BlockIO.cpp b/src/QueryPipeline/BlockIO.cpp index 9e42e06c722..9af7cd2b772 100644 --- a/src/QueryPipeline/BlockIO.cpp +++ b/src/QueryPipeline/BlockIO.cpp @@ -47,6 +47,22 @@ BlockIO::~BlockIO() reset(); } +void BlockIO::onFinish() +{ + if (finish_callback) + finish_callback(pipeline); + + pipeline.reset(); +} + +void BlockIO::onException() +{ + if (exception_callback) + exception_callback(); + + pipeline.reset(); +} + void BlockIO::setAllDataSent() const { /// The following queries does not have process_list_entry: diff --git a/src/QueryPipeline/BlockIO.h b/src/QueryPipeline/BlockIO.h index b69f86ac684..4c8d29d0ba8 100644 --- a/src/QueryPipeline/BlockIO.h +++ b/src/QueryPipeline/BlockIO.h @@ -31,21 +31,8 @@ struct BlockIO /// When it is true, don't bother sending any non-empty blocks to the out stream bool null_format = false; - void onFinish() - { - if (finish_callback) - finish_callback(pipeline); - - pipeline.reset(); - } - - void onException() - { - if (exception_callback) - exception_callback(); - - pipeline.reset(); - } + void onFinish(); + void onException(); /// Set is_all_data_sent in system.processes for this query. 
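On the `BlockIO` hunks just above: `onFinish()` and `onException()` stop being defined inline in `BlockIO.h` and become out-of-line definitions in `BlockIO.cpp`, which keeps the widely-included header lighter while leaving the behaviour unchanged (invoke the callback if set, then reset the pipeline). A minimal sketch of the same header/source split, with placeholder names rather than the actual ClickHouse types:

```cpp
// block_io_like.h -- declaration only; includers no longer see the function bodies.
#pragma once
#include <functional>

struct BlockIOLike
{
    std::function<void()> finish_callback;
    std::function<void()> exception_callback;

    void onFinish();      // defined out of line, as the diff does for BlockIO
    void onException();
};

// block_io_like.cpp -- the bodies move here.
// #include "block_io_like.h"
void BlockIOLike::onFinish()
{
    if (finish_callback)
        finish_callback();
    // the real code also resets the pipeline here
}

void BlockIOLike::onException()
{
    if (exception_callback)
        exception_callback();
    // the real code also resets the pipeline here
}

int main()
{
    BlockIOLike io;
    io.finish_callback = [] { /* e.g. log successful completion */ };
    io.onFinish();
    return 0;
}
```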
void setAllDataSent() const; diff --git a/src/QueryPipeline/Chain.cpp b/src/QueryPipeline/Chain.cpp index e5f2556a44f..6122517432a 100644 --- a/src/QueryPipeline/Chain.cpp +++ b/src/QueryPipeline/Chain.cpp @@ -19,7 +19,7 @@ static void checkSingleInput(const IProcessor & transform) transform.getInputs().size()); if (transform.getInputs().front().isConnected()) - throw Exception("Transform for chain has connected input", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Transform for chain has connected input"); } static void checkSingleOutput(const IProcessor & transform) @@ -32,7 +32,7 @@ static void checkSingleOutput(const IProcessor & transform) transform.getOutputs().size()); if (transform.getOutputs().front().isConnected()) - throw Exception("Transform for chain has connected output", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Transform for chain has connected output"); } static void checkTransform(const IProcessor & transform) diff --git a/src/QueryPipeline/ConnectionCollector.cpp b/src/QueryPipeline/ConnectionCollector.cpp index 9c52a2caf9c..7c484dcd6e8 100644 --- a/src/QueryPipeline/ConnectionCollector.cpp +++ b/src/QueryPipeline/ConnectionCollector.cpp @@ -34,7 +34,7 @@ ConnectionCollector & ConnectionCollector::init(ContextMutablePtr global_context { if (connection_collector) { - throw Exception("Connection collector is initialized twice. This is a bug", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Connection collector is initialized twice. This is a bug"); } connection_collector.reset(new ConnectionCollector(global_context_, max_threads)); diff --git a/src/QueryPipeline/ExecutionSpeedLimits.cpp b/src/QueryPipeline/ExecutionSpeedLimits.cpp index 9f0bd17e585..111ba7c9a95 100644 --- a/src/QueryPipeline/ExecutionSpeedLimits.cpp +++ b/src/QueryPipeline/ExecutionSpeedLimits.cpp @@ -112,7 +112,7 @@ static bool handleOverflowMode(OverflowMode mode, int code, FormatStringHelpergetInputs().size()); if (!output) - throw Exception("Cannot create Pipe from source because specified output port is nullptr", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot create Pipe from source because specified output port is nullptr"); if (output == totals || output == extremes || (totals && totals == extremes)) - throw Exception("Cannot create Pipe from source because some of specified ports are the same", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot create Pipe from source because some of specified ports are the same"); header = output->getHeader(); @@ -191,7 +189,9 @@ Pipe::Pipe(std::shared_ptr processors_) : processors(std::move(proce { if (!port.isConnected()) throw Exception( - ErrorCodes::LOGICAL_ERROR, "Cannot create Pipe because processor {} has disconnected input port", processor->getName()); + ErrorCodes::LOGICAL_ERROR, + "Cannot create Pipe because processor {} has disconnected input port", + processor->getName()); const auto * connected_processor = &port.getOutputPort().getProcessor(); if (!set.contains(connected_processor)) @@ -221,8 +221,7 @@ Pipe::Pipe(std::shared_ptr processors_) : processors(std::move(proce } if (output_ports.empty()) - throw Exception("Cannot create Pipe because processors don't have any disconnected output ports", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot create Pipe because processors don't have any disconnected output ports"); header = output_ports.front()->getHeader(); for 
(size_t i = 1; i < output_ports.size(); ++i) @@ -365,10 +364,10 @@ void Pipe::addSource(ProcessorPtr source) void Pipe::addTotalsSource(ProcessorPtr source) { if (output_ports.empty()) - throw Exception("Cannot add totals source to empty Pipe", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add totals source to empty Pipe"); if (totals_port) - throw Exception("Totals source was already added to Pipe", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Totals source was already added to Pipe"); checkSource(*source); const auto & source_header = output_ports.front()->getHeader(); @@ -385,10 +384,10 @@ void Pipe::addTotalsSource(ProcessorPtr source) void Pipe::addExtremesSource(ProcessorPtr source) { if (output_ports.empty()) - throw Exception("Cannot add extremes source to empty Pipe", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add extremes source to empty Pipe"); if (extremes_port) - throw Exception("Extremes source was already added to Pipe", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Extremes source was already added to Pipe"); checkSource(*source); const auto & source_header = output_ports.front()->getHeader(); @@ -435,7 +434,7 @@ void Pipe::addTransform(ProcessorPtr transform) void Pipe::addTransform(ProcessorPtr transform, OutputPort * totals, OutputPort * extremes) { if (output_ports.empty()) - throw Exception("Cannot add transform to empty Pipe", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add transform to empty Pipe"); auto & inputs = transform->getInputs(); if (inputs.size() != output_ports.size()) @@ -447,12 +446,10 @@ void Pipe::addTransform(ProcessorPtr transform, OutputPort * totals, OutputPort output_ports.size()); if (totals && totals_port) - throw Exception("Cannot add transform with totals to Pipe because it already has totals", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add transform with totals to Pipe because it already has totals"); if (extremes && extremes_port) - throw Exception("Cannot add transform with extremes to Pipe because it already has extremes", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add transform with extremes to Pipe because it already has extremes"); if (totals) totals_port = totals; @@ -485,16 +482,18 @@ void Pipe::addTransform(ProcessorPtr transform, OutputPort * totals, OutputPort } if (totals && !found_totals) - throw Exception("Cannot add transform " + transform->getName() + " to Pipes because " - "specified totals port does not belong to it", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Cannot add transform {} to Pipes because specified totals port does not belong to it", + transform->getName()); if (extremes && !found_extremes) - throw Exception("Cannot add transform " + transform->getName() + " to Pipes because " - "specified extremes port does not belong to it", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Cannot add transform {} to Pipes because specified extremes port does not belong to it", + transform->getName()); if (output_ports.empty()) - throw Exception("Cannot add transform " + transform->getName() + " to Pipes because it has no outputs", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add transform {} to Pipes because it has no outputs", + transform->getName()); header = 
output_ports.front()->getHeader(); for (size_t i = 1; i < output_ports.size(); ++i) @@ -518,7 +517,7 @@ void Pipe::addTransform(ProcessorPtr transform, OutputPort * totals, OutputPort void Pipe::addTransform(ProcessorPtr transform, InputPort * totals, InputPort * extremes) { if (output_ports.empty()) - throw Exception("Cannot add transform to empty Pipe", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add transform to empty Pipe"); auto & inputs = transform->getInputs(); size_t expected_inputs = output_ports.size() + (totals ? 1 : 0) + (extremes ? 1 : 0); @@ -531,12 +530,10 @@ void Pipe::addTransform(ProcessorPtr transform, InputPort * totals, InputPort * expected_inputs); if (totals && !totals_port) - throw Exception("Cannot add transform consuming totals to Pipe because Pipe does not have totals", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add transform consuming totals to Pipe because Pipe does not have totals"); if (extremes && !extremes_port) - throw Exception("Cannot add transform consuming extremes to Pipe because it already has extremes", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add transform consuming extremes to Pipe because it already has extremes"); if (totals) { @@ -609,7 +606,7 @@ void Pipe::addTransform(ProcessorPtr transform, InputPort * totals, InputPort * void Pipe::addSimpleTransform(const ProcessorGetterWithStreamKind & getter) { if (output_ports.empty()) - throw Exception("Cannot add simple transform to empty Pipe.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add simple transform to empty Pipe."); Block new_header; @@ -715,7 +712,7 @@ void Pipe::addChains(std::vector chains) void Pipe::resize(size_t num_streams, bool force, bool strict) { if (output_ports.empty()) - throw Exception("Cannot resize an empty Pipe", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot resize an empty Pipe"); if (!force && num_streams == numOutputPorts()) return; @@ -733,7 +730,7 @@ void Pipe::resize(size_t num_streams, bool force, bool strict) void Pipe::setSinks(const Pipe::ProcessorGetterWithStreamKind & getter) { if (output_ports.empty()) - throw Exception("Cannot set sink to empty Pipe", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot set sink to empty Pipe"); auto add_transform = [&](OutputPort *& stream, Pipe::StreamType stream_type) { @@ -779,7 +776,7 @@ void Pipe::setSinks(const Pipe::ProcessorGetterWithStreamKind & getter) void Pipe::transform(const Transformer & transformer, bool check_ports) { if (output_ports.empty()) - throw Exception("Cannot transform empty Pipe", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot transform empty Pipe"); auto new_processors = transformer(output_ports); @@ -837,16 +834,17 @@ void Pipe::transform(const Transformer & transformer, bool check_ports) const auto * connected_processor = &port.getInputPort().getProcessor(); if (check_ports && !set.contains(connected_processor)) throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Transformation of Pipe is not valid because processor {} has output port which is connected with unknown processor {}", - processor->getName(), - connected_processor->getName()); + ErrorCodes::LOGICAL_ERROR, + "Transformation of Pipe is not valid because processor {} has output port which " + "is connected with unknown processor {}", + processor->getName(), + 
connected_processor->getName()); } } if (output_ports.empty()) - throw Exception( - "Transformation of Pipe is not valid because processors don't have any disconnected output ports", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Transformation of Pipe is not valid because processors don't have any disconnected output ports"); header = output_ports.front()->getHeader(); for (size_t i = 1; i < output_ports.size(); ++i) diff --git a/src/QueryPipeline/QueryPipeline.cpp b/src/QueryPipeline/QueryPipeline.cpp index e0da4c4f0eb..b7b18014f1f 100644 --- a/src/QueryPipeline/QueryPipeline.cpp +++ b/src/QueryPipeline/QueryPipeline.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -514,7 +515,6 @@ void QueryPipeline::setLimitsAndQuota(const StreamLocalLimits & limits, std::sha processors->emplace_back(std::move(transform)); } - bool QueryPipeline::tryGetResultRowsAndBytes(UInt64 & result_rows, UInt64 & result_bytes) const { if (!output_format) @@ -525,6 +525,27 @@ bool QueryPipeline::tryGetResultRowsAndBytes(UInt64 & result_rows, UInt64 & resu return true; } +void QueryPipeline::streamIntoQueryCache(std::shared_ptr transform) +{ + assert(pulling()); + + connect(*output, transform->getInputPort()); + output = &transform->getOutputPort(); + processors->emplace_back(transform); +} + +void QueryPipeline::finalizeWriteInQueryCache() +{ + auto it = std::find_if( + processors->begin(), processors->end(), + [](ProcessorPtr processor){ return dynamic_cast(&*processor); }); + + /// the pipeline should theoretically contain just one StreamInQueryCacheTransform + + if (it != processors->end()) + dynamic_cast(**it).finalizeWriteInQueryCache(); +} + void QueryPipeline::addStorageHolder(StoragePtr storage) { resources.storage_holders.emplace_back(std::move(storage)); diff --git a/src/QueryPipeline/QueryPipeline.h b/src/QueryPipeline/QueryPipeline.h index 153bcc55b39..55c78ca78ed 100644 --- a/src/QueryPipeline/QueryPipeline.h +++ b/src/QueryPipeline/QueryPipeline.h @@ -4,7 +4,6 @@ #include #include - namespace DB { @@ -32,6 +31,7 @@ class SinkToStorage; class ISource; class ISink; class ReadProgressCallback; +class StreamInQueryCacheTransform; struct ColumnWithTypeAndName; using ColumnsWithTypeAndName = std::vector; @@ -105,6 +105,9 @@ public: void setLimitsAndQuota(const StreamLocalLimits & limits, std::shared_ptr quota_); bool tryGetResultRowsAndBytes(UInt64 & result_rows, UInt64 & result_bytes) const; + void streamIntoQueryCache(std::shared_ptr transform); + void finalizeWriteInQueryCache(); + void setQuota(std::shared_ptr quota_); void addStorageHolder(StoragePtr storage); diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp index a3b3438306e..483447d1e4d 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.cpp +++ b/src/QueryPipeline/QueryPipelineBuilder.cpp @@ -41,7 +41,7 @@ namespace ErrorCodes void QueryPipelineBuilder::checkInitialized() { if (!initialized()) - throw Exception("QueryPipeline is uninitialized", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "QueryPipeline is uninitialized"); } void QueryPipelineBuilder::checkInitializedAndNotCompleted() @@ -49,7 +49,7 @@ void QueryPipelineBuilder::checkInitializedAndNotCompleted() checkInitialized(); if (pipe.isCompleted()) - throw Exception("QueryPipeline is already completed", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "QueryPipeline is already completed"); } static void checkSource(const 
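The new `StreamInQueryCacheTransform` files and the `QueryPipeline::streamIntoQueryCache()` / `finalizeWriteInQueryCache()` methods above wire result streaming into the query cache: the transform clones every chunk that passes through the pipeline output into a cache writer, and the write is finalized only if the pipeline was not cancelled. How the cache object and key are obtained is outside this diff; the sketch below is a self-contained, simplified analogue of that buffering pattern, with all names being stand-ins rather than the ClickHouse API:

```cpp
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

// Stand-ins: a "chunk" is just a vector of rows, the "cache" a map from key to buffered result.
using Chunk = std::vector<std::string>;
using Cache = std::map<std::string, std::vector<Chunk>>;

// Mirrors the shape of StreamInQueryCacheTransform: keep a copy of each chunk that
// flows through, then publish everything at once if the stream finished cleanly.
class CachingTee
{
public:
    CachingTee(Cache & cache_, std::string key_) : cache(cache_), key(std::move(key_)) {}

    Chunk transform(Chunk chunk)       // pass-through that keeps a copy (chunk.clone() in the real code)
    {
        buffered.push_back(chunk);
        return chunk;
    }

    void finalize(bool cancelled)      // analogous to finalizeWriteInQueryCache()
    {
        if (!cancelled)
            cache[key] = std::move(buffered);  // only a completed result becomes visible in the cache
    }

private:
    Cache & cache;
    std::string key;
    std::vector<Chunk> buffered;
};

int main()
{
    Cache cache;
    CachingTee tee(cache, "SELECT 1");
    tee.transform({"row1"});
    tee.transform({"row2"});
    tee.finalize(/*cancelled=*/false);
    std::cout << "cached chunks: " << cache["SELECT 1"].size() << "\n";  // prints 2
    return 0;
}
```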
ProcessorPtr & source, bool can_have_totals) @@ -83,10 +83,10 @@ static void checkSource(const ProcessorPtr & source, bool can_have_totals) void QueryPipelineBuilder::init(Pipe pipe_) { if (initialized()) - throw Exception("Pipeline has already been initialized", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Pipeline has already been initialized"); if (pipe_.empty()) - throw Exception("Can't initialize pipeline with empty pipe", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't initialize pipeline with empty pipe"); pipe = std::move(pipe_); } @@ -94,10 +94,10 @@ void QueryPipelineBuilder::init(Pipe pipe_) void QueryPipelineBuilder::init(QueryPipeline & pipeline) { if (initialized()) - throw Exception("Pipeline has already been initialized", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Pipeline has already been initialized"); if (pipeline.pushing()) - throw Exception("Can't initialize pushing pipeline", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't initialize pushing pipeline"); if (pipeline.output) { @@ -208,10 +208,10 @@ void QueryPipelineBuilder::addTotalsHavingTransform(ProcessorPtr transform) checkInitializedAndNotCompleted(); if (!typeid_cast(transform.get())) - throw Exception("TotalsHavingTransform is expected for QueryPipeline::addTotalsHavingTransform", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "TotalsHavingTransform is expected for QueryPipeline::addTotalsHavingTransform"); if (pipe.getTotalsPort()) - throw Exception("Totals having transform was already added to pipeline", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Totals having transform was already added to pipeline"); resize(1); @@ -224,7 +224,7 @@ void QueryPipelineBuilder::addDefaultTotals() checkInitializedAndNotCompleted(); if (pipe.getTotalsPort()) - throw Exception("Totals having transform was already added to pipeline", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Totals having transform was already added to pipeline"); const auto & current_header = getHeader(); Columns columns; @@ -351,7 +351,7 @@ std::unique_ptr QueryPipelineBuilder::joinPipelinesYShaped left->pipe.dropExtremes(); right->pipe.dropExtremes(); if (left->getNumStreams() != 1 || right->getNumStreams() != 1) - throw Exception("Join is supported only for pipelines with one output port", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Join is supported only for pipelines with one output port"); if (left->hasTotals() || right->hasTotals()) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Current join algorithm is supported only for pipelines without totals"); @@ -464,7 +464,9 @@ std::unique_ptr QueryPipelineBuilder::joinPipelinesRightLe { delayed_root = std::make_shared(num_streams, join); if (!delayed_root->getInputs().empty() || delayed_root->getOutputs().size() != num_streams) - throw Exception(ErrorCodes::LOGICAL_ERROR, "DelayedJoinedBlocksTransform should have no inputs and {} outputs, but has {} inputs and {} outputs", + throw Exception(ErrorCodes::LOGICAL_ERROR, + "DelayedJoinedBlocksTransform should have no inputs and {} outputs, " + "but has {} inputs and {} outputs", num_streams, delayed_root->getInputs().size(), delayed_root->getOutputs().size()); if (collected_processors) @@ -491,7 +493,7 @@ std::unique_ptr QueryPipelineBuilder::joinPipelinesRightLe // Process delayed joined blocks when all JoiningTransform are 
finished. auto delayed = std::make_shared(joined_header); if (delayed->getInputs().size() != 1 || delayed->getOutputs().size() != 1) - throw Exception("DelayedJoinedBlocksWorkerTransform should have one input and one output", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "DelayedJoinedBlocksWorkerTransform should have one input and one output"); connect(*delayed_root_output_ports[i], delayed->getInputs().front()); @@ -621,7 +623,7 @@ void QueryPipelineBuilder::setProgressCallback(ProgressCallback callback) PipelineExecutorPtr QueryPipelineBuilder::execute() { if (!isCompleted()) - throw Exception("Cannot execute pipeline because it is not completed", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot execute pipeline because it is not completed"); return std::make_shared(pipe.processors, process_list_element); } diff --git a/src/QueryPipeline/RemoteQueryExecutor.cpp b/src/QueryPipeline/RemoteQueryExecutor.cpp index 49bc6a6326d..961d8129d29 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.cpp +++ b/src/QueryPipeline/RemoteQueryExecutor.cpp @@ -357,7 +357,7 @@ std::variant RemoteQueryExecutor::restartQueryWithoutDuplicatedUUIDs else return read(*read_context); } - throw Exception("Found duplicate uuids while processing query", ErrorCodes::DUPLICATED_PART_UUIDS); + throw Exception(ErrorCodes::DUPLICATED_PART_UUIDS, "Found duplicate uuids while processing query"); } std::optional RemoteQueryExecutor::processPacket(Packet packet) @@ -466,7 +466,7 @@ bool RemoteQueryExecutor::setPartUUIDs(const std::vector & uuids) void RemoteQueryExecutor::processReadTaskRequest() { if (!task_iterator) - throw Exception("Distributed task iterator is not initialized", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Distributed task iterator is not initialized"); auto response = (*task_iterator)(); connections->sendReadTaskResponse(response); } @@ -474,7 +474,7 @@ void RemoteQueryExecutor::processReadTaskRequest() void RemoteQueryExecutor::processMergeTreeReadTaskRequest(PartitionReadRequest request) { if (!parallel_reading_coordinator) - throw Exception("Coordinator for parallel reading from replicas is not initialized", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Coordinator for parallel reading from replicas is not initialized"); auto response = parallel_reading_coordinator->handleRequest(std::move(request)); connections->sendMergeTreeReadTaskResponse(response); diff --git a/src/QueryPipeline/RemoteQueryExecutorReadContext.cpp b/src/QueryPipeline/RemoteQueryExecutorReadContext.cpp index 4596bbb8961..f1a23bf7c79 100644 --- a/src/QueryPipeline/RemoteQueryExecutorReadContext.cpp +++ b/src/QueryPipeline/RemoteQueryExecutorReadContext.cpp @@ -148,7 +148,7 @@ bool RemoteQueryExecutorReadContext::checkTimeoutImpl(bool blocking) { /// Socket receive timeout. Drain it in case of error, or it may be hide by timeout exception. 
timer.drain(); - throw NetException("Timeout exceeded", ErrorCodes::SOCKET_TIMEOUT); + throw NetException(ErrorCodes::SOCKET_TIMEOUT, "Timeout exceeded"); } return true; diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index 632e109d7bd..595f5a8c2b7 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -130,9 +130,7 @@ namespace } return grpc::SslServerCredentials(options); #else - throw DB::Exception( - "Can't use SSL in grpc, because ClickHouse was built without SSL library", - DB::ErrorCodes::SUPPORT_IS_DISABLED); + throw DB::Exception(DB::ErrorCodes::SUPPORT_IS_DISABLED, "Can't use SSL in grpc, because ClickHouse was built without SSL library"); #endif } return grpc::InsecureServerCredentials(); @@ -243,10 +241,9 @@ namespace { auto max_session_timeout = config.getUInt("max_session_timeout", 3600); if (session_timeout > max_session_timeout) - throw Exception( - "Session timeout '" + std::to_string(session_timeout) + "' is larger than max_session_timeout: " - + std::to_string(max_session_timeout) + ". Maximum session timeout could be modified in configuration file.", - ErrorCodes::INVALID_SESSION_TIMEOUT); + throw Exception(ErrorCodes::INVALID_SESSION_TIMEOUT, "Session timeout '{}' is larger than max_session_timeout: {}. " + "Maximum session timeout could be modified in configuration file.", + std::to_string(session_timeout), std::to_string(max_session_timeout)); } else session_timeout = config.getInt("default_session_timeout", 60); @@ -429,7 +426,7 @@ namespace void write(const GRPCResult &, const CompletionCallback &) override { - throw Exception("Responder::write() should not be called", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Responder::write() should not be called"); } void writeAndFinish(const GRPCResult & result, const grpc::Status & status, const CompletionCallback & callback) override @@ -461,7 +458,7 @@ namespace void write(const GRPCResult &, const CompletionCallback &) override { - throw Exception("Responder::write() should not be called", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Responder::write() should not be called"); } void writeAndFinish(const GRPCResult & result, const grpc::Status & status, const CompletionCallback & callback) override @@ -778,7 +775,7 @@ namespace readQueryInfo(); if (query_info.cancel()) - throw Exception("Initial query info cannot set the 'cancel' field", ErrorCodes::INVALID_GRPC_QUERY_INFO); + throw Exception(ErrorCodes::INVALID_GRPC_QUERY_INFO, "Initial query info cannot set the 'cancel' field"); LOG_DEBUG(log, "Received initial QueryInfo: {}", getQueryDescription(query_info)); } @@ -918,7 +915,7 @@ namespace query_context->setExternalTablesInitializer([this] (ContextPtr context) { if (context != query_context) - throw Exception("Unexpected context in external tables initializer", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected context in external tables initializer"); createExternalTables(); }); @@ -926,7 +923,7 @@ namespace query_context->setInputInitializer([this] (ContextPtr context, const StoragePtr & input_storage) { if (context != query_context) - throw Exception("Unexpected context in Input initializer", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected context in Input initializer"); input_function_is_used = true; initializePipeline(input_storage->getInMemoryMetadataPtr()->getSampleBlock()); }); @@ -934,7 +931,7 @@ namespace 
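Several of the reworked messages in the hunks around here (for example the session-timeout and "Extra query infos" ones) are split across adjacent string literals purely to satisfy line-length limits; the compiler concatenates adjacent literals, so the runtime message and its fmt placeholders are unchanged. A tiny illustration of that concatenation:

```cpp
#include <cassert>
#include <string_view>

int main()
{
    // Adjacent string literals are joined at compile time, so splitting a long message
    // across source lines (as the hunks above do) does not change the resulting text.
    constexpr std::string_view joined = "Extra query infos can be used only "
                                        "to add more input data.";
    assert(joined == "Extra query infos can be used only to add more input data.");
    return 0;
}
```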
query_context->setInputBlocksReaderCallback([this](ContextPtr context) -> Block { if (context != query_context) - throw Exception("Unexpected context in InputBlocksReader", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected context in InputBlocksReader"); Block block; while (!block && pipeline_executor->pull(block)); @@ -962,12 +959,12 @@ namespace if (!has_data_to_insert) { if (!insert_query) - throw Exception("Query requires data to insert, but it is not an INSERT query", ErrorCodes::NO_DATA_TO_INSERT); + throw Exception(ErrorCodes::NO_DATA_TO_INSERT, "Query requires data to insert, but it is not an INSERT query"); else { const auto & settings = query_context->getSettingsRef(); if (settings.throw_if_no_data_to_insert) - throw Exception("No data to insert", ErrorCodes::NO_DATA_TO_INSERT); + throw Exception(ErrorCodes::NO_DATA_TO_INSERT, "No data to insert"); else return; } @@ -1026,7 +1023,7 @@ namespace break; if (!isInputStreaming(call_type)) - throw Exception("next_query_info is allowed to be set only for streaming input", ErrorCodes::INVALID_GRPC_QUERY_INFO); + throw Exception(ErrorCodes::INVALID_GRPC_QUERY_INFO, "next_query_info is allowed to be set only for streaming input"); readQueryInfo(); if (!query_info.query().empty() || !query_info.query_id().empty() || !query_info.settings().empty() @@ -1034,9 +1031,9 @@ namespace || query_info.external_tables_size() || !query_info.user_name().empty() || !query_info.password().empty() || !query_info.quota().empty() || !query_info.session_id().empty()) { - throw Exception("Extra query infos can be used only to add more input data. " - "Only the following fields can be set: input_data, next_query_info, cancel", - ErrorCodes::INVALID_GRPC_QUERY_INFO); + throw Exception(ErrorCodes::INVALID_GRPC_QUERY_INFO, + "Extra query infos can be used only to add more input data. " + "Only the following fields can be set: input_data, next_query_info, cancel"); } if (isQueryCancelled()) @@ -1148,7 +1145,7 @@ namespace break; if (!isInputStreaming(call_type)) - throw Exception("next_query_info is allowed to be set only for streaming input", ErrorCodes::INVALID_GRPC_QUERY_INFO); + throw Exception(ErrorCodes::INVALID_GRPC_QUERY_INFO, "next_query_info is allowed to be set only for streaming input"); readQueryInfo(); if (!query_info.query().empty() || !query_info.query_id().empty() || !query_info.settings().empty() @@ -1156,9 +1153,11 @@ namespace || !query_info.output_format().empty() || !query_info.user_name().empty() || !query_info.password().empty() || !query_info.quota().empty() || !query_info.session_id().empty()) { - throw Exception("Extra query infos can be used only to add more data to input or more external tables. " - "Only the following fields can be set: input_data, external_tables, next_query_info, cancel", - ErrorCodes::INVALID_GRPC_QUERY_INFO); + throw Exception(ErrorCodes::INVALID_GRPC_QUERY_INFO, + "Extra query infos can be used only " + "to add more data to input or more external tables. 
" + "Only the following fields can be set: " + "input_data, external_tables, next_query_info, cancel"); } if (isQueryCancelled()) break; @@ -1439,9 +1438,9 @@ namespace if (failed_to_read_query_info) { if (initial_query_info_read) - throw Exception("Failed to read extra QueryInfo", ErrorCodes::NETWORK_ERROR); + throw Exception(ErrorCodes::NETWORK_ERROR, "Failed to read extra QueryInfo"); else - throw Exception("Failed to read initial QueryInfo", ErrorCodes::NETWORK_ERROR); + throw Exception(ErrorCodes::NETWORK_ERROR, "Failed to read initial QueryInfo"); } } @@ -1684,7 +1683,7 @@ namespace void Call::throwIfFailedToSendResult() { if (failed_to_send_result) - throw Exception("Failed to send result to the client", ErrorCodes::NETWORK_ERROR); + throw Exception(ErrorCodes::NETWORK_ERROR, "Failed to send result to the client"); } void Call::sendException(const Exception & exception) @@ -1873,7 +1872,7 @@ void GRPCServer::start() grpc_server = builder.BuildAndStart(); if (nullptr == grpc_server) { - throw DB::Exception("Can't start grpc server, there is a port conflict", DB::ErrorCodes::NETWORK_ERROR); + throw DB::Exception(DB::ErrorCodes::NETWORK_ERROR, "Can't start grpc server, there is a port conflict"); } runner->start(); diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index d0b2b3fd493..29bfa8065ba 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -260,12 +260,12 @@ static std::chrono::steady_clock::duration parseSessionTimeout( ReadBufferFromString buf(session_timeout_str); if (!tryReadIntText(session_timeout, buf) || !buf.eof()) - throw Exception("Invalid session timeout: '" + session_timeout_str + "'", ErrorCodes::INVALID_SESSION_TIMEOUT); + throw Exception(ErrorCodes::INVALID_SESSION_TIMEOUT, "Invalid session timeout: '{}'", session_timeout_str); if (session_timeout > max_session_timeout) - throw Exception("Session timeout '" + session_timeout_str + "' is larger than max_session_timeout: " + toString(max_session_timeout) - + ". Maximum session timeout could be modified in configuration file.", - ErrorCodes::INVALID_SESSION_TIMEOUT); + throw Exception(ErrorCodes::INVALID_SESSION_TIMEOUT, "Session timeout '{}' is larger than max_session_timeout: {}. " + "Maximum session timeout could be modified in configuration file.", + session_timeout_str, max_session_timeout); } return std::chrono::seconds(session_timeout); @@ -279,12 +279,12 @@ void HTTPHandler::pushDelayedResults(Output & used_output) auto * cascade_buffer = typeid_cast(used_output.out_maybe_delayed_and_compressed.get()); if (!cascade_buffer) - throw Exception("Expected CascadeWriteBuffer", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected CascadeWriteBuffer"); cascade_buffer->getResultBuffers(write_buffers); if (write_buffers.empty()) - throw Exception("At least one buffer is expected to overwrite result into HTTP response", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "At least one buffer is expected to overwrite result into HTTP response"); for (auto & write_buf : write_buffers) { @@ -352,25 +352,31 @@ bool HTTPHandler::authenticateUser( { /// It is prohibited to mix different authorization schemes. 
if (has_http_credentials) - throw Exception("Invalid authentication: it is not allowed to use SSL certificate authentication and Authorization HTTP header simultaneously", ErrorCodes::AUTHENTICATION_FAILED); + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, + "Invalid authentication: it is not allowed " + "to use SSL certificate authentication and Authorization HTTP header simultaneously"); if (has_credentials_in_query_params) - throw Exception("Invalid authentication: it is not allowed to use SSL certificate authentication and authentication via parameters simultaneously simultaneously", ErrorCodes::AUTHENTICATION_FAILED); + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, + "Invalid authentication: it is not allowed " + "to use SSL certificate authentication and authentication via parameters simultaneously simultaneously"); if (has_ssl_certificate_auth) { #if USE_SSL if (!password.empty()) - throw Exception("Invalid authentication: it is not allowed to use SSL certificate authentication and authentication via password simultaneously", ErrorCodes::AUTHENTICATION_FAILED); + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, + "Invalid authentication: it is not allowed " + "to use SSL certificate authentication and authentication via password simultaneously"); if (request.havePeerCertificate()) certificate_common_name = request.peerCertificate().commonName(); if (certificate_common_name.empty()) - throw Exception("Invalid authentication: SSL certificate authentication requires nonempty certificate's Common Name", ErrorCodes::AUTHENTICATION_FAILED); + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, + "Invalid authentication: SSL certificate authentication requires nonempty certificate's Common Name"); #else - throw Exception( - "SSL certificate authentication disabled because ClickHouse was built without SSL library", - ErrorCodes::SUPPORT_IS_DISABLED); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, + "SSL certificate authentication disabled because ClickHouse was built without SSL library"); #endif } } @@ -378,7 +384,9 @@ bool HTTPHandler::authenticateUser( { /// It is prohibited to mix different authorization schemes. 
if (has_credentials_in_query_params) - throw Exception("Invalid authentication: it is not allowed to use Authorization HTTP header and authentication via parameters simultaneously", ErrorCodes::AUTHENTICATION_FAILED); + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, + "Invalid authentication: it is not allowed " + "to use Authorization HTTP header and authentication via parameters simultaneously"); std::string scheme; std::string auth_info; @@ -395,11 +403,11 @@ bool HTTPHandler::authenticateUser( spnego_challenge = auth_info; if (spnego_challenge.empty()) - throw Exception("Invalid authentication: SPNEGO challenge is empty", ErrorCodes::AUTHENTICATION_FAILED); + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: SPNEGO challenge is empty"); } else { - throw Exception("Invalid authentication: '" + scheme + "' HTTP Authorization scheme is not supported", ErrorCodes::AUTHENTICATION_FAILED); + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: '{}' HTTP Authorization scheme is not supported", scheme); } quota_key = params.get("quota_key", ""); @@ -419,7 +427,7 @@ bool HTTPHandler::authenticateUser( auto * certificate_credentials = dynamic_cast(request_credentials.get()); if (!certificate_credentials) - throw Exception("Invalid authentication: expected SSL certificate authorization scheme", ErrorCodes::AUTHENTICATION_FAILED); + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: expected SSL certificate authorization scheme"); } else if (!spnego_challenge.empty()) { @@ -428,7 +436,7 @@ bool HTTPHandler::authenticateUser( auto * gss_acceptor_context = dynamic_cast(request_credentials.get()); if (!gss_acceptor_context) - throw Exception("Invalid authentication: unexpected 'Negotiate' HTTP Authorization scheme expected", ErrorCodes::AUTHENTICATION_FAILED); + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: unexpected 'Negotiate' HTTP Authorization scheme expected"); #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunreachable-code" @@ -441,7 +449,7 @@ bool HTTPHandler::authenticateUser( if (!gss_acceptor_context->isFailed() && !gss_acceptor_context->isReady()) { if (spnego_response.empty()) - throw Exception("Invalid authentication: 'Negotiate' HTTP Authorization failure", ErrorCodes::AUTHENTICATION_FAILED); + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: 'Negotiate' HTTP Authorization failure"); response.setStatusAndReason(HTTPResponse::HTTP_UNAUTHORIZED); response.send(); @@ -455,7 +463,7 @@ bool HTTPHandler::authenticateUser( auto * basic_credentials = dynamic_cast(request_credentials.get()); if (!basic_credentials) - throw Exception("Invalid authentication: expected 'Basic' HTTP Authorization scheme", ErrorCodes::AUTHENTICATION_FAILED); + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: expected 'Basic' HTTP Authorization scheme"); basic_credentials->setUserName(user); basic_credentials->setPassword(password); @@ -614,7 +622,7 @@ void HTTPHandler::processQuery( { auto * prev_memory_buffer = typeid_cast(prev_buf.get()); if (!prev_memory_buffer) - throw Exception("Expected MemoryWriteBuffer", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected MemoryWriteBuffer"); auto rdbuf = prev_memory_buffer->tryGetReadBuffer(); copyData(*rdbuf , *next_buffer); @@ -980,9 +988,9 @@ void HTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse /// Workaround. 
Poco does not detect 411 Length Required case. if (request.getMethod() == HTTPRequest::HTTP_POST && !request.getChunkedTransferEncoding() && !request.hasContentLength()) { - throw Exception( - "The Transfer-Encoding is not chunked and there is no Content-Length header for POST request", - ErrorCodes::HTTP_LENGTH_REQUIRED); + throw Exception(ErrorCodes::HTTP_LENGTH_REQUIRED, + "The Transfer-Encoding is not chunked and there " + "is no Content-Length header for POST request"); } processQuery(request, params, response, used_output, query_scope); @@ -1189,10 +1197,8 @@ static inline CompiledRegexPtr getCompiledRegex(const std::string & expression) auto compiled_regex = std::make_shared(expression); if (!compiled_regex->ok()) - throw Exception( - "Cannot compile re2: " + expression + " for http handling rule, error: " + compiled_regex->error() - + ". Look at https://github.com/google/re2/wiki/Syntax for reference.", - ErrorCodes::CANNOT_COMPILE_REGEXP); + throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, "Cannot compile re2: {} for http handling rule, error: {}. " + "Look at https://github.com/google/re2/wiki/Syntax for reference.", expression, compiled_regex->error()); return compiled_regex; } @@ -1202,7 +1208,7 @@ HTTPRequestHandlerFactoryPtr createPredefinedHandlerFactory(IServer & server, const std::string & config_prefix) { if (!config.has(config_prefix + ".handler.query")) - throw Exception("There is no path '" + config_prefix + ".handler.query' in configuration file.", ErrorCodes::NO_ELEMENTS_IN_CONFIG); + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "There is no path '{}.handler.query' in configuration file.", config_prefix); std::string predefined_query = config.getString(config_prefix + ".handler.query"); NameSet analyze_receive_params = analyzeReceiveQueryParams(predefined_query); diff --git a/src/Server/HTTPHandlerFactory.cpp b/src/Server/HTTPHandlerFactory.cpp index e4da7941b50..78e374ee9e0 100644 --- a/src/Server/HTTPHandlerFactory.cpp +++ b/src/Server/HTTPHandlerFactory.cpp @@ -55,8 +55,8 @@ static inline auto createHandlersFactoryFromConfig( const auto & handler_type = config.getString(prefix + "." + key + ".handler.type", ""); if (handler_type.empty()) - throw Exception("Handler type in config is not specified here: " + prefix + "." + key + ".handler.type", - ErrorCodes::INVALID_CONFIG_PARAMETER); + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Handler type in config is not specified here: " + "{}.{}.handler.type", prefix, key); if (handler_type == "static") main_handler_factory->addHandler(createStaticHandlerFactory(server, config, prefix + "." + key)); @@ -69,12 +69,12 @@ static inline auto createHandlersFactoryFromConfig( else if (handler_type == "replicas_status") main_handler_factory->addHandler(createReplicasStatusHandlerFactory(server, config, prefix + "." + key)); else - throw Exception("Unknown handler type '" + handler_type + "' in config here: " + prefix + "." + key + ".handler.type", - ErrorCodes::INVALID_CONFIG_PARAMETER); + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Unknown handler type '{}' in config here: {}.{}.handler.type", + handler_type, prefix, key); } else - throw Exception("Unknown element in config: " + prefix + "." 
+ key + ", must be 'rule' or 'defaults'", - ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG, "Unknown element in config: " + "{}.{}, must be 'rule' or 'defaults'", prefix, key); } return main_handler_factory; @@ -116,7 +116,7 @@ HTTPRequestHandlerFactoryPtr createHandlerFactory(IServer & server, const Poco:: else if (name == "PrometheusHandler-factory") return createPrometheusMainHandlerFactory(server, config, async_metrics, name); - throw Exception("LOGICAL ERROR: Unknown HTTP handler factory name.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "LOGICAL ERROR: Unknown HTTP handler factory name."); } static const auto ping_response_expression = "Ok.\n"; diff --git a/src/Server/HTTPHandlerFactory.h b/src/Server/HTTPHandlerFactory.h index f56c712c615..ebdfa954bf7 100644 --- a/src/Server/HTTPHandlerFactory.h +++ b/src/Server/HTTPHandlerFactory.h @@ -63,7 +63,7 @@ public: else if (filter_type == "methods") addFilter(methodsFilter(config, prefix + ".methods")); else - throw Exception("Unknown element in config: " + prefix + "." + filter_type, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG, "Unknown element in config: {}.{}", prefix, filter_type); } } diff --git a/src/Server/HTTPHandlerRequestFilter.h b/src/Server/HTTPHandlerRequestFilter.h index 3e6a562e3fa..c6bcdb211e1 100644 --- a/src/Server/HTTPHandlerRequestFilter.h +++ b/src/Server/HTTPHandlerRequestFilter.h @@ -58,8 +58,9 @@ static inline auto getExpression(const std::string & expression) auto compiled_regex = std::make_shared(expression.substr(6)); if (!compiled_regex->ok()) - throw Exception("cannot compile re2: " + expression + " for http handling rule, error: " + compiled_regex->error() + - ". Look at https://github.com/google/re2/wiki/Syntax for reference.", ErrorCodes::CANNOT_COMPILE_REGEXP); + throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, "cannot compile re2: {} for http handling rule, error: {}. 
" + "Look at https://github.com/google/re2/wiki/Syntax for reference.", + expression, compiled_regex->error()); return std::make_pair(expression, compiled_regex); } diff --git a/src/Server/InterserverIOHTTPHandler.cpp b/src/Server/InterserverIOHTTPHandler.cpp index 3fc66664795..6b0cd543053 100644 --- a/src/Server/InterserverIOHTTPHandler.cpp +++ b/src/Server/InterserverIOHTTPHandler.cpp @@ -63,7 +63,7 @@ void InterserverIOHTTPHandler::processQuery(HTTPServerRequest & request, HTTPSer /// Locked for read while query processing std::shared_lock lock(endpoint->rwlock); if (endpoint->blocker.isCancelled()) - throw Exception("Transferring part to replica was cancelled", ErrorCodes::ABORTED); + throw Exception(ErrorCodes::ABORTED, "Transferring part to replica was cancelled"); if (compress) { @@ -135,7 +135,7 @@ void InterserverIOHTTPHandler::handleRequest(HTTPServerRequest & request, HTTPSe bool is_real_error = e.code() != ErrorCodes::ABORTED; PreformattedMessage message = getCurrentExceptionMessageAndPattern(is_real_error); - write_response(message.message); + write_response(message.text); if (is_real_error) LOG_ERROR(log, message); @@ -146,7 +146,7 @@ void InterserverIOHTTPHandler::handleRequest(HTTPServerRequest & request, HTTPSe { response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR); PreformattedMessage message = getCurrentExceptionMessageAndPattern(/* with_stacktrace */ false); - write_response(message.message); + write_response(message.text); LOG_ERROR(log, message); } diff --git a/src/Server/KeeperTCPHandler.cpp b/src/Server/KeeperTCPHandler.cpp index 38a10926036..db3dfefb238 100644 --- a/src/Server/KeeperTCPHandler.cpp +++ b/src/Server/KeeperTCPHandler.cpp @@ -269,12 +269,12 @@ Poco::Timespan KeeperTCPHandler::receiveHandshake(int32_t handshake_length) std::array passwd {}; if (!isHandShake(handshake_length)) - throw Exception("Unexpected handshake length received: " + toString(handshake_length), ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); + throw Exception(ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT, "Unexpected handshake length received: {}", toString(handshake_length)); Coordination::read(protocol_version, *in); if (protocol_version != Coordination::ZOOKEEPER_PROTOCOL_VERSION) - throw Exception("Unexpected protocol version: " + toString(protocol_version), ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); + throw Exception(ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT, "Unexpected protocol version: {}", toString(protocol_version)); Coordination::read(last_zxid_seen, *in); Coordination::read(timeout_ms, *in); @@ -471,7 +471,7 @@ void KeeperTCPHandler::runImpl() } if (result.error) - throw Exception("Exception happened while reading from socket", ErrorCodes::SYSTEM_ERROR); + throw Exception(ErrorCodes::SYSTEM_ERROR, "Exception happened while reading from socket"); if (session_stopwatch.elapsedMicroseconds() > static_cast(session_timeout.totalMicroseconds())) { diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index 8e2d99e2909..3715d658730 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -123,7 +123,7 @@ void MySQLHandler::run() handshake_response.auth_plugin_name); if (!(client_capabilities & CLIENT_PROTOCOL_41)) - throw Exception("Required capability: CLIENT_PROTOCOL_41.", ErrorCodes::MYSQL_CLIENT_INSUFFICIENT_CAPABILITIES); + throw Exception(ErrorCodes::MYSQL_CLIENT_INSUFFICIENT_CAPABILITIES, "Required capability: CLIENT_PROTOCOL_41."); authenticate(handshake_response.username, handshake_response.auth_plugin_name, 
handshake_response.auth_response); @@ -181,7 +181,7 @@ void MySQLHandler::run() comPing(); break; default: - throw Exception(Poco::format("Command %d is not implemented.", command), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Command {} is not implemented.", command); } } catch (const NetException & exc) @@ -221,7 +221,7 @@ void MySQLHandler::finishHandshake(MySQLProtocol::ConnectionPhase::HandshakeResp int ret = socket().receiveBytes(buf + pos, static_cast(packet_size - pos)); if (ret == 0) { - throw Exception("Cannot read all data. Bytes read: " + std::to_string(pos) + ". Bytes expected: 3", ErrorCodes::CANNOT_READ_ALL_DATA); + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Cannot read all data. Bytes read: {}. Bytes expected: 3", std::to_string(pos)); } pos += ret; } @@ -368,14 +368,15 @@ void MySQLHandler::comQuery(ReadBuffer & payload) void MySQLHandler::authPluginSSL() { - throw Exception("ClickHouse was built without SSL support. Try specifying password using double SHA1 in users.xml.", ErrorCodes::SUPPORT_IS_DISABLED); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, + "ClickHouse was built without SSL support. Try specifying password using double SHA1 in users.xml."); } void MySQLHandler::finishHandshakeSSL( [[maybe_unused]] size_t packet_size, [[maybe_unused]] char * buf, [[maybe_unused]] size_t pos, [[maybe_unused]] std::function read_bytes, [[maybe_unused]] MySQLProtocol::ConnectionPhase::HandshakeResponse & packet) { - throw Exception("Client requested SSL, while it is disabled.", ErrorCodes::SUPPORT_IS_DISABLED); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Client requested SSL, while it is disabled."); } #if USE_SSL diff --git a/src/Server/MySQLHandlerFactory.cpp b/src/Server/MySQLHandlerFactory.cpp index cbcddbb444a..deadb10f9a9 100644 --- a/src/Server/MySQLHandlerFactory.cpp +++ b/src/Server/MySQLHandlerFactory.cpp @@ -57,16 +57,16 @@ void MySQLHandlerFactory::readRSAKeys() String private_key_file_property = "openSSL.server.privateKeyFile"; if (!config.has(certificate_file_property)) - throw Exception("Certificate file is not set.", ErrorCodes::NO_ELEMENTS_IN_CONFIG); + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Certificate file is not set."); if (!config.has(private_key_file_property)) - throw Exception("Private key file is not set.", ErrorCodes::NO_ELEMENTS_IN_CONFIG); + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Private key file is not set."); { String certificate_file = config.getString(certificate_file_property); FILE * fp = fopen(certificate_file.data(), "r"); if (fp == nullptr) - throw Exception("Cannot open certificate file: " + certificate_file + ".", ErrorCodes::CANNOT_OPEN_FILE); + throw Exception(ErrorCodes::CANNOT_OPEN_FILE, "Cannot open certificate file: {}.", certificate_file); SCOPE_EXIT( if (0 != fclose(fp)) throwFromErrno("Cannot close file with the certificate in MySQLHandlerFactory", ErrorCodes::CANNOT_CLOSE_FILE); @@ -75,16 +75,16 @@ void MySQLHandlerFactory::readRSAKeys() X509 * x509 = PEM_read_X509(fp, nullptr, nullptr, nullptr); SCOPE_EXIT(X509_free(x509)); if (x509 == nullptr) - throw Exception("Failed to read PEM certificate from " + certificate_file + ". Error: " + getOpenSSLErrors(), ErrorCodes::OPENSSL_ERROR); + throw Exception(ErrorCodes::OPENSSL_ERROR, "Failed to read PEM certificate from {}. Error: {}", certificate_file, getOpenSSLErrors()); EVP_PKEY * p = X509_get_pubkey(x509); if (p == nullptr) - throw Exception("Failed to get RSA key from X509. 
Error: " + getOpenSSLErrors(), ErrorCodes::OPENSSL_ERROR); + throw Exception(ErrorCodes::OPENSSL_ERROR, "Failed to get RSA key from X509. Error: {}", getOpenSSLErrors()); SCOPE_EXIT(EVP_PKEY_free(p)); public_key.reset(EVP_PKEY_get1_RSA(p)); if (public_key.get() == nullptr) - throw Exception("Failed to get RSA key from ENV_PKEY. Error: " + getOpenSSLErrors(), ErrorCodes::OPENSSL_ERROR); + throw Exception(ErrorCodes::OPENSSL_ERROR, "Failed to get RSA key from ENV_PKEY. Error: {}", getOpenSSLErrors()); } { @@ -92,7 +92,7 @@ void MySQLHandlerFactory::readRSAKeys() FILE * fp = fopen(private_key_file.data(), "r"); if (fp == nullptr) - throw Exception ("Cannot open private key file " + private_key_file + ".", ErrorCodes::CANNOT_OPEN_FILE); + throw Exception(ErrorCodes::CANNOT_OPEN_FILE, "Cannot open private key file {}.", private_key_file); SCOPE_EXIT( if (0 != fclose(fp)) throwFromErrno("Cannot close file with the certificate in MySQLHandlerFactory", ErrorCodes::CANNOT_CLOSE_FILE); @@ -100,7 +100,7 @@ void MySQLHandlerFactory::readRSAKeys() private_key.reset(PEM_read_RSAPrivateKey(fp, nullptr, nullptr, nullptr)); if (!private_key) - throw Exception("Failed to read RSA private key from " + private_key_file + ". Error: " + getOpenSSLErrors(), ErrorCodes::OPENSSL_ERROR); + throw Exception(ErrorCodes::OPENSSL_ERROR, "Failed to read RSA private key from {}. Error: {}", private_key_file, getOpenSSLErrors()); } } @@ -109,19 +109,19 @@ void MySQLHandlerFactory::generateRSAKeys() LOG_TRACE(log, "Generating new RSA key pair."); public_key.reset(RSA_new()); if (!public_key) - throw Exception("Failed to allocate RSA key. Error: " + getOpenSSLErrors(), ErrorCodes::OPENSSL_ERROR); + throw Exception(ErrorCodes::OPENSSL_ERROR, "Failed to allocate RSA key. Error: {}", getOpenSSLErrors()); BIGNUM * e = BN_new(); if (!e) - throw Exception("Failed to allocate BIGNUM. Error: " + getOpenSSLErrors(), ErrorCodes::OPENSSL_ERROR); + throw Exception(ErrorCodes::OPENSSL_ERROR, "Failed to allocate BIGNUM. Error: {}", getOpenSSLErrors()); SCOPE_EXIT(BN_free(e)); if (!BN_set_word(e, 65537) || !RSA_generate_key_ex(public_key.get(), 2048, e, nullptr)) - throw Exception("Failed to generate RSA key. Error: " + getOpenSSLErrors(), ErrorCodes::OPENSSL_ERROR); + throw Exception(ErrorCodes::OPENSSL_ERROR, "Failed to generate RSA key. Error: {}", getOpenSSLErrors()); private_key.reset(RSAPrivateKey_dup(public_key.get())); if (!private_key) - throw Exception("Failed to copy RSA key. Error: " + getOpenSSLErrors(), ErrorCodes::OPENSSL_ERROR); + throw Exception(ErrorCodes::OPENSSL_ERROR, "Failed to copy RSA key. 
Error: {}", getOpenSSLErrors()); } #endif diff --git a/src/Server/PostgreSQLHandler.cpp b/src/Server/PostgreSQLHandler.cpp index b2a3935263d..b017b87fcc1 100644 --- a/src/Server/PostgreSQLHandler.cpp +++ b/src/Server/PostgreSQLHandler.cpp @@ -282,7 +282,7 @@ void PostgreSQLHandler::processQuery() settings.max_parser_depth, settings.allow_settings_after_format_in_insert); if (!parse_res.second) - throw Exception("Cannot parse and execute the following part of query: " + String(parse_res.first), ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Cannot parse and execute the following part of query: {}", String(parse_res.first)); std::random_device rd; std::mt19937 gen(rd()); diff --git a/src/Server/ProxyV1Handler.cpp b/src/Server/ProxyV1Handler.cpp index bf02f34c6a3..cd5fe29112a 100644 --- a/src/Server/ProxyV1Handler.cpp +++ b/src/Server/ProxyV1Handler.cpp @@ -28,38 +28,38 @@ void ProxyV1Handler::run() // read "PROXY" if (!readWord(5, word, eol) || word != "PROXY" || eol) - throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + throw ParsingException(ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED, "PROXY protocol violation"); // read "TCP4" or "TCP6" or "UNKNOWN" if (!readWord(7, word, eol)) - throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + throw ParsingException(ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED, "PROXY protocol violation"); if (word != "TCP4" && word != "TCP6" && word != "UNKNOWN") - throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + throw ParsingException(ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED, "PROXY protocol violation"); if (word == "UNKNOWN" && eol) return; if (eol) - throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + throw ParsingException(ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED, "PROXY protocol violation"); // read address if (!readWord(39, word, eol) || eol) - throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + throw ParsingException(ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED, "PROXY protocol violation"); stack_data.forwarded_for = std::move(word); // read address if (!readWord(39, word, eol) || eol) - throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + throw ParsingException(ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED, "PROXY protocol violation"); // read port if (!readWord(5, word, eol) || eol) - throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + throw ParsingException(ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED, "PROXY protocol violation"); // read port and "\r\n" if (!readWord(5, word, eol) || !eol) - throw ParsingException("PROXY protocol violation", ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + throw ParsingException(ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED, "PROXY protocol violation"); if (!stack_data.forwarded_for.empty()) LOG_TRACE(log, "Forwarded client address from PROXY header: {}", stack_data.forwarded_for); @@ -104,21 +104,21 @@ bool ProxyV1Handler::readWord(int max_len, std::string & word, bool & eol) } catch (const Poco::Net::NetException & e) { - throw NetException(e.displayText() + ", while reading from socket (" + socket().peerAddress().toString() + ")", ErrorCodes::NETWORK_ERROR); + throw NetException(ErrorCodes::NETWORK_ERROR, 
"{}, while reading from socket ({})", e.displayText(), socket().peerAddress().toString()); } catch (const Poco::TimeoutException &) { - throw NetException(fmt::format("Timeout exceeded while reading from socket ({}, {} ms)", + throw NetException(ErrorCodes::SOCKET_TIMEOUT, "Timeout exceeded while reading from socket ({}, {} ms)", socket().peerAddress().toString(), - socket().getReceiveTimeout().totalMilliseconds()), ErrorCodes::SOCKET_TIMEOUT); + socket().getReceiveTimeout().totalMilliseconds()); } catch (const Poco::IOException & e) { - throw NetException(e.displayText() + ", while reading from socket (" + socket().peerAddress().toString() + ")", ErrorCodes::NETWORK_ERROR); + throw NetException(ErrorCodes::NETWORK_ERROR, "{}, while reading from socket ({})", e.displayText(), socket().peerAddress().toString()); } if (n < 0) - throw NetException("Cannot read from socket (" + socket().peerAddress().toString() + ")", ErrorCodes::CANNOT_READ_FROM_SOCKET); + throw NetException(ErrorCodes::CANNOT_READ_FROM_SOCKET, "Cannot read from socket ({})", socket().peerAddress().toString()); return false; } diff --git a/src/Server/StaticRequestHandler.cpp b/src/Server/StaticRequestHandler.cpp index aea15f66c21..13a01ba8139 100644 --- a/src/Server/StaticRequestHandler.cpp +++ b/src/Server/StaticRequestHandler.cpp @@ -102,7 +102,9 @@ void StaticRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServer /// Workaround. Poco does not detect 411 Length Required case. if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST && !request.getChunkedTransferEncoding() && !request.hasContentLength()) - throw Exception("The Transfer-Encoding is not chunked and there is no Content-Length header for POST request", ErrorCodes::HTTP_LENGTH_REQUIRED); + throw Exception(ErrorCodes::HTTP_LENGTH_REQUIRED, + "The Transfer-Encoding is not chunked and there " + "is no Content-Length header for POST request"); setResponseDefaultHeaders(response, keep_alive_timeout); response.setStatusAndReason(Poco::Net::HTTPResponse::HTTPStatus(status)); @@ -135,7 +137,7 @@ void StaticRequestHandler::writeResponse(WriteBuffer & out) String file_path = fs::weakly_canonical(user_files_absolute_path / file_name); if (!fs::exists(file_path)) - throw Exception("Invalid file name " + file_path + " for static HTTPHandler. ", ErrorCodes::INCORRECT_FILE_NAME); + throw Exception(ErrorCodes::INCORRECT_FILE_NAME, "Invalid file name {} for static HTTPHandler. 
", file_path); ReadBufferFromFile in(file_path); copyData(in, out); @@ -143,8 +145,9 @@ void StaticRequestHandler::writeResponse(WriteBuffer & out) else if (startsWith(response_expression, config_prefix)) { if (response_expression.size() <= config_prefix.size()) - throw Exception( "Static handling rule handler must contain a complete configuration path, for example: config://config_key", - ErrorCodes::INVALID_CONFIG_PARAMETER); + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "Static handling rule handler must contain a complete configuration path, for example: " + "config://config_key"); const auto & config_path = response_expression.substr(config_prefix.size(), response_expression.size() - config_prefix.size()); writeString(server.config().getRawString(config_path, "Ok.\n"), out); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 7446078ed3f..a48a3bb1ed6 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -303,7 +303,7 @@ void TCPHandler::runImpl() query_context->setExternalTablesInitializer([this] (ContextPtr context) { if (context != query_context) - throw Exception("Unexpected context in external tables initializer", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected context in external tables initializer"); /// Get blocks of temporary tables readData(); @@ -318,7 +318,7 @@ void TCPHandler::runImpl() query_context->setInputInitializer([this] (ContextPtr context, const StoragePtr & input_storage) { if (context != query_context) - throw Exception("Unexpected context in Input initializer", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected context in Input initializer"); auto metadata_snapshot = input_storage->getInMemoryMetadataPtr(); state.need_receive_data_for_input = true; @@ -338,7 +338,7 @@ void TCPHandler::runImpl() query_context->setInputBlocksReaderCallback([this] (ContextPtr context) -> Block { if (context != query_context) - throw Exception("Unexpected context in InputBlocksReader", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected context in InputBlocksReader"); if (!readDataNext()) { @@ -515,7 +515,7 @@ void TCPHandler::runImpl() catch (...) 
{ state.io.onException(); - exception = std::make_unique("Unknown exception", ErrorCodes::UNKNOWN_EXCEPTION); + exception = std::make_unique(ErrorCodes::UNKNOWN_EXCEPTION, "Unknown exception"); } try @@ -895,7 +895,7 @@ void TCPHandler::receiveUnexpectedTablesStatusRequest() TablesStatusRequest skip_request; skip_request.read(*in, client_tcp_protocol_version); - throw NetException("Unexpected packet TablesStatusRequest received from client", ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); + throw NetException(ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT, "Unexpected packet TablesStatusRequest received from client"); } void TCPHandler::sendPartUUIDs() @@ -1137,10 +1137,10 @@ void TCPHandler::receiveHello() if (packet_type == 'G' || packet_type == 'P') { writeString(formatHTTPErrorResponseWhenUserIsConnectedToWrongPort(server.config()), *out); - throw Exception("Client has connected to wrong port", ErrorCodes::CLIENT_HAS_CONNECTED_TO_WRONG_PORT); + throw Exception(ErrorCodes::CLIENT_HAS_CONNECTED_TO_WRONG_PORT, "Client has connected to wrong port"); } else - throw NetException("Unexpected packet from client", ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); + throw NetException(ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT, "Unexpected packet from client"); } readStringBinary(client_name, *in); @@ -1155,7 +1155,7 @@ void TCPHandler::receiveHello() readStringBinary(password, *in); if (user.empty()) - throw NetException("Unexpected packet from client (no user in Hello package)", ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); + throw NetException(ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT, "Unexpected packet from client (no user in Hello package)"); LOG_DEBUG(log, "Connected {} version {}.{}.{}, revision: {}{}{}.", client_name, @@ -1208,7 +1208,7 @@ void TCPHandler::receiveUnexpectedHello() readStringBinary(skip_string, *in); readStringBinary(skip_string, *in); - throw NetException("Unexpected packet Hello received from client", ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); + throw NetException(ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT, "Unexpected packet Hello received from client"); } @@ -1296,7 +1296,7 @@ bool TCPHandler::receivePacket() return false; default: - throw Exception("Unknown packet " + toString(packet_type) + " from client", ErrorCodes::UNKNOWN_PACKET_FROM_CLIENT); + throw Exception(ErrorCodes::UNKNOWN_PACKET_FROM_CLIENT, "Unknown packet {} from client", toString(packet_type)); } } @@ -1311,7 +1311,7 @@ void TCPHandler::receiveUnexpectedIgnoredPartUUIDs() { std::vector skip_part_uuids; readVectorBinary(skip_part_uuids, *in); - throw NetException("Unexpected packet IgnoredPartUUIDs received from client", ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); + throw NetException(ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT, "Unexpected packet IgnoredPartUUIDs received from client"); } @@ -1334,14 +1334,14 @@ String TCPHandler::receiveReadTaskResponseAssumeLocked() } else { - throw Exception(fmt::format("Received {} packet after requesting read task", - Protocol::Client::toString(packet_type)), ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); + throw Exception(ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT, "Received {} packet after requesting read task", + Protocol::Client::toString(packet_type)); } } UInt64 version; readVarUInt(version, *in); if (version != DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION) - throw Exception("Protocol version for distributed processing mismatched", ErrorCodes::UNKNOWN_PROTOCOL); + throw Exception(ErrorCodes::UNKNOWN_PROTOCOL, "Protocol version for distributed processing mismatched"); String response; 
readStringBinary(response, *in); return response; @@ -1367,8 +1367,8 @@ std::optional TCPHandler::receivePartitionMergeTreeReadTa } else { - throw Exception(fmt::format("Received {} packet after requesting read task", - Protocol::Client::toString(packet_type)), ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); + throw Exception(ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT, "Received {} packet after requesting read task", + Protocol::Client::toString(packet_type)); } } PartitionReadResponse response; @@ -1475,9 +1475,8 @@ void TCPHandler::receiveQuery() session->authenticate(AlwaysAllowCredentials{client_info.initial_user}, client_info.initial_address); } #else - auto exception = Exception( - "Inter-server secret support is disabled, because ClickHouse was built without SSL library", - ErrorCodes::AUTHENTICATION_FAILED); + auto exception = Exception(ErrorCodes::AUTHENTICATION_FAILED, + "Inter-server secret support is disabled, because ClickHouse was built without SSL library"); session->onAuthenticationFailure(/* user_name */ std::nullopt, socket().peerAddress(), exception); throw exception; /// NOLINT #endif @@ -1564,7 +1563,7 @@ void TCPHandler::receiveUnexpectedQuery() if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_PARAMETERS) skip_settings.read(*in, settings_format); - throw NetException("Unexpected packet Query received from client", ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); + throw NetException(ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT, "Unexpected packet Query received from client"); } bool TCPHandler::receiveData(bool scalar) @@ -1647,7 +1646,7 @@ bool TCPHandler::receiveUnexpectedData(bool throw_exception) state.read_all_data = true; if (throw_exception) - throw NetException("Unexpected packet Data received from client", ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); + throw NetException(ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT, "Unexpected packet Data received from client"); return read_ok; } @@ -1766,7 +1765,7 @@ bool TCPHandler::isQueryCancelled() { case Protocol::Client::Cancel: if (state.empty()) - throw NetException("Unexpected packet Cancel received from client", ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); + throw NetException(ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT, "Unexpected packet Cancel received from client"); LOG_INFO(log, "Query was cancelled."); state.is_cancelled = true; /// For testing connection collector. 
@@ -1781,7 +1780,7 @@ bool TCPHandler::isQueryCancelled() return true; default: - throw NetException("Unknown packet from client " + toString(packet_type), ErrorCodes::UNKNOWN_PACKET_FROM_CLIENT); + throw NetException(ErrorCodes::UNKNOWN_PACKET_FROM_CLIENT, "Unknown packet from client {}", toString(packet_type)); } } diff --git a/src/Server/TCPProtocolStackFactory.h b/src/Server/TCPProtocolStackFactory.h index 16b57649a72..7373e6e1c4e 100644 --- a/src/Server/TCPProtocolStackFactory.h +++ b/src/Server/TCPProtocolStackFactory.h @@ -57,7 +57,7 @@ public: else if (key.starts_with("host")) allowed_client_hosts.addName(value); else - throw Exception("Unknown address pattern type: " + key, ErrorCodes::UNKNOWN_ADDRESS_PATTERN_TYPE); + throw Exception(ErrorCodes::UNKNOWN_ADDRESS_PATTERN_TYPE, "Unknown address pattern type: {}", key); } } } @@ -65,7 +65,7 @@ public: Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override { if (!allowed_client_hosts.empty() && !allowed_client_hosts.contains(socket.peerAddress().host())) - throw Exception("Connections from " + socket.peerAddress().toString() + " are not allowed", ErrorCodes::IP_ADDRESS_NOT_ALLOWED); + throw Exception(ErrorCodes::IP_ADDRESS_NOT_ALLOWED, "Connections from {} are not allowed", socket.peerAddress().toString()); try { diff --git a/src/Server/TLSHandler.h b/src/Server/TLSHandler.h index 5b7377515c1..dd025e3e165 100644 --- a/src/Server/TLSHandler.h +++ b/src/Server/TLSHandler.h @@ -45,8 +45,7 @@ public: stack_data.socket = socket(); stack_data.certificate = certificate; #else - throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", - ErrorCodes::SUPPORT_IS_DISABLED}; + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); #endif } private: diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 3c1426d33a5..da11a87eb4d 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -101,7 +101,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ if (ast_col_decl.codec) { if (ast_col_decl.default_specifier == "ALIAS") - throw Exception{"Cannot specify codec for column type ALIAS", ErrorCodes::BAD_ARGUMENTS}; + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot specify codec for column type ALIAS"); command.codec = ast_col_decl.codec; } if (command_ast->column) @@ -347,8 +347,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ const auto & identifier = identifier_ast->as(); auto insertion = command.settings_resets.emplace(identifier.name()); if (!insertion.second) - throw Exception("Duplicate setting name " + backQuote(identifier.name()), - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Duplicate setting name {}", backQuote(identifier.name())); } return command; } @@ -497,8 +496,7 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) if (if_not_exists) return; else - throw Exception{"Cannot add index " + index_name + ": index with this name already exists", - ErrorCodes::ILLEGAL_COLUMN}; + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot add index {}: index with this name already exists", index_name); } auto insert_it = metadata.secondary_indices.end(); @@ -521,9 +519,8 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) { auto hints = 
metadata.secondary_indices.getHints(after_index_name); auto hints_string = !hints.empty() ? ", may be you meant: " + toString(hints) : ""; - throw Exception( - "Wrong index name. Cannot find index " + backQuote(after_index_name) + " to insert after" + hints_string, - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong index name. Cannot find index {} to insert after{}", + backQuote(after_index_name), hints_string); } ++insert_it; @@ -549,8 +546,8 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) return; auto hints = metadata.secondary_indices.getHints(index_name); auto hints_string = !hints.empty() ? ", may be you meant: " + toString(hints) : ""; - throw Exception( - "Wrong index name. Cannot find index " + backQuote(index_name) + " to drop" + hints_string, ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong index name. Cannot find index {} to drop{}", + backQuote(index_name), hints_string); } metadata.secondary_indices.erase(erase_it); @@ -569,8 +566,8 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) { if (if_not_exists) return; - throw Exception("Cannot add constraint " + constraint_name + ": constraint with this name already exists", - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot add constraint {}: constraint with this name already exists", + constraint_name); } auto * insert_it = constraints.end(); @@ -589,8 +586,8 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) { if (if_exists) return; - throw Exception("Wrong constraint name. Cannot find constraint `" + constraint_name + "` to drop", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong constraint name. Cannot find constraint `{}` to drop", + constraint_name); } constraints.erase(erase_it); metadata.constraints = ConstraintsDescription(constraints); @@ -684,7 +681,7 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) rename_visitor.visit(index.definition_ast); } else - throw Exception("Wrong parameter type in ALTER query", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong parameter type in ALTER query"); } namespace @@ -905,30 +902,20 @@ std::optional AlterCommand::tryConvertToMutationCommand(Storage return result; } -bool AlterCommands::hasInvertedIndex(const StorageInMemoryMetadata & metadata, ContextPtr context) +bool AlterCommands::hasInvertedIndex(const StorageInMemoryMetadata & metadata) { for (const auto & index : metadata.secondary_indices) { - IndexDescription index_desc; - try - { - index_desc = IndexDescription::getIndexFromAST(index.definition_ast, metadata.columns, context); - } - catch (...) - { - continue; - } - if (index.type == GinFilter::FilterName) - { + if (index.type == INVERTED_INDEX_NAME) return true; - } } return false; } + void AlterCommands::apply(StorageInMemoryMetadata & metadata, ContextPtr context) const { if (!prepared) - throw DB::Exception("Alter commands is not prepared. Cannot apply. It's a bug", ErrorCodes::LOGICAL_ERROR); + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Alter commands is not prepared. Cannot apply. 
It's a bug"); auto metadata_copy = metadata; @@ -1057,7 +1044,7 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const const auto & command = (*this)[i]; if (command.ttl && !table->supportsTTL()) - throw Exception("Engine " + table->getName() + " doesn't support TTL clause", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Engine {} doesn't support TTL clause", table->getName()); const auto & column_name = command.column_name; if (command.type == AlterCommand::ADD_COLUMN) @@ -1093,8 +1080,7 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const { String exception_message = fmt::format("Wrong column. Cannot find column {} to modify", backQuote(column_name)); all_columns.appendHintsMessage(exception_message, column_name); - throw Exception{exception_message, - ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK}; + throw Exception::createDeprecated(exception_message, ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); } else continue; @@ -1193,9 +1179,8 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const const auto required_columns = actions->getRequiredColumns(); if (required_columns.end() != std::find(required_columns.begin(), required_columns.end(), command.column_name)) - throw Exception("Cannot drop column " + backQuote(command.column_name) - + ", because column " + backQuote(column.name) + " depends on it", - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot drop column {}, because column {} depends on it", + backQuote(command.column_name), backQuote(column.name)); } } } @@ -1203,9 +1188,10 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const } else if (!command.if_exists) { - String exception_message = fmt::format("Wrong column name. Cannot find column {} to drop", backQuote(command.column_name)); - all_columns.appendHintsMessage(exception_message, command.column_name); - throw Exception(exception_message, ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); + auto message = PreformattedMessage::create( + "Wrong column name. Cannot find column {} to drop", backQuote(command.column_name)); + all_columns.appendHintsMessage(message.text, command.column_name); + throw Exception(std::move(message), ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); } } else if (command.type == AlterCommand::COMMENT_COLUMN) @@ -1214,16 +1200,17 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const { if (!command.if_exists) { - String exception_message = fmt::format("Wrong column name. Cannot find column {} to comment", backQuote(command.column_name)); - all_columns.appendHintsMessage(exception_message, command.column_name); - throw Exception(exception_message, ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); + auto message = PreformattedMessage::create( + "Wrong column name. 
Cannot find column {} to comment", backQuote(command.column_name)); + all_columns.appendHintsMessage(message.text, command.column_name); + throw Exception(std::move(message), ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); } } } else if (command.type == AlterCommand::MODIFY_SETTING || command.type == AlterCommand::RESET_SETTING) { if (metadata.settings_changes == nullptr) - throw Exception{"Cannot alter settings, because table engine doesn't support settings changes", ErrorCodes::BAD_ARGUMENTS}; + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot alter settings, because table engine doesn't support settings changes"); } else if (command.type == AlterCommand::RENAME_COLUMN) { @@ -1233,28 +1220,27 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const if (next_command.type == AlterCommand::RENAME_COLUMN) { if (next_command.column_name == command.rename_to) - throw Exception{"Transitive renames in a single ALTER query are not allowed (don't make sense)", - ErrorCodes::NOT_IMPLEMENTED}; + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Transitive renames in a single ALTER query are not allowed (don't make sense)"); else if (next_command.column_name == command.column_name) - throw Exception{"Cannot rename column '" + backQuote(command.column_name) - + "' to two different names in a single ALTER query", - ErrorCodes::BAD_ARGUMENTS}; + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot rename column '{}' to two different names in a single ALTER query", + backQuote(command.column_name)); } } /// TODO Implement nested rename if (all_columns.hasNested(command.column_name)) { - throw Exception{"Cannot rename whole Nested struct", ErrorCodes::NOT_IMPLEMENTED}; + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot rename whole Nested struct"); } if (!all_columns.has(command.column_name)) { if (!command.if_exists) { - String exception_message = fmt::format("Wrong column name. Cannot find column {} to rename", backQuote(command.column_name)); - all_columns.appendHintsMessage(exception_message, command.column_name); - throw Exception(exception_message, ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); + auto message = PreformattedMessage::create( + "Wrong column name. 
Cannot find column {} to rename", backQuote(command.column_name)); + all_columns.appendHintsMessage(message.text, command.column_name); + throw Exception(std::move(message), ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); } else continue; diff --git a/src/Storages/AlterCommands.h b/src/Storages/AlterCommands.h index a79827b355d..3e526dcc0bb 100644 --- a/src/Storages/AlterCommands.h +++ b/src/Storages/AlterCommands.h @@ -212,7 +212,7 @@ public: MutationCommands getMutationCommands(StorageInMemoryMetadata metadata, bool materialize_ttl, ContextPtr context, bool with_alters=false) const; /// Check if commands have any inverted index - static bool hasInvertedIndex(const StorageInMemoryMetadata & metadata, ContextPtr context); + static bool hasInvertedIndex(const StorageInMemoryMetadata & metadata); }; } diff --git a/src/Storages/ColumnDefault.cpp b/src/Storages/ColumnDefault.cpp index 3cf49ea69fc..dcb59f7bd65 100644 --- a/src/Storages/ColumnDefault.cpp +++ b/src/Storages/ColumnDefault.cpp @@ -36,7 +36,7 @@ ColumnDefaultKind columnDefaultKindFromString(const std::string & str) if (it != std::end(map)) return it->second; - throw Exception{"Unknown column default specifier: " + str, ErrorCodes::LOGICAL_ERROR}; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown column default specifier: {}", str); } @@ -53,7 +53,7 @@ std::string toString(const ColumnDefaultKind kind) if (it != std::end(map)) return it->second; - throw Exception{"Invalid ColumnDefaultKind", ErrorCodes::LOGICAL_ERROR}; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid ColumnDefaultKind"); } diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index ed2b4fd24e2..ebbf81f1faa 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -136,7 +136,7 @@ void ColumnDescription::readText(ReadBuffer & buf) ttl = col_ast->ttl; } else - throw Exception("Cannot parse column description", ErrorCodes::CANNOT_PARSE_TEXT); + throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Cannot parse column description"); } } @@ -217,8 +217,7 @@ void ColumnsDescription::add(ColumnDescription column, const String & after_colu { auto range = getNameRange(columns, after_column); if (range.first == range.second) - throw Exception("Wrong column name. Cannot find column " + after_column + " to insert after", - ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "Wrong column name. 
Cannot find column {} to insert after", after_column); insert_it = range.second; } @@ -235,7 +234,7 @@ void ColumnsDescription::remove(const String & column_name) { String exception_message = fmt::format("There is no column {} in table", column_name); appendHintsMessage(exception_message, column_name); - throw Exception(exception_message, ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + throw Exception::createDeprecated(exception_message, ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); } for (auto list_it = range.first; list_it != range.second;) @@ -252,7 +251,7 @@ void ColumnsDescription::rename(const String & column_from, const String & colum { String exception_message = fmt::format("Cannot find column {} in ColumnsDescription", column_from); appendHintsMessage(exception_message, column_from); - throw Exception(exception_message, ErrorCodes::LOGICAL_ERROR); + throw Exception::createDeprecated(exception_message, ErrorCodes::LOGICAL_ERROR); } columns.get<1>().modify_key(it, [&column_to] (String & old_name) @@ -268,7 +267,7 @@ void ColumnsDescription::modifyColumnOrder(const String & column_name, const Str auto column_range = getNameRange(columns, column_name); if (column_range.first == column_range.second) - throw Exception("There is no column " + column_name + " in table.", ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no column {} in table.", column_name); std::vector moving_columns; for (auto list_it = column_range.first; list_it != column_range.second;) @@ -287,8 +286,7 @@ void ColumnsDescription::modifyColumnOrder(const String & column_name, const Str /// Checked first auto range = getNameRange(columns, after_column); if (range.first == range.second) - throw Exception("Wrong column name. Cannot find column " + after_column + " to insert after", - ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "Wrong column name. 
Cannot find column {} to insert after", after_column); reorder_column([&]() { return getNameRange(columns, after_column).second; }); } @@ -472,8 +470,7 @@ const ColumnDescription & ColumnsDescription::get(const String & column_name) co { auto it = columns.get<1>().find(column_name); if (it == columns.get<1>().end()) - throw Exception("There is no column " + column_name + " in table.", - ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no column {} in table.", column_name); return *it; } @@ -807,7 +804,7 @@ Block validateColumnsDefaultsAndGetSampleBlock(ASTPtr default_expr_list, const N { for (const auto & child : default_expr_list->children) if (child->as() || child->as() || child->as()) - throw Exception("Select query is not allowed in columns DEFAULT expression", ErrorCodes::THERE_IS_NO_DEFAULT_VALUE); + throw Exception(ErrorCodes::THERE_IS_NO_DEFAULT_VALUE, "Select query is not allowed in columns DEFAULT expression"); try { @@ -815,7 +812,7 @@ Block validateColumnsDefaultsAndGetSampleBlock(ASTPtr default_expr_list, const N const auto actions = ExpressionAnalyzer(default_expr_list, syntax_analyzer_result, context).getActions(true); for (const auto & action : actions->getActions()) if (action.node->type == ActionsDAG::ActionType::ARRAY_JOIN) - throw Exception("Unsupported default value that requires ARRAY JOIN action", ErrorCodes::THERE_IS_NO_DEFAULT_VALUE); + throw Exception(ErrorCodes::THERE_IS_NO_DEFAULT_VALUE, "Unsupported default value that requires ARRAY JOIN action"); return actions->getSampleBlock(); } diff --git a/src/Storages/ColumnsDescription.h b/src/Storages/ColumnsDescription.h index eea5dc7fcbb..32c52bdcb9e 100644 --- a/src/Storages/ColumnsDescription.h +++ b/src/Storages/ColumnsDescription.h @@ -161,12 +161,12 @@ public: { String exception_message = fmt::format("Cannot find column {} in ColumnsDescription", column_name); appendHintsMessage(exception_message, column_name); - throw Exception(exception_message, ErrorCodes::LOGICAL_ERROR); + throw Exception::createDeprecated(exception_message, ErrorCodes::LOGICAL_ERROR); } removeSubcolumns(it->name); if (!columns.get<1>().modify(it, std::forward(f))) - throw Exception("Cannot modify ColumnDescription for column " + column_name + ": column name cannot be changed", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot modify ColumnDescription for column {}: column name cannot be changed", column_name); addSubcolumns(it->name, it->type); modifyColumnOrder(column_name, after_column, first); diff --git a/src/Storages/CompressionCodecSelector.h b/src/Storages/CompressionCodecSelector.h index 4c088924cdb..ad6e943e821 100644 --- a/src/Storages/CompressionCodecSelector.h +++ b/src/Storages/CompressionCodecSelector.h @@ -79,7 +79,7 @@ public: for (const auto & name : keys) { if (!startsWith(name, "case")) - throw Exception("Unknown element in config: " + config_prefix + "." + name + ", must be 'case'", ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG, "Unknown element in config: {}.{}, must be 'case'", config_prefix, name); elements.emplace_back(config, config_prefix + "." 
+ name); } diff --git a/src/Storages/ConstraintsDescription.cpp b/src/Storages/ConstraintsDescription.cpp index 96037b46e52..5207458af8c 100644 --- a/src/Storages/ConstraintsDescription.cpp +++ b/src/Storages/ConstraintsDescription.cpp @@ -57,7 +57,7 @@ ASTs ConstraintsDescription::filterConstraints(ConstraintType selection) const case ASTConstraintDeclaration::Type::ASSUME: return static_cast(ConstraintType::ASSUME); } - throw Exception("Unknown constraint type.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown constraint type."); }; ASTs res; diff --git a/src/Storages/Distributed/DirectoryMonitor.cpp b/src/Storages/Distributed/DirectoryMonitor.cpp index 39e91e19014..eb1d83af851 100644 --- a/src/Storages/Distributed/DirectoryMonitor.cpp +++ b/src/Storages/Distributed/DirectoryMonitor.cpp @@ -121,11 +121,10 @@ namespace { if (expected != calculated) { - String message = "Checksum of extra info doesn't match: corrupted data." - " Reference: " + getHexUIntLowercase(expected.first) + getHexUIntLowercase(expected.second) - + ". Actual: " + getHexUIntLowercase(calculated.first) + getHexUIntLowercase(calculated.second) - + "."; - throw Exception(message, ErrorCodes::CHECKSUM_DOESNT_MATCH); + throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, + "Checksum of extra info doesn't match: corrupted data. Reference: {}{}. Actual: {}{}.", + getHexUIntLowercase(expected.first), getHexUIntLowercase(expected.second), + getHexUIntLowercase(calculated.first), getHexUIntLowercase(calculated.second)); } } diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index 7ada07b83fe..3cf1ef2678f 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -128,7 +128,7 @@ DistributedSink::DistributedSink( { const auto & settings = context->getSettingsRef(); if (settings.max_distributed_depth && context->getClientInfo().distributed_depth >= settings.max_distributed_depth) - throw Exception("Maximum distributed depth exceeded", ErrorCodes::TOO_LARGE_DISTRIBUTED_DEPTH); + throw Exception(ErrorCodes::TOO_LARGE_DISTRIBUTED_DEPTH, "Maximum distributed depth exceeded"); context->getClientInfo().distributed_depth += 1; random_shard_insert = settings.insert_distributed_one_random_shard && !storage.has_sharding_key; } @@ -265,7 +265,7 @@ void DistributedSink::waitForJobs() if (static_cast(watch.elapsedSeconds()) > insert_timeout) { ProfileEvents::increment(ProfileEvents::DistributedSyncInsertionTimeoutExceeded); - throw Exception("Synchronous distributed insert timeout exceeded.", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Synchronous distributed insert timeout exceeded."); } } @@ -359,12 +359,12 @@ DistributedSink::runWritingJob(JobReplica & job, const Block & current_block, si { /// Skip replica_index in case of internal replication if (shard_job.replicas_jobs.size() != 1) - throw Exception("There are several writing job for an automatically replicated shard", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "There are several writing job for an automatically replicated shard"); /// TODO: it make sense to rewrite skip_unavailable_shards and max_parallel_replicas here auto results = shard_info.pool->getManyChecked(timeouts, &settings, PoolMode::GET_ONE, main_table.getQualifiedName()); if (results.empty() || results.front().entry.isNull()) - throw Exception("Expected exactly one connection for shard " + toString(job.shard_index), 
ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected exactly one connection for shard {}", toString(job.shard_index)); job.connection_entry = std::move(results.front().entry); } @@ -374,11 +374,11 @@ DistributedSink::runWritingJob(JobReplica & job, const Block & current_block, si const ConnectionPoolPtr & connection_pool = shard_info.per_replica_pools.at(job.replica_index); if (!connection_pool) - throw Exception("Connection pool for replica " + replica.readableString() + " does not exist", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Connection pool for replica {} does not exist", replica.readableString()); job.connection_entry = connection_pool->get(timeouts, &settings); if (job.connection_entry.isNull()) - throw Exception("Got empty connection for replica" + replica.readableString(), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty connection for replica{}", replica.readableString()); } if (throttler) @@ -635,7 +635,7 @@ void DistributedSink::writeAsyncImpl(const Block & block, size_t shard_id) settings.prefer_localhost_replica, settings.use_compact_format_in_distributed_parts_names); if (path.empty()) - throw Exception("Directory name for async inserts is empty", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Directory name for async inserts is empty"); writeToShard(shard_info, block_to_send, {path}); } } diff --git a/src/Storages/ExternalDataSourceConfiguration.cpp b/src/Storages/ExternalDataSourceConfiguration.cpp index c13c3c4f1ef..d7c3fe44f38 100644 --- a/src/Storages/ExternalDataSourceConfiguration.cpp +++ b/src/Storages/ExternalDataSourceConfiguration.cpp @@ -138,7 +138,8 @@ std::optional getExternalDataSourceConfiguration( || configuration.database.empty() || (configuration.table.empty() && !is_database_engine))) { throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Named collection of connection parameters is missing some of the parameters and no key-value arguments are added"); + "Named collection of connection parameters is missing some " + "of the parameters and no key-value arguments are added"); } /// Check key-value arguments. 
@@ -250,7 +251,8 @@ std::optional getExternalDataSourceConfiguration( if (configuration.host.empty() || configuration.port == 0 || configuration.username.empty() || configuration.table.empty()) { throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Named collection of connection parameters is missing some of the parameters and dictionary parameters are not added"); + "Named collection of connection parameters is missing some " + "of the parameters and dictionary parameters are not added"); } return ExternalDataSourceInfo{ .configuration = configuration, .specific_args = {}, .settings_changes = config_settings }; } @@ -373,7 +375,8 @@ ExternalDataSourcesByPriority getExternalDataSourceConfigurationByPriority( || replica_configuration.username.empty() || replica_configuration.password.empty()) { throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Named collection of connection parameters is missing some of the parameters and no other dictionary parameters are added"); + "Named collection of connection parameters is missing some " + "of the parameters and no other dictionary parameters are added"); } configuration.replicas_configurations[priority].emplace_back(replica_configuration); diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index 5835dc3294f..7838db881e9 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -306,7 +306,8 @@ Pipe StorageFileLog::read( { /// If there are MVs depended on this table, we just forbid reading if (!local_context->getSettingsRef().stream_like_engine_allow_direct_select) - throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "Direct select is not allowed. To enable use setting `stream_like_engine_allow_direct_select`"); + throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, + "Direct select is not allowed. 
To enable use setting `stream_like_engine_allow_direct_select`"); if (mv_attached) throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "Cannot read from StorageFileLog with attached materialized views"); @@ -314,7 +315,7 @@ Pipe StorageFileLog::read( std::lock_guard lock(file_infos_mutex); if (running_streams) { - throw Exception("Another select query is running on this table, need to wait it finish.", ErrorCodes::CANNOT_SELECT); + throw Exception(ErrorCodes::CANNOT_SELECT, "Another select query is running on this table, need to wait it finish."); } updateFileInfos(); @@ -670,7 +671,7 @@ bool StorageFileLog::streamToViews() auto table_id = getStorageID(); auto table = DatabaseCatalog::instance().getTable(table_id, getContext()); if (!table) - throw Exception("Engine table " + table_id.getNameForLogs() + " doesn't exist", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Engine table {} doesn't exist", table_id.getNameForLogs()); auto metadata_snapshot = getInMemoryMetadataPtr(); auto storage_snapshot = getStorageSnapshot(metadata_snapshot, getContext()); @@ -766,35 +767,34 @@ void registerStorageFileLog(StorageFactory & factory) } else if (num_threads < 1) { - throw Exception("Number of threads to parse files can not be lower than 1", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Number of threads to parse files can not be lower than 1"); } if (filelog_settings->max_block_size.changed && filelog_settings->max_block_size.value < 1) { - throw Exception("filelog_max_block_size can not be lower than 1", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "filelog_max_block_size can not be lower than 1"); } if (filelog_settings->poll_max_batch_size.changed && filelog_settings->poll_max_batch_size.value < 1) { - throw Exception("filelog_poll_max_batch_size can not be lower than 1", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "filelog_poll_max_batch_size can not be lower than 1"); } size_t init_sleep_time = filelog_settings->poll_directory_watch_events_backoff_init.totalMilliseconds(); size_t max_sleep_time = filelog_settings->poll_directory_watch_events_backoff_max.totalMilliseconds(); if (init_sleep_time > max_sleep_time) { - throw Exception( - "poll_directory_watch_events_backoff_init can not be greater than poll_directory_watch_events_backoff_max", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "poll_directory_watch_events_backoff_init can not " + "be greater than poll_directory_watch_events_backoff_max"); } if (filelog_settings->poll_directory_watch_events_backoff_factor.changed && !filelog_settings->poll_directory_watch_events_backoff_factor.value) - throw Exception("poll_directory_watch_events_backoff_factor can not be 0", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "poll_directory_watch_events_backoff_factor can not be 0"); if (args_count != 2) - throw Exception( - "Arguments size of StorageFileLog should be 2, path and format name", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Arguments size of StorageFileLog should be 2, path and format name"); auto path_ast = evaluateConstantExpressionAsLiteral(engine_args[0], args.getContext()); auto format_ast = evaluateConstantExpressionAsLiteral(engine_args[1], args.getContext()); diff --git a/src/Storages/Freeze.cpp b/src/Storages/Freeze.cpp index 74adf3de0ae..d2e19551c92 100644 --- a/src/Storages/Freeze.cpp +++ 
b/src/Storages/Freeze.cpp @@ -132,7 +132,9 @@ BlockIO Unfreezer::systemUnfreeze(const String & backup_name) static constexpr auto config_key = "enable_system_unfreeze"; if (!config.getBool(config_key, false)) { - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Support for SYSTEM UNFREEZE query is disabled. You can enable it via '{}' server setting", config_key); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, + "Support for SYSTEM UNFREEZE query is disabled. You can enable it via '{}' server setting", + config_key); } auto disks_map = local_context->getDisksMap(); diff --git a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp b/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp index c95a1104f83..dac841359da 100644 --- a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp +++ b/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp @@ -179,10 +179,10 @@ off_t AsynchronousReadBufferFromHDFS::seek(off_t offset, int whence) ProfileEvents::increment(ProfileEvents::RemoteFSSeeks); if (whence != SEEK_SET) - throw Exception("Only SEEK_SET mode is allowed.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE); + throw Exception(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Only SEEK_SET mode is allowed."); if (offset < 0) - throw Exception("Seek position is out of bounds. Offset: " + std::to_string(offset), ErrorCodes::SEEK_POSITION_OUT_OF_BOUND); + throw Exception(ErrorCodes::SEEK_POSITION_OUT_OF_BOUND, "Seek position is out of bounds. Offset: {}", offset); size_t new_pos = offset; diff --git a/src/Storages/HDFS/HDFSCommon.cpp b/src/Storages/HDFS/HDFSCommon.cpp index c6b4a5da8b0..932e80831fe 100644 --- a/src/Storages/HDFS/HDFSCommon.cpp +++ b/src/Storages/HDFS/HDFSCommon.cpp @@ -75,8 +75,7 @@ void HDFSBuilderWrapper::loadFromConfig(const Poco::Util::AbstractConfiguration #if USE_KRB5 if (isUser) { - throw Exception("hadoop.security.kerberos.ticket.cache.path cannot be set per user", - ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG); + throw Exception(ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG, "hadoop.security.kerberos.ticket.cache.path cannot be set per user"); } hadoop_security_kerberos_ticket_cache_path = config.getString(key_path); @@ -103,7 +102,7 @@ void HDFSBuilderWrapper::runKinit() } catch (const DB::Exception & e) { - throw Exception("KerberosInit failure: "+ getExceptionMessage(e, false), ErrorCodes::KERBEROS_ERROR); + throw Exception(ErrorCodes::KERBEROS_ERROR, "KerberosInit failure: {}", getExceptionMessage(e, false)); } LOG_DEBUG(&Poco::Logger::get("HDFSClient"), "Finished KerberosInit"); } @@ -116,13 +115,12 @@ HDFSBuilderWrapper createHDFSBuilder(const String & uri_str, const Poco::Util::A auto port = uri.getPort(); const String path = "//"; if (host.empty()) - throw Exception("Illegal HDFS URI: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal HDFS URI: {}", uri.toString()); HDFSBuilderWrapper builder; if (builder.get() == nullptr) - throw Exception("Unable to create builder to connect to HDFS: " + - uri.toString() + " " + String(hdfsGetLastError()), - ErrorCodes::NETWORK_ERROR); + throw Exception(ErrorCodes::NETWORK_ERROR, "Unable to create builder to connect to HDFS: {} {}", + uri.toString(), String(hdfsGetLastError())); hdfsBuilderConfSetStr(builder.get(), "input.read.timeout", "60000"); // 1 min hdfsBuilderConfSetStr(builder.get(), "input.write.timeout", "60000"); // 1 min @@ -175,8 +173,7 @@ HDFSFSPtr createHDFSFS(hdfsBuilder * builder) { HDFSFSPtr fs(hdfsBuilderConnect(builder)); if (fs == nullptr) - throw Exception("Unable to connect to HDFS: 
" + String(hdfsGetLastError()), - ErrorCodes::NETWORK_ERROR); + throw Exception(ErrorCodes::NETWORK_ERROR, "Unable to connect to HDFS: {}", String(hdfsGetLastError())); return fs; } diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/HDFS/ReadBufferFromHDFS.cpp index 3f5c81dc01b..7f80dcce2d2 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/HDFS/ReadBufferFromHDFS.cpp @@ -8,6 +8,12 @@ #include +namespace ProfileEvents +{ + extern const Event RemoteReadThrottlerBytes; + extern const Event RemoteReadThrottlerSleepMicroseconds; +} + namespace DB { @@ -103,7 +109,7 @@ struct ReadBufferFromHDFS::ReadBufferFromHDFSImpl : public BufferWithOwnMemoryadd(bytes_read); + read_settings.remote_throttler->add(bytes_read, ProfileEvents::RemoteReadThrottlerBytes, ProfileEvents::RemoteReadThrottlerSleepMicroseconds); return true; } @@ -174,10 +180,10 @@ bool ReadBufferFromHDFS::nextImpl() off_t ReadBufferFromHDFS::seek(off_t offset_, int whence) { if (whence != SEEK_SET) - throw Exception("Only SEEK_SET mode is allowed.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE); + throw Exception(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Only SEEK_SET mode is allowed."); if (offset_ < 0) - throw Exception("Seek position is out of bounds. Offset: " + std::to_string(offset_), ErrorCodes::SEEK_POSITION_OUT_OF_BOUND); + throw Exception(ErrorCodes::SEEK_POSITION_OUT_OF_BOUND, "Seek position is out of bounds. Offset: {}", offset_); if (!working_buffer.empty() && size_t(offset_) >= impl->getPosition() - working_buffer.size() diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index b0bad44092a..c915213f4ac 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -132,7 +132,7 @@ namespace return {uri.substr(pos), uri.substr(0, pos)}; } - throw Exception("Storage HDFS requires valid URL to be set", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Storage HDFS requires valid URL to be set"); } std::vector getPathsList(const String & path_from_uri, const String & uri_without_path, ContextPtr context, std::unordered_map * last_mod_times = nullptr) @@ -204,9 +204,8 @@ ColumnsDescription StorageHDFS::getTableStructureFromData( if (paths.empty() && !FormatFactory::instance().checkIfFormatHasExternalSchemaReader(format)) throw Exception( ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, because there are no files in HDFS with provided path. You must " - "specify table structure manually", - format); + "Cannot extract table structure from {} format file, because there are no files in HDFS with provided path." 
+ " You must specify table structure manually", format); std::optional columns_from_cache; if (ctx->getSettingsRef().schema_inference_use_cache_for_hdfs) @@ -342,13 +341,6 @@ HDFSSource::HDFSSource( initialize(); } -void HDFSSource::onCancel() -{ - std::lock_guard lock(reader_mutex); - if (reader) - reader->cancel(); -} - bool HDFSSource::initialize() { current_path = (*file_iterator)(); @@ -388,8 +380,12 @@ Chunk HDFSSource::generate() { while (true) { - if (!reader || isCancelled()) + if (isCancelled() || !reader) + { + if (reader) + reader->cancel(); break; + } Chunk chunk; if (reader->pull(chunk)) @@ -417,15 +413,12 @@ Chunk HDFSSource::generate() return Chunk(std::move(columns), num_rows); } - { - std::lock_guard lock(reader_mutex); - reader.reset(); - pipeline.reset(); - read_buf.reset(); + reader.reset(); + pipeline.reset(); + read_buf.reset(); - if (!initialize()) - break; - } + if (!initialize()) + break; } return {}; } @@ -717,8 +710,9 @@ void registerStorageHDFS(StorageFactory & factory) ASTs & engine_args = args.engine_args; if (engine_args.empty() || engine_args.size() > 3) - throw Exception( - "Storage HDFS requires 1, 2 or 3 arguments: url, name of used format (taken from file extension by default) and optional compression method.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Storage HDFS requires 1, 2 or 3 arguments: " + "url, name of used format (taken from file extension by default) and optional compression method."); engine_args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[0], args.getLocalContext()); diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h index b641f5bfb43..585f5df6ceb 100644 --- a/src/Storages/HDFS/StorageHDFS.h +++ b/src/Storages/HDFS/StorageHDFS.h @@ -142,8 +142,6 @@ public: Chunk generate() override; - void onCancel() override; - private: StorageHDFSPtr storage; Block block_for_format; @@ -155,8 +153,6 @@ private: std::unique_ptr read_buf; std::unique_ptr pipeline; std::unique_ptr reader; - /// onCancel and generate can be called concurrently. - std::mutex reader_mutex; String current_path; /// Recreate ReadBuffer and PullingPipelineExecutor for each file. 
diff --git a/src/Storages/HDFS/StorageHDFSCluster.cpp b/src/Storages/HDFS/StorageHDFSCluster.cpp index 71007ebc371..f6e6f773d6c 100644 --- a/src/Storages/HDFS/StorageHDFSCluster.cpp +++ b/src/Storages/HDFS/StorageHDFSCluster.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include @@ -42,15 +43,17 @@ StorageHDFSCluster::StorageHDFSCluster( const String & format_name_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, - const String & compression_method_) + const String & compression_method_, + bool structure_argument_was_provided_) : IStorageCluster(table_id_) , cluster_name(cluster_name_) , uri(uri_) , format_name(format_name_) , compression_method(compression_method_) + , structure_argument_was_provided(structure_argument_was_provided_) { - context_->getRemoteHostFilter().checkURL(Poco::URI(uri_)); checkHDFSURL(uri_); + context_->getRemoteHostFilter().checkURL(Poco::URI(uri_)); StorageInMemoryMetadata storage_metadata; @@ -58,7 +61,6 @@ StorageHDFSCluster::StorageHDFSCluster( { auto columns = StorageHDFS::getTableStructureFromData(format_name, uri_, compression_method, context_); storage_metadata.setColumns(columns); - add_columns_structure_to_query = true; } else storage_metadata.setColumns(columns_); @@ -91,7 +93,7 @@ Pipe StorageHDFSCluster::read( const bool add_agg_info = processed_stage == QueryProcessingStage::WithMergeableState; auto query_to_send = query_info.original_query->clone(); - if (add_columns_structure_to_query) + if (!structure_argument_was_provided) addColumnsStructureToQueryWithClusterEngine( query_to_send, StorageDictionary::generateNamesAndTypesDescription(storage_snapshot->metadata->getColumns().getAll()), 3, getName()); diff --git a/src/Storages/HDFS/StorageHDFSCluster.h b/src/Storages/HDFS/StorageHDFSCluster.h index 08f67aef582..4d6548a6b78 100644 --- a/src/Storages/HDFS/StorageHDFSCluster.h +++ b/src/Storages/HDFS/StorageHDFSCluster.h @@ -28,7 +28,8 @@ public: const String & format_name_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, - const String & compression_method_); + const String & compression_method_, + bool structure_argument_was_provided_); std::string getName() const override { return "HDFSCluster"; } @@ -48,7 +49,7 @@ private: String uri; String format_name; String compression_method; - bool add_columns_structure_to_query = false; + bool structure_argument_was_provided; }; diff --git a/src/Storages/HDFS/WriteBufferFromHDFS.cpp b/src/Storages/HDFS/WriteBufferFromHDFS.cpp index 1f952ec2bd9..2198bb65761 100644 --- a/src/Storages/HDFS/WriteBufferFromHDFS.cpp +++ b/src/Storages/HDFS/WriteBufferFromHDFS.cpp @@ -8,6 +8,13 @@ #include #include + +namespace ProfileEvents +{ + extern const Event RemoteWriteThrottlerBytes; + extern const Event RemoteWriteThrottlerSleepMicroseconds; +} + namespace DB { @@ -18,7 +25,6 @@ extern const int CANNOT_OPEN_FILE; extern const int CANNOT_FSYNC; } - struct WriteBufferFromHDFS::WriteBufferFromHDFSImpl { std::string hdfs_uri; @@ -45,8 +51,8 @@ struct WriteBufferFromHDFS::WriteBufferFromHDFSImpl if (fout == nullptr) { - throw Exception("Unable to open HDFS file: " + path + " error: " + std::string(hdfsGetLastError()), - ErrorCodes::CANNOT_OPEN_FILE); + throw Exception(ErrorCodes::CANNOT_OPEN_FILE, "Unable to open HDFS file: {} error: {}", + path, std::string(hdfsGetLastError())); } } @@ -59,12 +65,11 @@ struct WriteBufferFromHDFS::WriteBufferFromHDFSImpl int write(const char * start, size_t size) const { int bytes_written = 
hdfsWrite(fs.get(), fout, start, safe_cast(size)); - if (write_settings.remote_throttler) - write_settings.remote_throttler->add(bytes_written); - if (bytes_written < 0) - throw Exception("Fail to write HDFS file: " + hdfs_uri + " " + std::string(hdfsGetLastError()), - ErrorCodes::NETWORK_ERROR); + throw Exception(ErrorCodes::NETWORK_ERROR, "Fail to write HDFS file: {} {}", hdfs_uri, std::string(hdfsGetLastError())); + + if (write_settings.remote_throttler) + write_settings.remote_throttler->add(bytes_written, ProfileEvents::RemoteWriteThrottlerBytes, ProfileEvents::RemoteWriteThrottlerSleepMicroseconds); return bytes_written; } diff --git a/src/Storages/Hive/HiveFile.cpp b/src/Storages/Hive/HiveFile.cpp index 219fe562f2c..629c8689263 100644 --- a/src/Storages/Hive/HiveFile.cpp +++ b/src/Storages/Hive/HiveFile.cpp @@ -32,7 +32,7 @@ namespace ErrorCodes do \ { \ if (const ::arrow::Status & _s = (status); !_s.ok()) \ - throw Exception(_s.ToString(), ErrorCodes::BAD_ARGUMENTS); \ + throw Exception::createDeprecated(_s.ToString(), ErrorCodes::BAD_ARGUMENTS); \ } while (false) @@ -249,9 +249,8 @@ void HiveORCFile::loadSplitMinMaxIndexesImpl() auto stripe_num = raw_reader->getNumberOfStripes(); auto stripe_stats_num = raw_reader->getNumberOfStripeStatistics(); if (stripe_num != stripe_stats_num) - throw Exception( - fmt::format("orc file:{} has different strip num {} and strip statistics num {}", path, stripe_num, stripe_stats_num), - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "orc file:{} has different strip num {} and strip statistics num {}", path, stripe_num, stripe_stats_num); split_minmax_idxes.resize(stripe_num); for (size_t i = 0; i < stripe_num; ++i) diff --git a/src/Storages/Hive/HiveFile.h b/src/Storages/Hive/HiveFile.h index 1556d6860c1..1f5e31f1d54 100644 --- a/src/Storages/Hive/HiveFile.h +++ b/src/Storages/Hive/HiveFile.h @@ -72,7 +72,7 @@ public: { return VALID_HDFS_FORMATS.find(format_class)->second; } - throw Exception("Unsupported hdfs file format " + format_class, ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported hdfs file format {}", format_class); } IHiveFile( @@ -134,12 +134,12 @@ public: protected: virtual void loadFileMinMaxIndexImpl() { - throw Exception("Method loadFileMinMaxIndexImpl is not supported by hive file:" + getFormatName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method loadFileMinMaxIndexImpl is not supported by hive file:{}", getFormatName()); } virtual void loadSplitMinMaxIndexesImpl() { - throw Exception("Method loadSplitMinMaxIndexesImpl is not supported by hive file:" + getFormatName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method loadSplitMinMaxIndexesImpl is not supported by hive file:{}", getFormatName()); } virtual std::optional getRowsImpl() = 0; diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index 3fb7be5b697..445f496bbed 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -451,9 +451,9 @@ void StorageHive::lazyInitialize() format_name = "HiveText"; break; case FileFormat::RC_FILE: - throw Exception("Unsopported hive format rc_file", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsopported hive format rc_file"); case FileFormat::SEQUENCE_FILE: - throw Exception("Unsopported hive format sequence_file", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsopported hive 
format sequence_file"); case FileFormat::AVRO: format_name = "Avro"; break; @@ -556,7 +556,7 @@ static HiveFilePtr createHiveFile( } else { - throw Exception("IHiveFile not implemented for format " + format_name, ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "IHiveFile not implemented for format {}", format_name); } return hive_file; } @@ -587,9 +587,8 @@ HiveFiles StorageHive::collectHiveFilesFromPartition( /// Check partition values if (partition.values.size() != partition_names.size()) - throw Exception( - fmt::format("Partition value size not match, expect {}, but got {}", partition_names.size(), partition.values.size()), - ErrorCodes::INVALID_PARTITION_VALUE); + throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, + "Partition value size not match, expect {}, but got {}", partition_names.size(), partition.values.size()); /// Join partition values in CSV format WriteBufferFromOwnString wb; @@ -608,7 +607,7 @@ HiveFiles StorageHive::collectHiveFilesFromPartition( auto reader = std::make_unique(pipeline); Block block; if (!reader->pull(block) || !block.rows()) - throw Exception("Could not parse partition value: " + wb.str(), ErrorCodes::INVALID_PARTITION_VALUE); + throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, "Could not parse partition value: {}", wb.str()); /// Get partition values FieldVector fields(partition_names.size()); @@ -861,7 +860,11 @@ HiveFiles StorageHive::collectHiveFiles( hit_parttions_num += 1; if (hive_max_query_partitions > 0 && hit_parttions_num > hive_max_query_partitions) { - throw Exception(ErrorCodes::TOO_MANY_PARTITIONS, "Too many partitions to query for table {}.{} . Maximum number of partitions to read is limited to {}", hive_database, hive_table, hive_max_query_partitions); + throw Exception(ErrorCodes::TOO_MANY_PARTITIONS, + "Too many partitions " + "to query for table {}.{} . 
Maximum number of partitions " + "to read is limited to {}", + hive_database, hive_table, hive_max_query_partitions); } hive_files.insert(std::end(hive_files), std::begin(hive_files_in_partition), std::end(hive_files_in_partition)); } @@ -891,7 +894,7 @@ HiveFiles StorageHive::collectHiveFiles( SinkToStoragePtr StorageHive::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /* metadata_snapshot*/, ContextPtr /*context*/) { - throw Exception("Method write is not implemented for StorageHive", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method write is not implemented for StorageHive"); } NamesAndTypesList StorageHive::getVirtuals() const @@ -968,13 +971,13 @@ void registerStorageHive(StorageFactory & factory) ASTs & engine_args = args.engine_args; if (engine_args.size() != 3) - throw Exception( - "Storage Hive requires 3 arguments: hive metastore address, hive database and hive table", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Storage Hive requires 3 arguments: " + "hive metastore address, hive database and hive table"); auto * partition_by = args.storage_def->partition_by; if (!partition_by) - throw Exception("Storage Hive requires partition by clause", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Storage Hive requires partition by clause"); for (auto & engine_arg : engine_args) engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, args.getLocalContext()); diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 76100624d51..9bcfff65c95 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -39,12 +39,9 @@ RWLockImpl::LockHolder IStorage::tryLockTimed( if (!lock_holder) { const String type_str = type == RWLockImpl::Type::Read ? "READ" : "WRITE"; - throw Exception( - type_str + " locking attempt on \"" + getStorageID().getFullTableName() + "\" has timed out! (" - + std::to_string(acquire_timeout.count()) - + "ms) " - "Possible deadlock avoided. Client should retry.", - ErrorCodes::DEADLOCK_AVOIDED); + throw Exception(ErrorCodes::DEADLOCK_AVOIDED, + "{} locking attempt on \"{}\" has timed out! ({}ms) Possible deadlock avoided. Client should retry.", + type_str, getStorageID(), acquire_timeout.count()); } return lock_holder; } @@ -53,10 +50,10 @@ TableLockHolder IStorage::lockForShare(const String & query_id, const std::chron { TableLockHolder result = tryLockTimed(drop_lock, RWLockImpl::Read, query_id, acquire_timeout); - if (is_dropped) + if (is_dropped || is_detached) { auto table_id = getStorageID(); - throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Table {}.{} is dropped", table_id.database_name, table_id.table_name); + throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Table {}.{} is dropped or detached", table_id.database_name, table_id.table_name); } return result; } @@ -65,7 +62,7 @@ TableLockHolder IStorage::tryLockForShare(const String & query_id, const std::ch { TableLockHolder result = tryLockTimed(drop_lock, RWLockImpl::Read, query_id, acquire_timeout); - if (is_dropped) + if (is_dropped || is_detached) { // Table was dropped while acquiring the lock result = nullptr; @@ -82,10 +79,10 @@ IStorage::AlterLockHolder IStorage::lockForAlter(const std::chrono::milliseconds throw Exception(ErrorCodes::DEADLOCK_AVOIDED, "Locking attempt for ALTER on \"{}\" has timed out! ({} ms) " "Possible deadlock avoided. 
Client should retry.", - getStorageID().getFullTableName(), std::to_string(acquire_timeout.count())); + getStorageID().getFullTableName(), acquire_timeout.count()); - if (is_dropped) - throw Exception("Table is dropped", ErrorCodes::TABLE_IS_DROPPED); + if (is_dropped || is_detached) + throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Table {} is dropped or detached", getStorageID()); return lock; } @@ -96,8 +93,8 @@ TableExclusiveLockHolder IStorage::lockExclusively(const String & query_id, cons TableExclusiveLockHolder result; result.drop_lock = tryLockTimed(drop_lock, RWLockImpl::Write, query_id, acquire_timeout); - if (is_dropped) - throw Exception("Table is dropped", ErrorCodes::TABLE_IS_DROPPED); + if (is_dropped || is_detached) + throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Table {} is dropped or detached", getStorageID()); return result; } @@ -110,7 +107,7 @@ Pipe IStorage::watch( size_t /*max_block_size*/, size_t /*num_streams*/) { - throw Exception("Method watch is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method watch is not supported by storage {}", getName()); } Pipe IStorage::read( @@ -122,7 +119,7 @@ Pipe IStorage::read( size_t /*max_block_size*/, size_t /*num_streams*/) { - throw Exception("Method read is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method read is not supported by storage {}", getName()); } void IStorage::read( @@ -170,7 +167,7 @@ std::optional IStorage::distributedWrite( Pipe IStorage::alterPartition( const StorageMetadataPtr & /* metadata_snapshot */, const PartitionCommands & /* commands */, ContextPtr /* context */) { - throw Exception("Partition operations are not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Partition operations are not supported by storage {}", getName()); } void IStorage::alter(const AlterCommands & params, ContextPtr context, AlterLockHolder &) @@ -194,13 +191,13 @@ void IStorage::checkAlterIsPossible(const AlterCommands & commands, ContextPtr / void IStorage::checkMutationIsPossible(const MutationCommands & /*commands*/, const Settings & /*settings*/) const { - throw Exception("Table engine " + getName() + " doesn't support mutations", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Table engine {} doesn't support mutations", getName()); } void IStorage::checkAlterPartitionIsPossible( const PartitionCommands & /*commands*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Settings & /*settings*/) const { - throw Exception("Table engine " + getName() + " doesn't support partitioning", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Table engine {} doesn't support partitioning", getName()); } StorageID IStorage::getStorageID() const diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 7d927b51e5f..699780db0b9 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -19,7 +19,6 @@ #include #include -#include #include @@ -392,7 +391,7 @@ public: const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr /*context*/) { - throw Exception("Method write is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method write is not supported by storage {}", getName()); } /** Writes the data to a table in distributed manner. 
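Besides the message rewording, the IStorage.cpp hunks above change the lock helpers to reject tables that are detached as well as dropped (the new `is_detached` flag is added next to `is_dropped` further down in IStorage.h). A hedged sketch of that guard, with the lock machinery reduced to placeholders:

```cpp
// Simplified illustration of the drop/detach guard in the lock helpers above.
// TableLockHolder, tryLockTimed and the real exception type are omitted;
// only the flag check is the point.
#include <atomic>
#include <stdexcept>
#include <string>

struct StorageSketch
{
    std::atomic<bool> is_dropped{false};
    std::atomic<bool> is_detached{false};
    std::string name = "db.table";   // hypothetical table name

    void lockForShare()
    {
        // ... acquire the shared drop lock with a timeout here ...
        if (is_dropped || is_detached)
            throw std::runtime_error("Table " + name + " is dropped or detached");
    }
};
```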
@@ -424,7 +423,7 @@ public: ContextPtr /* context */, TableExclusiveLockHolder &) { - throw Exception("Truncate is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Truncate is not supported by storage {}", getName()); } virtual void checkTableCanBeRenamed(const StorageID & /*new_name*/) const {} @@ -484,35 +483,35 @@ public: const Names & /* deduplicate_by_columns */, ContextPtr /*context*/) { - throw Exception("Method optimize is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method optimize is not supported by storage {}", getName()); } /// Mutate the table contents - virtual void mutate(const MutationCommands &, ContextPtr, bool /*force_wait*/) + virtual void mutate(const MutationCommands &, ContextPtr) { - throw Exception("Mutations are not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Mutations are not supported by storage {}", getName()); } /// Cancel a mutation. virtual CancellationCode killMutation(const String & /*mutation_id*/) { - throw Exception("Mutations are not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Mutations are not supported by storage {}", getName()); } virtual void waitForMutation(const String & /*mutation_id*/) { - throw Exception("Mutations are not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Mutations are not supported by storage {}", getName()); } virtual void setMutationCSN(const String & /*mutation_id*/, UInt64 /*csn*/) { - throw Exception("Mutations are not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Mutations are not supported by storage {}", getName()); } /// Cancel a part move to shard. virtual CancellationCode killPartMoveToShard(const UUID & /*task_uuid*/) { - throw Exception("Part moves between shards are not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Part moves between shards are not supported by storage {}", getName()); } /** If the table have to do some complicated work on startup, @@ -562,6 +561,7 @@ public: virtual void onActionLockRemove(StorageActionBlockType /* action_type */) {} std::atomic is_dropped{false}; + std::atomic is_detached{false}; /// Does table support index for IN sections virtual bool supportsIndexForIn() const { return false; } @@ -570,7 +570,7 @@ public: virtual bool mayBenefitFromIndexForIn(const ASTPtr & /* left_in_operand */, ContextPtr /* query_context */, const StorageMetadataPtr & /* metadata_snapshot */) const { return false; } /// Checks validity of the data - virtual CheckResults checkData(const ASTPtr & /* query */, ContextPtr /* context */) { throw Exception("Check query is not supported for " + getName() + " storage", ErrorCodes::NOT_IMPLEMENTED); } + virtual CheckResults checkData(const ASTPtr & /* query */, ContextPtr /* context */) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Check query is not supported for {} storage", getName()); } /// Checks that table could be dropped right now /// Otherwise - throws an exception with detailed information. 
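The IStorage.h hunk above keeps the existing convention for optional capabilities: the base class provides a default implementation that throws NOT_IMPLEMENTED and names the concrete engine, so only storages that support the feature override it. A small stand-alone sketch of that pattern (the types are placeholders, not the ClickHouse originals):

```cpp
// Sketch of the "unsupported by default" convention seen in IStorage above.
#include <stdexcept>
#include <string>

struct IStorageSketch
{
    virtual ~IStorageSketch() = default;
    virtual std::string getName() const = 0;

    // Default: the capability is not implemented for this engine.
    virtual void optimize()
    {
        throw std::runtime_error("Method optimize is not supported by storage " + getName());
    }
};

struct StorageLogSketch : IStorageSketch
{
    std::string getName() const override { return "Log"; }
    // No optimize() override: calls hit the throwing default above.
};
```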
diff --git a/src/Storages/IndicesDescription.cpp b/src/Storages/IndicesDescription.cpp index 68cf6dfbb28..2e07aceeaa9 100644 --- a/src/Storages/IndicesDescription.cpp +++ b/src/Storages/IndicesDescription.cpp @@ -72,16 +72,16 @@ IndexDescription IndexDescription::getIndexFromAST(const ASTPtr & definition_ast { const auto * index_definition = definition_ast->as(); if (!index_definition) - throw Exception("Cannot create skip index from non ASTIndexDeclaration AST", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot create skip index from non ASTIndexDeclaration AST"); if (index_definition->name.empty()) - throw Exception("Skip index must have name in definition.", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Skip index must have name in definition."); if (!index_definition->type) - throw Exception("TYPE is required for index", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "TYPE is required for index"); if (index_definition->type->parameters && !index_definition->type->parameters->children.empty()) - throw Exception("Index type cannot have parameters", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Index type cannot have parameters"); IndexDescription result; result.definition_ast = index_definition->clone(); @@ -111,7 +111,7 @@ IndexDescription IndexDescription::getIndexFromAST(const ASTPtr & definition_ast { const auto * argument = definition_arguments->children[i]->as(); if (!argument) - throw Exception("Only literals can be skip index arguments", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Only literals can be skip index arguments"); result.arguments.emplace_back(argument->value); } } diff --git a/src/Storages/Kafka/KafkaConsumer.cpp b/src/Storages/Kafka/KafkaConsumer.cpp index 1cfbd145fb1..b3e0c6a8839 100644 --- a/src/Storages/Kafka/KafkaConsumer.cpp +++ b/src/Storages/Kafka/KafkaConsumer.cpp @@ -263,7 +263,9 @@ void KafkaConsumer::commit() { // TODO: insert atomicity / transactions is needed here (possibility to rollback, on 2 phase commits) ProfileEvents::increment(ProfileEvents::KafkaCommitFailures); - throw Exception("All commit attempts failed. Last block was already written to target table(s), but was not committed to Kafka.", ErrorCodes::CANNOT_COMMIT_OFFSET); + throw Exception(ErrorCodes::CANNOT_COMMIT_OFFSET, + "All commit attempts failed. Last block was already written to target table(s), " + "but was not committed to Kafka."); } else { diff --git a/src/Storages/Kafka/KafkaSource.cpp b/src/Storages/Kafka/KafkaSource.cpp index db528adec79..c456ab1550a 100644 --- a/src/Storages/Kafka/KafkaSource.cpp +++ b/src/Storages/Kafka/KafkaSource.cpp @@ -156,7 +156,7 @@ Chunk KafkaSource::generateImpl() // KafkaConsumer::messages, which is accessed from // KafkaConsumer::currentTopic() (and other helpers). 
if (consumer->isStalled()) - throw Exception("Polled messages became unusable", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Polled messages became unusable"); ProfileEvents::increment(ProfileEvents::KafkaRowsRead, new_rows); diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 635d0e7864a..50fb7dffa34 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -122,7 +122,7 @@ struct StorageKafkaInterceptors return thread_status_ptr.get() == current_thread; }); if (it == self->thread_statuses.end()) - throw Exception("No thread status for this librdkafka thread.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "No thread status for this librdkafka thread."); self->thread_statuses.erase(it); @@ -298,7 +298,8 @@ Pipe StorageKafka::read( return {}; if (!local_context->getSettingsRef().stream_like_engine_allow_direct_select) - throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "Direct select is not allowed. To enable use setting `stream_like_engine_allow_direct_select`"); + throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, + "Direct select is not allowed. To enable use setting `stream_like_engine_allow_direct_select`"); if (mv_attached) throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "Cannot read from StorageKafka with attached materialized views"); @@ -334,7 +335,7 @@ SinkToStoragePtr StorageKafka::write(const ASTPtr &, const StorageMetadataPtr & ProfileEvents::increment(ProfileEvents::KafkaWrites); if (topics.size() > 1) - throw Exception("Can't write to Kafka table with multiple topics!", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Can't write to Kafka table with multiple topics!"); cppkafka::Configuration conf; conf.set("metadata.broker.list", brokers); @@ -673,7 +674,7 @@ bool StorageKafka::streamToViews() auto table_id = getStorageID(); auto table = DatabaseCatalog::instance().getTable(table_id, getContext()); if (!table) - throw Exception("Engine table " + table_id.getNameForLogs() + " doesn't exist.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Engine table {} doesn't exist.", table_id.getNameForLogs()); CurrentMetrics::Increment metric_increment{CurrentMetrics::KafkaBackgroundReads}; ProfileEvents::increment(ProfileEvents::KafkaBackgroundReads); @@ -771,10 +772,10 @@ void registerStorageKafka(StorageFactory & factory) if (args_count < (ARG_NUM) && (ARG_NUM) <= 4 && \ !kafka_settings->PAR_NAME.changed) \ { \ - throw Exception( \ - "Required parameter '" #PAR_NAME "' " \ + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,\ + "Required parameter '{}' " \ "for storage Kafka not specified", \ - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); \ + #PAR_NAME); \ } \ if (args_count >= (ARG_NUM)) \ { \ @@ -782,11 +783,11 @@ void registerStorageKafka(StorageFactory & factory) if (has_settings && \ kafka_settings->PAR_NAME.changed) \ { \ - throw Exception( \ - "The argument №" #ARG_NUM " of storage Kafka " \ - "and the parameter '" #PAR_NAME "' " \ + throw Exception(ErrorCodes::BAD_ARGUMENTS, \ + "The argument №{} of storage Kafka " \ + "and the parameter '{}' " \ "in SETTINGS cannot be specified at the same time", \ - ErrorCodes::BAD_ARGUMENTS); \ + #ARG_NUM, #PAR_NAME); \ } \ /* move engine args to settings */ \ else \ @@ -858,25 +859,28 @@ void registerStorageKafka(StorageFactory & factory) if (!args.getLocalContext()->getSettingsRef().kafka_disable_num_consumers_limit && num_consumers > 
max_consumers) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "The number of consumers can not be bigger than {}. " - "A single consumer can read any number of partitions. Extra consumers are relatively expensive, " - "and using a lot of them can lead to high memory and CPU usage. To achieve better performance " + "A single consumer can read any number of partitions. " + "Extra consumers are relatively expensive, " + "and using a lot of them can lead to high memory and CPU usage. " + "To achieve better performance " "of getting data from Kafka, consider using a setting kafka_thread_per_consumer=1, " - "and ensure you have enough threads in MessageBrokerSchedulePool (background_message_broker_schedule_pool_size). " + "and ensure you have enough threads " + "in MessageBrokerSchedulePool (background_message_broker_schedule_pool_size). " "See also https://clickhouse.com/docs/integrations/kafka/kafka-table-engine#tuning-performance", max_consumers); } else if (num_consumers < 1) { - throw Exception("Number of consumers can not be lower than 1", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Number of consumers can not be lower than 1"); } if (kafka_settings->kafka_max_block_size.changed && kafka_settings->kafka_max_block_size.value < 1) { - throw Exception("kafka_max_block_size can not be lower than 1", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "kafka_max_block_size can not be lower than 1"); } if (kafka_settings->kafka_poll_max_batch_size.changed && kafka_settings->kafka_poll_max_batch_size.value < 1) { - throw Exception("kafka_poll_max_batch_size can not be lower than 1", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "kafka_poll_max_batch_size can not be lower than 1"); } return std::make_shared(args.table_id, args.getContext(), args.columns, std::move(kafka_settings), collection_name); diff --git a/src/Storages/KeyDescription.cpp b/src/Storages/KeyDescription.cpp index f100f129cda..c407cef627d 100644 --- a/src/Storages/KeyDescription.cpp +++ b/src/Storages/KeyDescription.cpp @@ -61,7 +61,7 @@ KeyDescription & KeyDescription::operator=(const KeyDescription & other) /// additional_column is constant property It should never be lost. if (additional_column.has_value() && !other.additional_column.has_value()) - throw Exception("Wrong key assignment, losing additional_column", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong key assignment, losing additional_column"); additional_column = other.additional_column; return *this; } diff --git a/src/Storages/LiveView/LiveViewCommands.h b/src/Storages/LiveView/LiveViewCommands.h index ebf196cea76..2bb2dfb2752 100644 --- a/src/Storages/LiveView/LiveViewCommands.h +++ b/src/Storages/LiveView/LiveViewCommands.h @@ -58,7 +58,7 @@ public: void validate(const IStorage & table) { if (!empty() && !dynamic_cast(&table)) - throw Exception("Wrong storage type. Must be StorageLiveView", DB::ErrorCodes::UNKNOWN_STORAGE); + throw Exception(DB::ErrorCodes::UNKNOWN_STORAGE, "Wrong storage type. 
Must be StorageLiveView"); } }; diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index c92968e4bcc..8f36ea4d91d 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -79,10 +79,11 @@ static StorageID extractDependentTable(ASTPtr & query, ContextPtr context, const { auto * ast_select = subquery->as(); if (!ast_select) - throw Exception("LIVE VIEWs are only supported for queries from tables, but there is no table name in select query.", - DB::ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_LIVE_VIEW); + throw Exception(DB::ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_LIVE_VIEW, + "LIVE VIEWs are only supported for queries from tables, " + "but there is no table name in select query."); if (ast_select->list_of_selects->children.size() != 1) - throw Exception("UNION is not supported for LIVE VIEW", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_LIVE_VIEW); + throw Exception(ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_LIVE_VIEW, "UNION is not supported for LIVE VIEW"); inner_subquery = ast_select->list_of_selects->children.at(0)->clone(); @@ -293,11 +294,11 @@ StorageLiveView::StorageLiveView( setInMemoryMetadata(storage_metadata); if (!query.select) - throw Exception("SELECT query is not specified for " + getName(), ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "SELECT query is not specified for {}", getName()); /// Default value, if only table name exist in the query if (query.select->list_of_selects->children.size() != 1) - throw Exception("UNION is not supported for LIVE VIEW", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_LIVE_VIEW); + throw Exception(ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_LIVE_VIEW, "UNION is not supported for LIVE VIEW"); inner_query = query.select->list_of_selects->children.at(0); @@ -469,7 +470,6 @@ void StorageLiveView::drop() DatabaseCatalog::instance().removeViewDependency(select_table_id, table_id); std::lock_guard lock(mutex); - is_dropped = true; condition.notify_all(); } @@ -607,9 +607,8 @@ void registerStorageLiveView(StorageFactory & factory) factory.registerStorage("LiveView", [](const StorageFactory::Arguments & args) { if (!args.attach && !args.getLocalContext()->getSettingsRef().allow_experimental_live_view) - throw Exception( - "Experimental LIVE VIEW feature is not enabled (the setting 'allow_experimental_live_view')", - ErrorCodes::SUPPORT_IS_DISABLED); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, + "Experimental LIVE VIEW feature is not enabled (the setting 'allow_experimental_live_view')"); return std::make_shared(args.table_id, args.getLocalContext(), args.query, args.columns, args.comment); }); diff --git a/src/Storages/MeiliSearch/MeiliSearchColumnDescriptionFetcher.cpp b/src/Storages/MeiliSearch/MeiliSearchColumnDescriptionFetcher.cpp index 0777f43aaae..bbbca009373 100644 --- a/src/Storages/MeiliSearch/MeiliSearchColumnDescriptionFetcher.cpp +++ b/src/Storages/MeiliSearch/MeiliSearchColumnDescriptionFetcher.cpp @@ -69,7 +69,7 @@ ColumnsDescription MeiliSearchColumnDescriptionFetcher::fetchColumnsDescription( JSON jres = JSON(response).begin(); if (jres.getName() == "message") - throw Exception(ErrorCodes::MEILISEARCH_EXCEPTION, jres.getValue().toString()); + throw Exception::createRuntime(ErrorCodes::MEILISEARCH_EXCEPTION, jres.getValue().toString()); NamesAndTypesList list; diff --git a/src/Storages/MeiliSearch/MeiliSearchConnection.cpp b/src/Storages/MeiliSearch/MeiliSearchConnection.cpp index 8e271c3acf3..e34688a6364 100644 --- 
a/src/Storages/MeiliSearch/MeiliSearchConnection.cpp +++ b/src/Storages/MeiliSearch/MeiliSearchConnection.cpp @@ -52,7 +52,7 @@ String MeiliSearchConnection::execPostQuery(const String & url, std::string_view return response_buffer; } else - throw Exception(ErrorCodes::NETWORK_ERROR, res.getReason()); + throw Exception::createRuntime(ErrorCodes::NETWORK_ERROR, res.getReason()); } String MeiliSearchConnection::execGetQuery(const String & url, const std::unordered_map & query_params) const @@ -86,7 +86,7 @@ String MeiliSearchConnection::execGetQuery(const String & url, const std::unorde return response_buffer; } else - throw Exception(ErrorCodes::NETWORK_ERROR, res.getReason()); + throw Exception::createRuntime(ErrorCodes::NETWORK_ERROR, res.getReason()); } diff --git a/src/Storages/MeiliSearch/SinkMeiliSearch.cpp b/src/Storages/MeiliSearch/SinkMeiliSearch.cpp index e04275dfe50..32626278bd4 100644 --- a/src/Storages/MeiliSearch/SinkMeiliSearch.cpp +++ b/src/Storages/MeiliSearch/SinkMeiliSearch.cpp @@ -53,7 +53,7 @@ void SinkMeiliSearch::writeBlockData(const Block & block) const auto response = connection.updateQuery(vbuf); auto jres = JSON(response).begin(); if (jres.getName() == "message") - throw Exception(ErrorCodes::MEILISEARCH_EXCEPTION, jres.getValue().toString()); + throw Exception::createRuntime(ErrorCodes::MEILISEARCH_EXCEPTION, jres.getValue().toString()); } void SinkMeiliSearch::consume(Chunk chunk) diff --git a/src/Storages/MeiliSearch/SourceMeiliSearch.cpp b/src/Storages/MeiliSearch/SourceMeiliSearch.cpp index b516ad8d0cf..f567af23a12 100644 --- a/src/Storages/MeiliSearch/SourceMeiliSearch.cpp +++ b/src/Storages/MeiliSearch/SourceMeiliSearch.cpp @@ -152,8 +152,7 @@ Field getField(JSON value, DataTypePtr type_ptr) else { const std::string_view type_name = magic_enum::enum_name(type_id); - const String err_msg = "MeiliSearch storage doesn't support type: "; - throw Exception(ErrorCodes::UNSUPPORTED_MEILISEARCH_TYPE, err_msg + type_name.data()); + throw Exception(ErrorCodes::UNSUPPORTED_MEILISEARCH_TYPE, "MeiliSearch storage doesn't support type: {}", type_name); } } @@ -181,7 +180,7 @@ size_t MeiliSearchSource::parseJSON(MutableColumns & columns, const JSON & jres) } if (cnt_fields != columns.size()) throw Exception( - ErrorCodes::MEILISEARCH_MISSING_SOME_COLUMNS, "Some columns were not found in the table, json = " + json.toString()); + ErrorCodes::MEILISEARCH_MISSING_SOME_COLUMNS, "Some columns were not found in the table, json = {}", json.toString()); } return cnt_match; } @@ -201,7 +200,7 @@ Chunk MeiliSearchSource::generate() auto response = connection.searchQuery(query_params); JSON jres = JSON(response).begin(); if (jres.getName() == "message") - throw Exception(ErrorCodes::MEILISEARCH_EXCEPTION, jres.toString()); + throw Exception::createRuntime(ErrorCodes::MEILISEARCH_EXCEPTION, jres.toString()); cnt_match = parseJSON(columns, jres.getValue()); } @@ -212,7 +211,7 @@ Chunk MeiliSearchSource::generate() if (!jres.isArray()) { auto error = jres.getWithDefault("message"); - throw Exception(ErrorCodes::MEILISEARCH_EXCEPTION, error); + throw Exception::createRuntime(ErrorCodes::MEILISEARCH_EXCEPTION, error); } cnt_match = parseJSON(columns, jres); } diff --git a/src/Storages/MeiliSearch/StorageMeiliSearch.cpp b/src/Storages/MeiliSearch/StorageMeiliSearch.cpp index 3eca27ef3a8..56dad2a0d13 100644 --- a/src/Storages/MeiliSearch/StorageMeiliSearch.cpp +++ b/src/Storages/MeiliSearch/StorageMeiliSearch.cpp @@ -101,7 +101,7 @@ Pipe StorageMeiliSearch::read( auto str = 
el->getColumnName(); auto it = find(str.begin(), str.end(), '='); if (it == str.end()) - throw Exception("meiliMatch function must have parameters of the form \'key=value\'", ErrorCodes::BAD_QUERY_PARAMETER); + throw Exception(ErrorCodes::BAD_QUERY_PARAMETER, "meiliMatch function must have parameters of the form \'key=value\'"); String key(str.begin() + 1, it); String value(it + 1, str.end() - 1); @@ -139,8 +139,8 @@ MeiliSearchConfiguration StorageMeiliSearch::getConfiguration(ASTs engine_args, if (url.empty() || index.empty()) { - throw Exception( - "Storage MeiliSearch requires 3 parameters: MeiliSearch('url', 'index', 'key'= \"\")", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Storage MeiliSearch requires 3 parameters: MeiliSearch('url', 'index', 'key'= \"\")"); } return MeiliSearchConfiguration(url, index, key); @@ -149,9 +149,8 @@ MeiliSearchConfiguration StorageMeiliSearch::getConfiguration(ASTs engine_args, { if (engine_args.size() < 2 || 3 < engine_args.size()) { - throw Exception( - "Storage MeiliSearch requires 3 parameters: MeiliSearch('url', 'index', 'key'= \"\")", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Storage MeiliSearch requires 3 parameters: MeiliSearch('url', 'index', 'key'= \"\")"); } for (auto & engine_arg : engine_args) diff --git a/src/Storages/MergeTree/ActiveDataPartSet.cpp b/src/Storages/MergeTree/ActiveDataPartSet.cpp index 5fb22f4161e..5b7965bc3a0 100644 --- a/src/Storages/MergeTree/ActiveDataPartSet.cpp +++ b/src/Storages/MergeTree/ActiveDataPartSet.cpp @@ -47,7 +47,10 @@ bool ActiveDataPartSet::add(const MergeTreePartInfo & part_info, const String & if (!part_info.contains(it->first)) { if (!part_info.isDisjoint(it->first)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} intersects previous part {}. It is a bug or a result of manual intervention in the ZooKeeper data.", part_info.getPartNameForLogs(), it->first.getPartNameForLogs()); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Part {} intersects previous part {}. " + "It is a bug or a result of manual intervention in the ZooKeeper data.", + part_info.getPartNameForLogs(), it->first.getPartNameForLogs()); ++it; break; } @@ -70,7 +73,9 @@ bool ActiveDataPartSet::add(const MergeTreePartInfo & part_info, const String & } if (it != part_info_to_name.end() && !part_info.isDisjoint(it->first)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} intersects part {}. It is a bug or a result of manual intervention in the ZooKeeper data.", name, it->first.getPartNameForLogs()); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Part {} intersects part {}. 
It is a bug or a result of manual intervention " + "in the ZooKeeper data.", name, it->first.getPartNameForLogs()); part_info_to_name.emplace(part_info, name); return true; diff --git a/src/Storages/MergeTree/AsyncBlockIDsCache.cpp b/src/Storages/MergeTree/AsyncBlockIDsCache.cpp index b65b670d87f..7a8a4cd4347 100644 --- a/src/Storages/MergeTree/AsyncBlockIDsCache.cpp +++ b/src/Storages/MergeTree/AsyncBlockIDsCache.cpp @@ -31,10 +31,11 @@ std::vector AsyncBlockIDsCache::getChildren() { auto zookeeper = storage.getZooKeeper(); - auto watch_callback = [&](const Coordination::WatchResponse &) + auto watch_callback = [last_time = this->last_updatetime.load() + , update_min_interval = this->update_min_interval + , task = task->shared_from_this()](const Coordination::WatchResponse &) { auto now = std::chrono::steady_clock::now(); - auto last_time = last_updatetime.load(); if (now - last_time < update_min_interval) { std::chrono::milliseconds sleep_time = std::chrono::duration_cast(update_min_interval - (now - last_time)); diff --git a/src/Storages/MergeTree/CommonANNIndexes.cpp b/src/Storages/MergeTree/CommonANNIndexes.cpp index e8b7d85e875..4b360e029e5 100644 --- a/src/Storages/MergeTree/CommonANNIndexes.cpp +++ b/src/Storages/MergeTree/CommonANNIndexes.cpp @@ -598,7 +598,7 @@ float ANNCondition::getFloatOrIntLiteralOrPanic(const RPN::iterator& iter) { return static_cast(iter->int_literal.value()); } - throw Exception("Wrong parsed AST in buildRPN\n", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Wrong parsed AST in buildRPN\n"); } } diff --git a/src/Storages/MergeTree/DataPartStorageOnDisk.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp similarity index 59% rename from src/Storages/MergeTree/DataPartStorageOnDisk.cpp rename to src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp index 73e7ae54795..175df9b6e28 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDisk.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.cpp @@ -1,17 +1,14 @@ -#include +#include #include -#include #include #include -#include +#include #include #include +#include #include #include -#include #include -#include -#include namespace DB { @@ -22,14 +19,15 @@ namespace ErrorCodes extern const int NOT_ENOUGH_SPACE; extern const int LOGICAL_ERROR; extern const int FILE_DOESNT_EXIST; + extern const int CORRUPTED_DATA; } -DataPartStorageOnDisk::DataPartStorageOnDisk(VolumePtr volume_, std::string root_path_, std::string part_dir_) +DataPartStorageOnDiskBase::DataPartStorageOnDiskBase(VolumePtr volume_, std::string root_path_, std::string part_dir_) : volume(std::move(volume_)), root_path(std::move(root_path_)), part_dir(std::move(part_dir_)) { } -DataPartStorageOnDisk::DataPartStorageOnDisk( +DataPartStorageOnDiskBase::DataPartStorageOnDiskBase( VolumePtr volume_, std::string root_path_, std::string part_dir_, DiskTransactionPtr transaction_) : volume(std::move(volume_)) , root_path(std::move(root_path_)) @@ -39,92 +37,132 @@ DataPartStorageOnDisk::DataPartStorageOnDisk( { } -std::string DataPartStorageOnDisk::getFullPath() const +DiskPtr DataPartStorageOnDiskBase::getDisk() const +{ + return volume->getDisk(); +} + +std::string DataPartStorageOnDiskBase::getFullPath() const { return fs::path(volume->getDisk()->getPath()) / root_path / part_dir / ""; } -std::string DataPartStorageOnDisk::getRelativePath() const +std::string DataPartStorageOnDiskBase::getRelativePath() const { return fs::path(root_path) / part_dir / ""; } -void DataPartStorageOnDisk::setRelativePath(const 
std::string & path) +std::optional DataPartStorageOnDiskBase::getRelativePathForPrefix(Poco::Logger * log, const String & prefix, bool detached, bool broken) const +{ + assert(!broken || detached); + String res; + + auto full_relative_path = fs::path(root_path); + if (detached) + full_relative_path /= "detached"; + + std::optional original_checksums_content; + std::optional original_files_list; + + for (int try_no = 0; try_no < 10; ++try_no) + { + if (prefix.empty()) + res = part_dir + (try_no ? "_try" + DB::toString(try_no) : ""); + else if (prefix.ends_with("_")) + res = prefix + part_dir + (try_no ? "_try" + DB::toString(try_no) : ""); + else + res = prefix + "_" + part_dir + (try_no ? "_try" + DB::toString(try_no) : ""); + + if (!volume->getDisk()->exists(full_relative_path / res)) + return res; + + /// If part with compacted storage is broken then we probably + /// cannot read the single file with data and check its content. + if (broken + && isFullPartStorage(*this) + && looksLikeBrokenDetachedPartHasTheSameContent(res, original_checksums_content, original_files_list)) + { + LOG_WARNING(log, "Directory {} (to detach to) already exists, " + "but its content looks similar to content of the broken part which we are going to detach. " + "Assuming it was already cloned to detached, will not do it again to avoid redundant copies of broken part.", res); + return {}; + } + + LOG_WARNING(log, "Directory {} (to detach to) already exists. Will detach to directory with '_tryN' suffix.", res); + } + + return res; +} + +bool DataPartStorageOnDiskBase::looksLikeBrokenDetachedPartHasTheSameContent(const String & detached_part_path, + std::optional & original_checksums_content, + std::optional & original_files_list) const +{ + /// We cannot know for sure that content of detached part is the same, + /// but in most cases it's enough to compare checksums.txt and list of files. 
+ + if (!exists("checksums.txt")) + return false; + + auto storage_from_detached = create(volume, fs::path(root_path) / "detached", detached_part_path, /*initialize=*/ true); + if (!storage_from_detached->exists("checksums.txt")) + return false; + + if (!original_checksums_content) + { + auto in = storage_from_detached->readFile("checksums.txt", /* settings */ {}, /* read_hint */ {}, /* file_size */ {}); + original_checksums_content.emplace(); + readStringUntilEOF(*original_checksums_content, *in); + } + + if (original_checksums_content->empty()) + return false; + + String detached_checksums_content; + { + auto in = readFile("checksums.txt", /* settings */ {}, /* read_hint */ {}, /* file_size */ {}); + readStringUntilEOF(detached_checksums_content, *in); + } + + if (original_checksums_content != detached_checksums_content) + return false; + + if (!original_files_list) + { + original_files_list.emplace(); + for (auto it = iterate(); it->isValid(); it->next()) + original_files_list->emplace_back(it->name()); + std::sort(original_files_list->begin(), original_files_list->end()); + } + + Strings detached_files_list; + for (auto it = storage_from_detached->iterate(); it->isValid(); it->next()) + detached_files_list.emplace_back(it->name()); + std::sort(detached_files_list.begin(), detached_files_list.end()); + + return original_files_list == detached_files_list; +} + +void DataPartStorageOnDiskBase::setRelativePath(const std::string & path) { part_dir = path; } -std::string DataPartStorageOnDisk::getFullRootPath() const +std::string DataPartStorageOnDiskBase::getPartDirectory() const +{ + return part_dir; +} + +std::string DataPartStorageOnDiskBase::getFullRootPath() const { return fs::path(volume->getDisk()->getPath()) / root_path / ""; } -MutableDataPartStoragePtr DataPartStorageOnDisk::getProjection(const std::string & name, bool use_parent_transaction) // NOLINT -{ - return std::shared_ptr(new DataPartStorageOnDisk(volume, std::string(fs::path(root_path) / part_dir), name, use_parent_transaction ? 
transaction : nullptr)); -} - -DataPartStoragePtr DataPartStorageOnDisk::getProjection(const std::string & name) const -{ - return std::make_shared(volume, std::string(fs::path(root_path) / part_dir), name); -} - -bool DataPartStorageOnDisk::exists() const -{ - return volume->getDisk()->exists(fs::path(root_path) / part_dir); -} - -bool DataPartStorageOnDisk::exists(const std::string & name) const -{ - return volume->getDisk()->exists(fs::path(root_path) / part_dir / name); -} - -bool DataPartStorageOnDisk::isDirectory(const std::string & name) const -{ - return volume->getDisk()->isDirectory(fs::path(root_path) / part_dir / name); -} - -Poco::Timestamp DataPartStorageOnDisk::getLastModified() const +Poco::Timestamp DataPartStorageOnDiskBase::getLastModified() const { return volume->getDisk()->getLastModified(fs::path(root_path) / part_dir); } -class DataPartStorageIteratorOnDisk final : public IDataPartStorageIterator -{ -public: - DataPartStorageIteratorOnDisk(DiskPtr disk_, DirectoryIteratorPtr it_) - : disk(std::move(disk_)), it(std::move(it_)) - { - } - - void next() override { it->next(); } - bool isValid() const override { return it->isValid(); } - bool isFile() const override { return isValid() && disk->isFile(it->path()); } - std::string name() const override { return it->name(); } - std::string path() const override { return it->path(); } - -private: - DiskPtr disk; - DirectoryIteratorPtr it; -}; - -DataPartStorageIteratorPtr DataPartStorageOnDisk::iterate() const -{ - return std::make_unique( - volume->getDisk(), - volume->getDisk()->iterateDirectory(fs::path(root_path) / part_dir)); -} - -size_t DataPartStorageOnDisk::getFileSize(const String & file_name) const -{ - return volume->getDisk()->getFileSize(fs::path(root_path) / part_dir / file_name); -} - -UInt32 DataPartStorageOnDisk::getRefCount(const String & file_name) const -{ - return volume->getDisk()->getRefCount(fs::path(root_path) / part_dir / file_name); -} - static UInt64 calculateTotalSizeOnDiskImpl(const DiskPtr & disk, const String & from) { if (disk->isFile(from)) @@ -132,44 +170,340 @@ static UInt64 calculateTotalSizeOnDiskImpl(const DiskPtr & disk, const String & std::vector files; disk->listFiles(from, files); + UInt64 res = 0; for (const auto & file : files) res += calculateTotalSizeOnDiskImpl(disk, fs::path(from) / file); + return res; } -UInt64 DataPartStorageOnDisk::calculateTotalSizeOnDisk() const +UInt64 DataPartStorageOnDiskBase::calculateTotalSizeOnDisk() const { return calculateTotalSizeOnDiskImpl(volume->getDisk(), fs::path(root_path) / part_dir); } -std::unique_ptr DataPartStorageOnDisk::readFile( - const std::string & name, - const ReadSettings & settings, - std::optional read_hint, - std::optional file_size) const +std::string DataPartStorageOnDiskBase::getDiskName() const { - return volume->getDisk()->readFile(fs::path(root_path) / part_dir / name, settings, read_hint, file_size); + return volume->getDisk()->getName(); } -void DataPartStorageOnDisk::checkConsistency(const MergeTreeDataPartChecksums & checksums) const +std::string DataPartStorageOnDiskBase::getDiskType() const { - checksums.checkSizes(volume->getDisk(), getRelativePath()); + return toString(volume->getDisk()->getDataSourceDescription().type); } -void DataPartStorageOnDisk::remove( +bool DataPartStorageOnDiskBase::isStoredOnRemoteDisk() const +{ + return volume->getDisk()->isRemote(); +} + +bool DataPartStorageOnDiskBase::supportZeroCopyReplication() const +{ + return volume->getDisk()->supportZeroCopyReplication(); +} + +bool 
DataPartStorageOnDiskBase::supportParallelWrite() const +{ + return volume->getDisk()->supportParallelWrite(); +} + +bool DataPartStorageOnDiskBase::isBroken() const +{ + return volume->getDisk()->isBroken(); +} + +void DataPartStorageOnDiskBase::syncRevision(UInt64 revision) const +{ + volume->getDisk()->syncRevision(revision); +} + +UInt64 DataPartStorageOnDiskBase::getRevision() const +{ + return volume->getDisk()->getRevision(); +} + +std::string DataPartStorageOnDiskBase::getDiskPath() const +{ + return volume->getDisk()->getPath(); +} + +ReservationPtr DataPartStorageOnDiskBase::reserve(UInt64 bytes) const +{ + auto res = volume->reserve(bytes); + if (!res) + throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Cannot reserve {}, not enough space", ReadableSize(bytes)); + + return res; +} + +ReservationPtr DataPartStorageOnDiskBase::tryReserve(UInt64 bytes) const +{ + return volume->reserve(bytes); +} + +IDataPartStorage::ReplicatedFilesDescription +DataPartStorageOnDiskBase::getReplicatedFilesDescription(const NameSet & file_names) const +{ + ReplicatedFilesDescription description; + auto relative_path = fs::path(root_path) / part_dir; + auto disk = volume->getDisk(); + + auto actual_file_names = getActualFileNamesOnDisk(file_names); + for (const auto & name : actual_file_names) + { + auto path = relative_path / name; + size_t file_size = disk->getFileSize(path); + + auto & file_desc = description.files[name]; + + file_desc.file_size = file_size; + file_desc.input_buffer_getter = [disk, path, file_size] + { + return disk->readFile(path, ReadSettings{}.adjustBufferSize(file_size), file_size, file_size); + }; + } + + return description; +} + +IDataPartStorage::ReplicatedFilesDescription +DataPartStorageOnDiskBase::getReplicatedFilesDescriptionForRemoteDisk(const NameSet & file_names) const +{ + ReplicatedFilesDescription description; + auto relative_path = fs::path(root_path) / part_dir; + + auto disk = volume->getDisk(); + if (!disk->supportZeroCopyReplication()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Disk {} doesn't support zero-copy replication", disk->getName()); + + description.unique_id = getUniqueId(); + + Names paths; + auto actual_file_names = getActualFileNamesOnDisk(file_names); + + for (const auto & name : actual_file_names) + { + /// Just some additional checks + auto metadata_full_file_path = fs::path(getFullPath()) / name; + if (!fs::exists(metadata_full_file_path)) + throw Exception(ErrorCodes::CORRUPTED_DATA, "Remote metadata '{}' is not exists", name); + if (!fs::is_regular_file(metadata_full_file_path)) + throw Exception(ErrorCodes::CORRUPTED_DATA, "Remote metadata '{}' is not a file", name); + + paths.emplace_back(relative_path / name); + } + + auto serialized_metadata = disk->getSerializedMetadata(paths); + for (const auto & name : actual_file_names) + { + auto & file_desc = description.files[name]; + const auto & metadata_str = serialized_metadata.at(relative_path / name); + + file_desc.file_size = metadata_str.size(); + file_desc.input_buffer_getter = [metadata_str] + { + return std::make_unique(metadata_str); + }; + } + + return description; +} + +void DataPartStorageOnDiskBase::backup( + const MergeTreeDataPartChecksums & checksums, + const NameSet & files_without_checksums, + const String & path_in_backup, + BackupEntries & backup_entries, + bool make_temporary_hard_links, + TemporaryFilesOnDisks * temp_dirs) const +{ + fs::path part_path_on_disk = fs::path{root_path} / part_dir; + fs::path part_path_in_backup = fs::path{path_in_backup} / part_dir; + + auto 
disk = volume->getDisk(); + + fs::path temp_part_dir; + std::shared_ptr temp_dir_owner; + if (make_temporary_hard_links) + { + assert(temp_dirs); + auto temp_dir_it = temp_dirs->find(disk); + if (temp_dir_it == temp_dirs->end()) + temp_dir_it = temp_dirs->emplace(disk, std::make_shared(disk, "tmp/")).first; + + temp_dir_owner = temp_dir_it->second; + fs::path temp_dir = temp_dir_owner->getPath(); + temp_part_dir = temp_dir / part_path_in_backup.relative_path(); + disk->createDirectories(temp_part_dir); + } + + /// For example, + /// part_path_in_backup = /data/test/table/0_1_1_0 + /// part_path_on_disk = store/f57/f5728353-44bb-4575-85e8-28deb893657a/0_1_1_0 + /// tmp_part_dir = tmp/1aaaaaa/data/test/table/0_1_1_0 + /// Or, for projections: + /// part_path_in_backup = /data/test/table/0_1_1_0/prjmax.proj + /// part_path_on_disk = store/f57/f5728353-44bb-4575-85e8-28deb893657a/0_1_1_0/prjmax.proj + /// tmp_part_dir = tmp/1aaaaaa/data/test/table/0_1_1_0/prjmax.proj + + auto files_to_backup = files_without_checksums; + for (const auto & [name, _] : checksums.files) + { + if (!name.ends_with(".proj")) + files_to_backup.insert(name); + } + + files_to_backup = getActualFileNamesOnDisk(files_to_backup); + + for (const auto & filepath : files_to_backup) + { + auto filepath_on_disk = part_path_on_disk / filepath; + auto filepath_in_backup = part_path_in_backup / filepath; + + if (files_without_checksums.contains(filepath)) + { + backup_entries.emplace_back(filepath_in_backup, std::make_unique(disk, filepath_on_disk)); + continue; + } + + if (make_temporary_hard_links) + { + String hardlink_filepath = temp_part_dir / filepath; + disk->createHardLink(filepath_on_disk, hardlink_filepath); + filepath_on_disk = hardlink_filepath; + } + + std::optional file_size; + std::optional file_hash; + + auto it = checksums.files.find(filepath); + if (it != checksums.files.end()) + { + file_size = it->second.file_size; + file_hash = {it->second.file_hash.first, it->second.file_hash.second}; + } + + backup_entries.emplace_back( + filepath_in_backup, + std::make_unique(disk, filepath_on_disk, file_size, file_hash, temp_dir_owner)); + } +} + +MutableDataPartStoragePtr DataPartStorageOnDiskBase::freeze( + const std::string & to, + const std::string & dir_path, + bool make_source_readonly, + std::function save_metadata_callback, + bool copy_instead_of_hardlink, + const NameSet & files_to_copy_instead_of_hardlinks) const +{ + auto disk = volume->getDisk(); + disk->createDirectories(to); + + localBackup(disk, getRelativePath(), fs::path(to) / dir_path, make_source_readonly, {}, copy_instead_of_hardlink, files_to_copy_instead_of_hardlinks); + + if (save_metadata_callback) + save_metadata_callback(disk); + + disk->removeFileIfExists(fs::path(to) / dir_path / "delete-on-destroy.txt"); + disk->removeFileIfExists(fs::path(to) / dir_path / "txn_version.txt"); + + auto single_disk_volume = std::make_shared(disk->getName(), disk, 0); + + /// Do not initialize storage in case of DETACH because part may be broken. + bool to_detached = dir_path.starts_with("detached/"); + return create(single_disk_volume, to, dir_path, /*initialize=*/ !to_detached); +} + +MutableDataPartStoragePtr DataPartStorageOnDiskBase::clonePart( + const std::string & to, + const std::string & dir_path, + const DiskPtr & disk, + Poco::Logger * log) const +{ + String path_to_clone = fs::path(to) / dir_path / ""; + + if (disk->exists(path_to_clone)) + { + LOG_WARNING(log, "Path {} already exists. 
Will remove it and clone again.", fullPath(disk, path_to_clone)); + disk->removeRecursive(path_to_clone); + } + + disk->createDirectories(to); + volume->getDisk()->copy(getRelativePath(), disk, to); + volume->getDisk()->removeFileIfExists(fs::path(path_to_clone) / "delete-on-destroy.txt"); + + auto single_disk_volume = std::make_shared(disk->getName(), disk, 0); + return create(single_disk_volume, to, dir_path, /*initialize=*/ true); +} + +void DataPartStorageOnDiskBase::rename( + std::string new_root_path, + std::string new_part_dir, + Poco::Logger * log, + bool remove_new_dir_if_exists, + bool fsync_part_dir) +{ + if (new_root_path.ends_with('/')) + new_root_path.pop_back(); + if (new_part_dir.ends_with('/')) + new_part_dir.pop_back(); + + String to = fs::path(new_root_path) / new_part_dir / ""; + + if (volume->getDisk()->exists(to)) + { + if (remove_new_dir_if_exists) + { + Names files; + volume->getDisk()->listFiles(to, files); + + if (log) + LOG_WARNING(log, + "Part directory {} already exists and contains {} files. Removing it.", + fullPath(volume->getDisk(), to), files.size()); + + executeWriteOperation([&](auto & disk) { disk.removeRecursive(to); }); + } + else + { + throw Exception( + ErrorCodes::DIRECTORY_ALREADY_EXISTS, + "Part directory {} already exists", + fullPath(volume->getDisk(), to)); + } + } + + String from = getRelativePath(); + + /// Why? + executeWriteOperation([&](auto & disk) + { + disk.setLastModified(from, Poco::Timestamp::fromEpochTime(time(nullptr))); + disk.moveDirectory(from, to); + + /// Only after moveDirectory() since before the directory does not exists. + SyncGuardPtr to_sync_guard; + if (fsync_part_dir) + to_sync_guard = volume->getDisk()->getDirectorySyncGuard(to); + }); + + part_dir = new_part_dir; + root_path = new_root_path; +} + +void DataPartStorageOnDiskBase::remove( CanRemoveCallback && can_remove_callback, const MergeTreeDataPartChecksums & checksums, std::list projections, bool is_temp, - MergeTreeDataPartState state, Poco::Logger * log) { /// NOTE We rename part to delete_tmp_ instead of delete_tmp_ to avoid race condition /// when we try to remove two parts with the same name, but different relative paths, /// for example all_1_2_1 (in Deleting state) and tmp_merge_all_1_2_1 (in Temporary state). fs::path from = fs::path(root_path) / part_dir; - // fs::path to = fs::path(root_path) / ("delete_tmp_" + part_dir); // TODO directory delete_tmp_ is never removed if server crashes before returning from this function /// Cut last "/" if it exists (it shouldn't). Otherwise fs::path behave differently. 
@@ -262,15 +596,18 @@ void DataPartStorageOnDisk::remove( NameSet files_not_to_remove_for_projection; for (const auto & file_name : can_remove_description->files_not_to_remove) - { if (file_name.starts_with(proj_dir_name)) files_not_to_remove_for_projection.emplace(fs::path(file_name).filename()); - } + LOG_DEBUG(log, "Will not remove files [{}] for projection {}", fmt::join(files_not_to_remove_for_projection, ", "), projection.name); - clearDirectory( - fs::path(to) / proj_dir_name, - can_remove_description->can_remove_anything, files_not_to_remove_for_projection, projection.checksums, {}, is_temp, state, log, true); + CanRemoveDescription proj_description + { + can_remove_description->can_remove_anything, + std::move(files_not_to_remove_for_projection), + }; + + clearDirectory(fs::path(to) / proj_dir_name, proj_description, projection.checksums, is_temp, log); } /// It is possible that we are removing the part which have a written but not loaded projection. @@ -279,55 +616,49 @@ void DataPartStorageOnDisk::remove( /// See test 01701_clear_projection_and_part. for (const auto & [name, _] : checksums.files) { - if (endsWith(name, proj_suffix) && !projection_directories.contains(name) && disk->isDirectory(fs::path(to) / name)) + if (endsWith(name, proj_suffix) && !projection_directories.contains(name)) { + static constexpr auto checksums_name = "checksums.txt"; + auto projection_storage = create(volume, to, name, /*initialize=*/ true); /// If we have a directory with suffix '.proj' it is likely a projection. /// Try to load checksums for it (to avoid recursive removing fallback). - std::string checksum_path = fs::path(to) / name / "checksums.txt"; - if (disk->exists(checksum_path)) + if (projection_storage->exists(checksums_name)) { try { MergeTreeDataPartChecksums tmp_checksums; - auto in = disk->readFile(checksum_path, {}); + auto in = projection_storage->readFile(checksums_name, {}, {}, {}); tmp_checksums.read(*in); - projection_directories.emplace(name); - - clearDirectory( - fs::path(to) / name, - can_remove_description->can_remove_anything, can_remove_description->files_not_to_remove, tmp_checksums, {}, is_temp, state, log, true); + clearDirectory(fs::path(to) / name, *can_remove_description, tmp_checksums, is_temp, log); } catch (...) { - LOG_ERROR(log, "Cannot load checksums from {}", checksum_path); + LOG_ERROR(log, "Cannot load checksums from {}", fs::path(projection_storage->getRelativePath()) / checksums_name); } } } } - clearDirectory(to, can_remove_description->can_remove_anything, can_remove_description->files_not_to_remove, checksums, projection_directories, is_temp, state, log, false); + clearDirectory(to, *can_remove_description, checksums, is_temp, log); } -void DataPartStorageOnDisk::clearDirectory( +void DataPartStorageOnDiskBase::clearDirectory( const std::string & dir, - bool can_remove_shared_data, - const NameSet & names_not_to_remove, + const CanRemoveDescription & can_remove_description, const MergeTreeDataPartChecksums & checksums, - const std::unordered_set & skip_directories, bool is_temp, - MergeTreeDataPartState state, - Poco::Logger * log, - bool is_projection) const + Poco::Logger * log) { auto disk = volume->getDisk(); + auto [can_remove_shared_data, names_not_to_remove] = can_remove_description; + names_not_to_remove = getActualFileNamesOnDisk(names_not_to_remove); /// It does not make sense to try fast path for incomplete temporary parts, because some files are probably absent. 
/// Sometimes we add something to checksums.files before actually writing checksums and columns on disk. /// Also sometimes we write checksums.txt and columns.txt in arbitrary order, so this check becomes complex... - bool is_temporary_part = is_temp || state == MergeTreeDataPartState::Temporary; - bool incomplete_temporary_part = is_temporary_part && (!disk->exists(fs::path(dir) / "checksums.txt") || !disk->exists(fs::path(dir) / "columns.txt")); + bool incomplete_temporary_part = is_temp && (!disk->exists(fs::path(dir) / "checksums.txt") || !disk->exists(fs::path(dir) / "columns.txt")); if (checksums.empty() || incomplete_temporary_part) { /// If the part is not completely written, we cannot use fast path by listing files. @@ -337,23 +668,21 @@ void DataPartStorageOnDisk::clearDirectory( try { + NameSet names_to_remove = {"checksums.txt", "columns.txt"}; + for (const auto & [file, _] : checksums.files) + if (!endsWith(file, ".proj")) + names_to_remove.emplace(file); + + names_to_remove = getActualFileNamesOnDisk(names_to_remove); + /// Remove each expected file in directory, then remove directory itself. RemoveBatchRequest request; - - for (const auto & [file, _] : checksums.files) - { - if (skip_directories.find(file) == skip_directories.end()) - request.emplace_back(fs::path(dir) / file); - } - - for (const auto & file : {"checksums.txt", "columns.txt"}) + for (const auto & file : names_to_remove) request.emplace_back(fs::path(dir) / file); request.emplace_back(fs::path(dir) / "default_compression_codec.txt", true); request.emplace_back(fs::path(dir) / "delete-on-destroy.txt", true); - - if (!is_projection) - request.emplace_back(fs::path(dir) / "txn_version.txt", true); + request.emplace_back(fs::path(dir) / "txn_version.txt", true); disk->removeSharedFiles(request, !can_remove_shared_data, names_not_to_remove); disk->removeDirectory(dir); @@ -367,330 +696,7 @@ void DataPartStorageOnDisk::clearDirectory( } } -std::optional DataPartStorageOnDisk::getRelativePathForPrefix(Poco::Logger * log, const String & prefix, bool detached, bool broken) const -{ - assert(!broken || detached); - String res; - - auto full_relative_path = fs::path(root_path); - if (detached) - full_relative_path /= "detached"; - - std::optional original_checksums_content; - std::optional original_files_list; - - for (int try_no = 0; try_no < 10; ++try_no) - { - if (prefix.empty()) - res = part_dir + (try_no ? "_try" + DB::toString(try_no) : ""); - else if (prefix.ends_with("_")) - res = prefix + part_dir + (try_no ? "_try" + DB::toString(try_no) : ""); - else - res = prefix + "_" + part_dir + (try_no ? "_try" + DB::toString(try_no) : ""); - - if (!volume->getDisk()->exists(full_relative_path / res)) - return res; - - if (broken && looksLikeBrokenDetachedPartHasTheSameContent(res, original_checksums_content, original_files_list)) - { - LOG_WARNING(log, "Directory {} (to detach to) already exists, " - "but its content looks similar to content of the broken part which we are going to detach. " - "Assuming it was already cloned to detached, will not do it again to avoid redundant copies of broken part.", res); - return {}; - } - - LOG_WARNING(log, "Directory {} (to detach to) already exists. 
Will detach to directory with '_tryN' suffix.", res); - } - - return res; -} - -bool DataPartStorageOnDisk::looksLikeBrokenDetachedPartHasTheSameContent(const String & detached_part_path, - std::optional & original_checksums_content, - std::optional & original_files_list) const -{ - /// We cannot know for sure that content of detached part is the same, - /// but in most cases it's enough to compare checksums.txt and list of files. - - if (!exists("checksums.txt")) - return false; - - auto detached_full_path = fs::path(root_path) / "detached" / detached_part_path; - auto disk = volume->getDisk(); - if (!disk->exists(detached_full_path / "checksums.txt")) - return false; - - if (!original_checksums_content) - { - auto in = disk->readFile(detached_full_path / "checksums.txt", /* settings */ {}, /* read_hint */ {}, /* file_size */ {}); - original_checksums_content.emplace(); - readStringUntilEOF(*original_checksums_content, *in); - } - - if (original_checksums_content->empty()) - return false; - - auto part_full_path = fs::path(root_path) / part_dir; - String detached_checksums_content; - { - auto in = readFile("checksums.txt", /* settings */ {}, /* read_hint */ {}, /* file_size */ {}); - readStringUntilEOF(detached_checksums_content, *in); - } - - if (original_checksums_content != detached_checksums_content) - return false; - - if (!original_files_list) - { - original_files_list.emplace(); - disk->listFiles(part_full_path, *original_files_list); - std::sort(original_files_list->begin(), original_files_list->end()); - } - - Strings detached_files_list; - disk->listFiles(detached_full_path, detached_files_list); - std::sort(detached_files_list.begin(), detached_files_list.end()); - - return original_files_list == detached_files_list; -} - -std::string DataPartStorageOnDisk::getDiskName() const -{ - return volume->getDisk()->getName(); -} - -std::string DataPartStorageOnDisk::getDiskType() const -{ - return toString(volume->getDisk()->getDataSourceDescription().type); -} - -bool DataPartStorageOnDisk::isStoredOnRemoteDisk() const -{ - return volume->getDisk()->isRemote(); -} - -bool DataPartStorageOnDisk::supportZeroCopyReplication() const -{ - return volume->getDisk()->supportZeroCopyReplication(); -} - -bool DataPartStorageOnDisk::supportParallelWrite() const -{ - return volume->getDisk()->supportParallelWrite(); -} - -bool DataPartStorageOnDisk::isBroken() const -{ - return volume->getDisk()->isBroken(); -} - -void DataPartStorageOnDisk::syncRevision(UInt64 revision) const -{ - volume->getDisk()->syncRevision(revision); -} - -UInt64 DataPartStorageOnDisk::getRevision() const -{ - return volume->getDisk()->getRevision(); -} - -std::unordered_map DataPartStorageOnDisk::getSerializedMetadata(const std::vector & paths) const -{ - return volume->getDisk()->getSerializedMetadata(paths); -} - -std::string DataPartStorageOnDisk::getDiskPath() const -{ - return volume->getDisk()->getPath(); -} - -ReservationPtr DataPartStorageOnDisk::reserve(UInt64 bytes) const -{ - auto res = volume->reserve(bytes); - if (!res) - throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Cannot reserve {}, not enough space", ReadableSize(bytes)); - - return res; -} - -ReservationPtr DataPartStorageOnDisk::tryReserve(UInt64 bytes) const -{ - return volume->reserve(bytes); -} - -String DataPartStorageOnDisk::getUniqueId() const -{ - auto disk = volume->getDisk(); - if (!disk->supportZeroCopyReplication()) - throw Exception(fmt::format("Disk {} doesn't support zero-copy replication", disk->getName()), ErrorCodes::LOGICAL_ERROR); - 
- return disk->getUniqueId(fs::path(getRelativePath()) / "checksums.txt"); -} - -void DataPartStorageOnDisk::backup( - const MergeTreeDataPartChecksums & checksums, - const NameSet & files_without_checksums, - const String & path_in_backup, - BackupEntries & backup_entries, - bool make_temporary_hard_links, - TemporaryFilesOnDisks * temp_dirs) const -{ - fs::path part_path_on_disk = fs::path{root_path} / part_dir; - fs::path part_path_in_backup = fs::path{path_in_backup} / part_dir; - - auto disk = volume->getDisk(); - - fs::path temp_part_dir; - std::shared_ptr temp_dir_owner; - if (make_temporary_hard_links) - { - assert(temp_dirs); - auto temp_dir_it = temp_dirs->find(disk); - if (temp_dir_it == temp_dirs->end()) - temp_dir_it = temp_dirs->emplace(disk, std::make_shared(disk, "tmp/")).first; - temp_dir_owner = temp_dir_it->second; - fs::path temp_dir = temp_dir_owner->getPath(); - temp_part_dir = temp_dir / part_path_in_backup.relative_path(); - disk->createDirectories(temp_part_dir); - } - - /// For example, - /// part_path_in_backup = /data/test/table/0_1_1_0 - /// part_path_on_disk = store/f57/f5728353-44bb-4575-85e8-28deb893657a/0_1_1_0 - /// tmp_part_dir = tmp/1aaaaaa/data/test/table/0_1_1_0 - /// Or, for projections: - /// part_path_in_backup = /data/test/table/0_1_1_0/prjmax.proj - /// part_path_on_disk = store/f57/f5728353-44bb-4575-85e8-28deb893657a/0_1_1_0/prjmax.proj - /// tmp_part_dir = tmp/1aaaaaa/data/test/table/0_1_1_0/prjmax.proj - - for (const auto & [filepath, checksum] : checksums.files) - { - if (filepath.ends_with(".proj")) - continue; /// Skip *.proj files - they're actually directories and will be handled. - String filepath_on_disk = part_path_on_disk / filepath; - String filepath_in_backup = part_path_in_backup / filepath; - - if (make_temporary_hard_links) - { - String hardlink_filepath = temp_part_dir / filepath; - disk->createHardLink(filepath_on_disk, hardlink_filepath); - filepath_on_disk = hardlink_filepath; - } - - UInt128 file_hash{checksum.file_hash.first, checksum.file_hash.second}; - backup_entries.emplace_back( - filepath_in_backup, - std::make_unique(disk, filepath_on_disk, checksum.file_size, file_hash, temp_dir_owner)); - } - - for (const auto & filepath : files_without_checksums) - { - String filepath_on_disk = part_path_on_disk / filepath; - String filepath_in_backup = part_path_in_backup / filepath; - backup_entries.emplace_back(filepath_in_backup, std::make_unique(disk, filepath_on_disk)); - } -} - -MutableDataPartStoragePtr DataPartStorageOnDisk::freeze( - const std::string & to, - const std::string & dir_path, - bool make_source_readonly, - std::function save_metadata_callback, - bool copy_instead_of_hardlink, - const NameSet & files_to_copy_instead_of_hardlinks) const - -{ - auto disk = volume->getDisk(); - disk->createDirectories(to); - - localBackup(disk, getRelativePath(), fs::path(to) / dir_path, make_source_readonly, {}, copy_instead_of_hardlink, files_to_copy_instead_of_hardlinks); - - if (save_metadata_callback) - save_metadata_callback(disk); - - disk->removeFileIfExists(fs::path(to) / dir_path / "delete-on-destroy.txt"); - disk->removeFileIfExists(fs::path(to) / dir_path / "txn_version.txt"); - - auto single_disk_volume = std::make_shared(disk->getName(), disk, 0); - return std::make_shared(single_disk_volume, to, dir_path); -} - -MutableDataPartStoragePtr DataPartStorageOnDisk::clonePart( - const std::string & to, - const std::string & dir_path, - const DiskPtr & disk, - Poco::Logger * log) const -{ - String path_to_clone = 
fs::path(to) / dir_path / ""; - - if (disk->exists(path_to_clone)) - { - LOG_WARNING(log, "Path {} already exists. Will remove it and clone again.", fullPath(disk, path_to_clone)); - disk->removeRecursive(path_to_clone); - } - - disk->createDirectories(to); - volume->getDisk()->copy(getRelativePath(), disk, to); - volume->getDisk()->removeFileIfExists(fs::path(path_to_clone) / "delete-on-destroy.txt"); - - auto single_disk_volume = std::make_shared(disk->getName(), disk, 0); - return std::make_shared(single_disk_volume, to, dir_path); -} - -void DataPartStorageOnDisk::rename( - std::string new_root_path, - std::string new_part_dir, - Poco::Logger * log, - bool remove_new_dir_if_exists, - bool fsync_part_dir) -{ - if (new_root_path.ends_with('/')) - new_root_path.pop_back(); - if (new_part_dir.ends_with('/')) - new_part_dir.pop_back(); - - String to = fs::path(new_root_path) / new_part_dir / ""; - - if (volume->getDisk()->exists(to)) - { - if (remove_new_dir_if_exists) - { - Names files; - volume->getDisk()->listFiles(to, files); - - if (log) - LOG_WARNING(log, - "Part directory {} already exists and contains {} files. Removing it.", - fullPath(volume->getDisk(), to), files.size()); - - executeOperation([&](auto & disk) { disk.removeRecursive(to); }); - } - else - { - throw Exception( - ErrorCodes::DIRECTORY_ALREADY_EXISTS, - "Part directory {} already exists", - fullPath(volume->getDisk(), to)); - } - } - String from = getRelativePath(); - - /// Why? - executeOperation([&](auto & disk) - { - disk.setLastModified(from, Poco::Timestamp::fromEpochTime(time(nullptr))); - disk.moveDirectory(from, to); - }); - - part_dir = new_part_dir; - root_path = new_root_path; - - SyncGuardPtr sync_guard; - if (fsync_part_dir) - sync_guard = volume->getDisk()->getDirectorySyncGuard(getRelativePath()); -} - -void DataPartStorageOnDisk::changeRootPath(const std::string & from_root, const std::string & to_root) +void DataPartStorageOnDiskBase::changeRootPath(const std::string & from_root, const std::string & to_root) { /// This is a very dumb implementation, here for root path like /// "some/current/path/to/part" and change like @@ -716,136 +722,34 @@ void DataPartStorageOnDisk::changeRootPath(const std::string & from_root, const root_path = to_root.substr(0, dst_size) + root_path.substr(prefix_size); } -SyncGuardPtr DataPartStorageOnDisk::getDirectorySyncGuard() const +SyncGuardPtr DataPartStorageOnDiskBase::getDirectorySyncGuard() const { return volume->getDisk()->getDirectorySyncGuard(fs::path(root_path) / part_dir); } -template -void DataPartStorageOnDisk::executeOperation(Op && op) -{ - if (transaction) - op(*transaction); - else - op(*volume->getDisk()); -} - -std::unique_ptr DataPartStorageOnDisk::writeFile( - const String & name, - size_t buf_size, - const WriteSettings & settings) -{ - if (transaction) - return transaction->writeFile(fs::path(root_path) / part_dir / name, buf_size, WriteMode::Rewrite, settings, /* autocommit = */ false); - - return volume->getDisk()->writeFile(fs::path(root_path) / part_dir / name, buf_size, WriteMode::Rewrite, settings); -} - -std::unique_ptr DataPartStorageOnDisk::writeFile( - const String & name, - size_t buf_size, - WriteMode mode, - const WriteSettings & settings) -{ - if (transaction) - return transaction->writeFile(fs::path(root_path) / part_dir / name, buf_size, mode, settings, /* autocommit = */ false); - return volume->getDisk()->writeFile(fs::path(root_path) / part_dir / name, buf_size, mode, settings); -} - -std::unique_ptr 
DataPartStorageOnDisk::writeTransactionFile(WriteMode mode) const +std::unique_ptr DataPartStorageOnDiskBase::writeTransactionFile(WriteMode mode) const { return volume->getDisk()->writeFile(fs::path(root_path) / part_dir / "txn_version.txt", 256, mode); } -void DataPartStorageOnDisk::createFile(const String & name) +void DataPartStorageOnDiskBase::removeRecursive() { - executeOperation([&](auto & disk) { disk.createFile(fs::path(root_path) / part_dir / name); }); + executeWriteOperation([&](auto & disk) { disk.removeRecursive(fs::path(root_path) / part_dir); }); } -void DataPartStorageOnDisk::moveFile(const String & from_name, const String & to_name) +void DataPartStorageOnDiskBase::removeSharedRecursive(bool keep_in_remote_fs) { - executeOperation([&](auto & disk) - { - auto relative_path = fs::path(root_path) / part_dir; - disk.moveFile(relative_path / from_name, relative_path / to_name); - }); + executeWriteOperation([&](auto & disk) { disk.removeSharedRecursive(fs::path(root_path) / part_dir, keep_in_remote_fs, {}); }); } -void DataPartStorageOnDisk::replaceFile(const String & from_name, const String & to_name) +void DataPartStorageOnDiskBase::createDirectories() { - executeOperation([&](auto & disk) - { - auto relative_path = fs::path(root_path) / part_dir; - disk.replaceFile(relative_path / from_name, relative_path / to_name); - }); + executeWriteOperation([&](auto & disk) { disk.createDirectories(fs::path(root_path) / part_dir); }); } -void DataPartStorageOnDisk::removeFile(const String & name) +bool DataPartStorageOnDiskBase::hasActiveTransaction() const { - executeOperation([&](auto & disk) { disk.removeFile(fs::path(root_path) / part_dir / name); }); -} - -void DataPartStorageOnDisk::removeFileIfExists(const String & name) -{ - executeOperation([&](auto & disk) { disk.removeFileIfExists(fs::path(root_path) / part_dir / name); }); -} - -void DataPartStorageOnDisk::removeRecursive() -{ - executeOperation([&](auto & disk) { disk.removeRecursive(fs::path(root_path) / part_dir); }); -} - -void DataPartStorageOnDisk::removeSharedRecursive(bool keep_in_remote_fs) -{ - executeOperation([&](auto & disk) { disk.removeSharedRecursive(fs::path(root_path) / part_dir, keep_in_remote_fs, {}); }); -} - -void DataPartStorageOnDisk::createHardLinkFrom(const IDataPartStorage & source, const std::string & from, const std::string & to) -{ - const auto * source_on_disk = typeid_cast(&source); - if (!source_on_disk) - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Cannot create hardlink from different storage. Expected DataPartStorageOnDisk, got {}", - typeid(source).name()); - - executeOperation([&](auto & disk) - { - disk.createHardLink( - fs::path(source_on_disk->getRelativePath()) / from, - fs::path(root_path) / part_dir / to); - }); -} - -void DataPartStorageOnDisk::createDirectories() -{ - executeOperation([&](auto & disk) { disk.createDirectories(fs::path(root_path) / part_dir); }); -} - -void DataPartStorageOnDisk::createProjection(const std::string & name) -{ - executeOperation([&](auto & disk) { disk.createDirectory(fs::path(root_path) / part_dir / name); }); -} - -void DataPartStorageOnDisk::beginTransaction() -{ - if (transaction) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Uncommitted {}transaction already exists", has_shared_transaction ? 
"shared " : ""); - - transaction = volume->getDisk()->createTransaction(); -} - -void DataPartStorageOnDisk::commitTransaction() -{ - if (!transaction) - throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no uncommitted transaction"); - - if (has_shared_transaction) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot commit shared transaction"); - - transaction->commit(); - transaction.reset(); + return transaction != nullptr; } } diff --git a/src/Storages/MergeTree/DataPartStorageOnDisk.h b/src/Storages/MergeTree/DataPartStorageOnDiskBase.h similarity index 57% rename from src/Storages/MergeTree/DataPartStorageOnDisk.h rename to src/Storages/MergeTree/DataPartStorageOnDiskBase.h index 3e82d44d71e..7c408dcf381 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDisk.h +++ b/src/Storages/MergeTree/DataPartStorageOnDiskBase.h @@ -1,6 +1,7 @@ #pragma once #include #include +#include #include #include @@ -10,48 +11,19 @@ namespace DB class IVolume; using VolumePtr = std::shared_ptr; - -class DataPartStorageOnDisk final : public IDataPartStorage +class DataPartStorageOnDiskBase : public IDataPartStorage { public: - DataPartStorageOnDisk(VolumePtr volume_, std::string root_path_, std::string part_dir_); + DataPartStorageOnDiskBase(VolumePtr volume_, std::string root_path_, std::string part_dir_); std::string getFullPath() const override; std::string getRelativePath() const override; - std::string getPartDirectory() const override { return part_dir; } + std::string getPartDirectory() const override; std::string getFullRootPath() const override; - MutableDataPartStoragePtr getProjection(const std::string & name, bool use_parent_transaction = true) override; // NOLINT - DataPartStoragePtr getProjection(const std::string & name) const override; - - bool exists() const override; - bool exists(const std::string & name) const override; - bool isDirectory(const std::string & name) const override; - Poco::Timestamp getLastModified() const override; - DataPartStorageIteratorPtr iterate() const override; - - size_t getFileSize(const std::string & file_name) const override; - UInt32 getRefCount(const std::string & file_name) const override; - UInt64 calculateTotalSizeOnDisk() const override; - std::unique_ptr readFile( - const std::string & name, - const ReadSettings & settings, - std::optional read_hint, - std::optional file_size) const override; - - void checkConsistency(const MergeTreeDataPartChecksums & checksums) const override; - - void remove( - CanRemoveCallback && can_remove_callback, - const MergeTreeDataPartChecksums & checksums, - std::list projections, - bool is_temp, - MergeTreeDataPartState state, - Poco::Logger * log) override; - /// Returns path to place detached part in or nullopt if we don't need to detach part (if it already exists and has the same content) std::optional getRelativePathForPrefix(Poco::Logger * log, const String & prefix, bool detached, bool broken) const override; @@ -69,11 +41,12 @@ public: bool isBroken() const override; void syncRevision(UInt64 revision) const override; UInt64 getRevision() const override; - std::unordered_map getSerializedMetadata(const std::vector & paths) const override; std::string getDiskPath() const override; ReservationPtr reserve(UInt64 bytes) const override; ReservationPtr tryReserve(UInt64 bytes) const override; - String getUniqueId() const override; + + ReplicatedFilesDescription getReplicatedFilesDescription(const NameSet & file_names) const override; + ReplicatedFilesDescription getReplicatedFilesDescriptionForRemoteDisk(const NameSet 
& file_names) const override; void backup( const MergeTreeDataPartChecksums & checksums, @@ -97,36 +70,6 @@ public: const DiskPtr & disk, Poco::Logger * log) const override; - void changeRootPath(const std::string & from_root, const std::string & to_root) override; - - void createDirectories() override; - void createProjection(const std::string & name) override; - - std::unique_ptr writeFile( - const String & name, - size_t buf_size, - const WriteSettings & settings) override; - - std::unique_ptr writeTransactionFile(WriteMode mode) const override; - - void createFile(const String & name) override; - void moveFile(const String & from_name, const String & to_name) override; - void replaceFile(const String & from_name, const String & to_name) override; - std::unique_ptr writeFile( - const String & name, - size_t buf_size, - DB::WriteMode mode, - const WriteSettings & settings) override; - - void removeFile(const String & name) override; - void removeFileIfExists(const String & name) override; - void removeRecursive() override; - void removeSharedRecursive(bool keep_in_remote_fs) override; - - SyncGuardPtr getDirectorySyncGuard() const override; - - void createHardLinkFrom(const IDataPartStorage & source, const std::string & from, const std::string & to) override; - void rename( std::string new_root_path, std::string new_part_dir, @@ -134,31 +77,58 @@ public: bool remove_new_dir_if_exists, bool fsync_part_dir) override; - void beginTransaction() override; - void commitTransaction() override; - bool hasActiveTransaction() const override { return transaction != nullptr; } + void remove( + CanRemoveCallback && can_remove_callback, + const MergeTreeDataPartChecksums & checksums, + std::list projections, + bool is_temp, + Poco::Logger * log) override; + + void changeRootPath(const std::string & from_root, const std::string & to_root) override; + void createDirectories() override; + + std::unique_ptr writeTransactionFile(WriteMode mode) const override; + + void removeRecursive() override; + void removeSharedRecursive(bool keep_in_remote_fs) override; + + SyncGuardPtr getDirectorySyncGuard() const override; + bool hasActiveTransaction() const override; + +protected: + DiskPtr getDisk() const; + + DataPartStorageOnDiskBase(VolumePtr volume_, std::string root_path_, std::string part_dir_, DiskTransactionPtr transaction_); + virtual MutableDataPartStoragePtr create(VolumePtr volume_, std::string root_path_, std::string part_dir_, bool initialize_) const = 0; -private: VolumePtr volume; std::string root_path; std::string part_dir; DiskTransactionPtr transaction; bool has_shared_transaction = false; - DataPartStorageOnDisk(VolumePtr volume_, std::string root_path_, std::string part_dir_, DiskTransactionPtr transaction_); - template - void executeOperation(Op && op); + void executeWriteOperation(Op && op) + { + if (transaction) + op(*transaction); + else + op(*volume->getDisk()); + } +private: void clearDirectory( const std::string & dir, - bool can_remove_shared_data, - const NameSet & names_not_to_remove, + const CanRemoveDescription & can_remove_description, const MergeTreeDataPartChecksums & checksums, - const std::unordered_set & skip_directories, bool is_temp, - MergeTreeDataPartState state, - Poco::Logger * log, - bool is_projection) const; + Poco::Logger * log); + + /// For names of expected data part files returns the actual names + /// of files in filesystem to which data of these files is written. 
+ /// Actual file name may be the same as expected + /// or be the name of the file with packed data. + virtual NameSet getActualFileNamesOnDisk(const NameSet & file_names) const = 0; }; + } diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskFull.cpp b/src/Storages/MergeTree/DataPartStorageOnDiskFull.cpp new file mode 100644 index 00000000000..62ea3e6cd4e --- /dev/null +++ b/src/Storages/MergeTree/DataPartStorageOnDiskFull.cpp @@ -0,0 +1,199 @@ +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + +DataPartStorageOnDiskFull::DataPartStorageOnDiskFull(VolumePtr volume_, std::string root_path_, std::string part_dir_) + : DataPartStorageOnDiskBase(std::move(volume_), std::move(root_path_), std::move(part_dir_)) +{ +} + +DataPartStorageOnDiskFull::DataPartStorageOnDiskFull( + VolumePtr volume_, std::string root_path_, std::string part_dir_, DiskTransactionPtr transaction_) + : DataPartStorageOnDiskBase(std::move(volume_), std::move(root_path_), std::move(part_dir_), std::move(transaction_)) +{ +} + +MutableDataPartStoragePtr DataPartStorageOnDiskFull::create( + VolumePtr volume_, std::string root_path_, std::string part_dir_, bool /*initialize_*/) const +{ + return std::make_shared(std::move(volume_), std::move(root_path_), std::move(part_dir_)); +} + +MutableDataPartStoragePtr DataPartStorageOnDiskFull::getProjection(const std::string & name, bool use_parent_transaction) // NOLINT +{ + return std::shared_ptr(new DataPartStorageOnDiskFull(volume, std::string(fs::path(root_path) / part_dir), name, use_parent_transaction ? transaction : nullptr)); +} + +DataPartStoragePtr DataPartStorageOnDiskFull::getProjection(const std::string & name) const +{ + return std::make_shared(volume, std::string(fs::path(root_path) / part_dir), name); +} + +bool DataPartStorageOnDiskFull::exists() const +{ + return volume->getDisk()->exists(fs::path(root_path) / part_dir); +} + +bool DataPartStorageOnDiskFull::exists(const std::string & name) const +{ + return volume->getDisk()->exists(fs::path(root_path) / part_dir / name); +} + +bool DataPartStorageOnDiskFull::isDirectory(const std::string & name) const +{ + return volume->getDisk()->isDirectory(fs::path(root_path) / part_dir / name); +} + +class DataPartStorageIteratorOnDisk final : public IDataPartStorageIterator +{ +public: + DataPartStorageIteratorOnDisk(DiskPtr disk_, DirectoryIteratorPtr it_) + : disk(std::move(disk_)), it(std::move(it_)) + { + } + + void next() override { it->next(); } + bool isValid() const override { return it->isValid(); } + bool isFile() const override { return isValid() && disk->isFile(it->path()); } + std::string name() const override { return it->name(); } + std::string path() const override { return it->path(); } + +private: + DiskPtr disk; + DirectoryIteratorPtr it; +}; + +DataPartStorageIteratorPtr DataPartStorageOnDiskFull::iterate() const +{ + return std::make_unique( + volume->getDisk(), + volume->getDisk()->iterateDirectory(fs::path(root_path) / part_dir)); +} + +size_t DataPartStorageOnDiskFull::getFileSize(const String & file_name) const +{ + return volume->getDisk()->getFileSize(fs::path(root_path) / part_dir / file_name); +} + +UInt32 DataPartStorageOnDiskFull::getRefCount(const String & file_name) const +{ + return volume->getDisk()->getRefCount(fs::path(root_path) / part_dir / file_name); +} + +String DataPartStorageOnDiskFull::getUniqueId() const +{ + auto disk = volume->getDisk(); + if (!disk->supportZeroCopyReplication()) + throw 
Exception(ErrorCodes::LOGICAL_ERROR, "Disk {} doesn't support zero-copy replication", disk->getName()); + + return disk->getUniqueId(fs::path(getRelativePath()) / "checksums.txt"); +} + +std::unique_ptr DataPartStorageOnDiskFull::readFile( + const std::string & name, + const ReadSettings & settings, + std::optional read_hint, + std::optional file_size) const +{ + return volume->getDisk()->readFile(fs::path(root_path) / part_dir / name, settings, read_hint, file_size); +} + +std::unique_ptr DataPartStorageOnDiskFull::writeFile( + const String & name, + size_t buf_size, + WriteMode mode, + const WriteSettings & settings) +{ + if (transaction) + return transaction->writeFile(fs::path(root_path) / part_dir / name, buf_size, mode, settings, /* autocommit = */ false); + else + return volume->getDisk()->writeFile(fs::path(root_path) / part_dir / name, buf_size, mode, settings); +} + +void DataPartStorageOnDiskFull::createFile(const String & name) +{ + executeWriteOperation([&](auto & disk) { disk.createFile(fs::path(root_path) / part_dir / name); }); +} + +void DataPartStorageOnDiskFull::moveFile(const String & from_name, const String & to_name) +{ + executeWriteOperation([&](auto & disk) + { + auto relative_path = fs::path(root_path) / part_dir; + disk.moveFile(relative_path / from_name, relative_path / to_name); + }); +} + +void DataPartStorageOnDiskFull::replaceFile(const String & from_name, const String & to_name) +{ + executeWriteOperation([&](auto & disk) + { + auto relative_path = fs::path(root_path) / part_dir; + disk.replaceFile(relative_path / from_name, relative_path / to_name); + }); +} + +void DataPartStorageOnDiskFull::removeFile(const String & name) +{ + executeWriteOperation([&](auto & disk) { disk.removeFile(fs::path(root_path) / part_dir / name); }); +} + +void DataPartStorageOnDiskFull::removeFileIfExists(const String & name) +{ + executeWriteOperation([&](auto & disk) { disk.removeFileIfExists(fs::path(root_path) / part_dir / name); }); +} + +void DataPartStorageOnDiskFull::createHardLinkFrom(const IDataPartStorage & source, const std::string & from, const std::string & to) +{ + const auto * source_on_disk = typeid_cast(&source); + if (!source_on_disk) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Cannot create hardlink from different storage. Expected DataPartStorageOnDiskFull, got {}", + typeid(source).name()); + + executeWriteOperation([&](auto & disk) + { + disk.createHardLink( + fs::path(source_on_disk->getRelativePath()) / from, + fs::path(root_path) / part_dir / to); + }); +} + +void DataPartStorageOnDiskFull::createProjection(const std::string & name) +{ + executeWriteOperation([&](auto & disk) { disk.createDirectory(fs::path(root_path) / part_dir / name); }); +} + +void DataPartStorageOnDiskFull::beginTransaction() +{ + if (transaction) + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Uncommitted{}transaction already exists", has_shared_transaction ? 
" shared " : " "); + + transaction = volume->getDisk()->createTransaction(); +} + +void DataPartStorageOnDiskFull::commitTransaction() +{ + if (!transaction) + throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no uncommitted transaction"); + + if (has_shared_transaction) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot commit shared transaction"); + + transaction->commit(); + transaction.reset(); +} + +} diff --git a/src/Storages/MergeTree/DataPartStorageOnDiskFull.h b/src/Storages/MergeTree/DataPartStorageOnDiskFull.h new file mode 100644 index 00000000000..8b38bfd7105 --- /dev/null +++ b/src/Storages/MergeTree/DataPartStorageOnDiskFull.h @@ -0,0 +1,62 @@ +#pragma once +#include "Storages/MergeTree/IDataPartStorage.h" +#include + +namespace DB +{ + +/// A storage for data part that stores files on filesystem as is. +class DataPartStorageOnDiskFull final : public DataPartStorageOnDiskBase +{ +public: + DataPartStorageOnDiskFull(VolumePtr volume_, std::string root_path_, std::string part_dir_); + MergeTreeDataPartStorageType getType() const override { return MergeTreeDataPartStorageType::Full; } + + MutableDataPartStoragePtr getProjection(const std::string & name, bool use_parent_transaction = true) override; // NOLINT + DataPartStoragePtr getProjection(const std::string & name) const override; + + bool exists() const override; + bool exists(const std::string & name) const override; + bool isDirectory(const std::string & name) const override; + + DataPartStorageIteratorPtr iterate() const override; + size_t getFileSize(const std::string & file_name) const override; + UInt32 getRefCount(const std::string & file_name) const override; + String getUniqueId() const override; + + std::unique_ptr readFile( + const std::string & name, + const ReadSettings & settings, + std::optional read_hint, + std::optional file_size) const override; + + void createProjection(const std::string & name) override; + + std::unique_ptr writeFile( + const String & name, + size_t buf_size, + WriteMode mode, + const WriteSettings & settings) override; + + void createFile(const String & name) override; + void moveFile(const String & from_name, const String & to_name) override; + void replaceFile(const String & from_name, const String & to_name) override; + + void removeFile(const String & name) override; + void removeFileIfExists(const String & name) override; + + void createHardLinkFrom(const IDataPartStorage & source, const std::string & from, const std::string & to) override; + + void beginTransaction() override; + void commitTransaction() override; + void precommitTransaction() override {} + bool hasActiveTransaction() const override { return transaction != nullptr; } + +private: + DataPartStorageOnDiskFull(VolumePtr volume_, std::string root_path_, std::string part_dir_, DiskTransactionPtr transaction_); + MutableDataPartStoragePtr create(VolumePtr volume_, std::string root_path_, std::string part_dir_, bool initialize_) const override; + + NameSet getActualFileNamesOnDisk(const NameSet & file_names) const override { return file_names; } +}; + +} diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index f5c07cd8635..cbbdb911974 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include @@ -38,7 +38,6 @@ namespace DB namespace ErrorCodes { - extern const int DIRECTORY_ALREADY_EXISTS; extern const int NO_SUCH_DATA_PART; 
extern const int ABORTED; extern const int BAD_SIZE_OF_FILE_IN_DATA_PART; @@ -67,7 +66,6 @@ constexpr auto REPLICATION_PROTOCOL_VERSION_WITH_PARTS_PROJECTION = 7; // Reserved for ALTER PRIMARY KEY // constexpr auto REPLICATION_PROTOCOL_VERSION_WITH_PARTS_PRIMARY_KEY = 8; - std::string getEndpointId(const std::string & node_id) { return "DataPartsExchange:" + node_id; @@ -181,6 +179,7 @@ void Service::processQuery(const HTMLForm & params, ReadBuffer & /*body*/, Write std::sregex_token_iterator()); bool send_projections = client_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_PROJECTION; + if (send_projections) { const auto & projections = part->getProjectionParts(); @@ -197,7 +196,7 @@ void Service::processQuery(const HTMLForm & params, ReadBuffer & /*body*/, Write { /// Send metadata if the receiver's capability covers the source disk type. response.addCookie({"remote_fs_metadata", disk_type}); - sendPartFromDiskRemoteMeta(part, out, true, send_projections); + sendPartFromDisk(part, out, client_protocol_version, true, send_projections); return; } } @@ -205,7 +204,7 @@ void Service::processQuery(const HTMLForm & params, ReadBuffer & /*body*/, Write if (isInMemoryPart(part)) sendPartFromMemory(part, out, send_projections); else - sendPartFromDisk(part, out, client_protocol_version, send_projections); + sendPartFromDisk(part, out, client_protocol_version, false, send_projections); } catch (const NetException &) { @@ -261,30 +260,50 @@ MergeTreeData::DataPart::Checksums Service::sendPartFromDisk( const MergeTreeData::DataPartPtr & part, WriteBuffer & out, int client_protocol_version, + bool from_remote_disk, bool send_projections) { - /// We'll take a list of files from the list of checksums. - MergeTreeData::DataPart::Checksums checksums = part->checksums; - /// Add files that are not in the checksum list. 
+ NameSet files_to_replicate; auto file_names_without_checksums = part->getFileNamesWithoutChecksums(); - for (const auto & file_name : file_names_without_checksums) + + for (const auto & [name, _] : part->checksums.files) { - if (client_protocol_version < REPLICATION_PROTOCOL_VERSION_WITH_PARTS_DEFAULT_COMPRESSION - && file_name == IMergeTreeDataPart::DEFAULT_COMPRESSION_CODEC_FILE_NAME) + if (endsWith(name, ".proj")) continue; - checksums.files[file_name] = {}; + files_to_replicate.insert(name); + } + + for (const auto & name : file_names_without_checksums) + { + if (client_protocol_version < REPLICATION_PROTOCOL_VERSION_WITH_PARTS_DEFAULT_COMPRESSION + && name == IMergeTreeDataPart::DEFAULT_COMPRESSION_CODEC_FILE_NAME) + continue; + + files_to_replicate.insert(name); + } + + auto data_part_storage = part->getDataPartStoragePtr(); + IDataPartStorage::ReplicatedFilesDescription replicated_description; + + if (from_remote_disk) + { + replicated_description = data_part_storage->getReplicatedFilesDescriptionForRemoteDisk(files_to_replicate); + if (!part->isProjectionPart()) + writeStringBinary(replicated_description.unique_id, out); + } + else + { + replicated_description = data_part_storage->getReplicatedFilesDescription(files_to_replicate); } MergeTreeData::DataPart::Checksums data_checksums; for (const auto & [name, projection] : part->getProjectionParts()) { - // Get rid of projection files - checksums.files.erase(name + ".proj"); if (send_projections) { writeStringBinary(name, out); - MergeTreeData::DataPart::Checksums projection_checksum = sendPartFromDisk(projection, out, client_protocol_version, false); + MergeTreeData::DataPart::Checksums projection_checksum = sendPartFromDisk(projection, out, client_protocol_version, from_remote_disk, false); data_checksums.addFile(name + ".proj", projection_checksum.getTotalSizeOnDisk(), projection_checksum.getTotalChecksumUInt128()); } else if (part->checksums.has(name + ".proj")) @@ -295,29 +314,25 @@ MergeTreeData::DataPart::Checksums Service::sendPartFromDisk( } } - writeBinary(checksums.files.size(), out); - for (const auto & it : checksums.files) + writeBinary(replicated_description.files.size(), out); + for (const auto & [file_name, desc] : replicated_description.files) { - String file_name = it.first; + writeStringBinary(file_name, out); + writeBinary(desc.file_size, out); - UInt64 size = part->getDataPartStorage().getFileSize(file_name); - - writeStringBinary(it.first, out); - writeBinary(size, out); - - auto file_in = part->getDataPartStorage().readFile(file_name, {}, std::nullopt, std::nullopt); + auto file_in = desc.input_buffer_getter(); HashingWriteBuffer hashing_out(out); copyDataWithThrottler(*file_in, hashing_out, blocker.getCounter(), data.getSendsThrottler()); if (blocker.isCancelled()) - throw Exception("Transferring part to replica was cancelled", ErrorCodes::ABORTED); + throw Exception(ErrorCodes::ABORTED, "Transferring part to replica was cancelled"); - if (hashing_out.count() != size) + if (hashing_out.count() != desc.file_size) throw Exception( ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, "Unexpected size of file {}, expected {} got {}", std::string(fs::path(part->getDataPartStorage().getRelativePath()) / file_name), - hashing_out.count(), size); + desc.file_size, hashing_out.count()); writePODBinary(hashing_out.getHash(), out); @@ -325,90 +340,12 @@ MergeTreeData::DataPart::Checksums Service::sendPartFromDisk( data_checksums.addFile(file_name, hashing_out.count(), hashing_out.getHash()); } - 
part->checksums.checkEqual(data_checksums, false); + if (!from_remote_disk && isFullPartStorage(part->getDataPartStorage())) + part->checksums.checkEqual(data_checksums, false); + return data_checksums; } -void Service::sendPartFromDiskRemoteMeta( - const MergeTreeData::DataPartPtr & part, - WriteBuffer & out, - bool send_part_id, - bool send_projections) -{ - auto data_part_storage = part->getDataPartStoragePtr(); - if (!data_part_storage->supportZeroCopyReplication()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Disk '{}' doesn't support zero-copy replication", data_part_storage->getDiskName()); - - /// We'll take a list of files from the list of checksums. - MergeTreeData::DataPart::Checksums checksums = part->checksums; - /// Add files that are not in the checksum list. - auto file_names_without_checksums = part->getFileNamesWithoutChecksums(); - for (const auto & file_name : file_names_without_checksums) - checksums.files[file_name] = {}; - - for (const auto & [name, projection] : part->getProjectionParts()) - { - // Get rid of projection files - checksums.files.erase(name + ".proj"); - } - - std::vector paths; - paths.reserve(checksums.files.size()); - for (const auto & it : checksums.files) - paths.push_back(fs::path(part->getDataPartStorage().getRelativePath()) / it.first); - - /// Serialized metadatadatas with zero ref counts. - auto metadatas = data_part_storage->getSerializedMetadata(paths); - - if (send_part_id) - { - String part_id = data_part_storage->getUniqueId(); - writeStringBinary(part_id, out); - } - - if (send_projections) - { - for (const auto & [name, projection] : part->getProjectionParts()) - { - writeStringBinary(name, out); - sendPartFromDiskRemoteMeta(projection, out, false, false); - } - } - - writeBinary(checksums.files.size(), out); - for (const auto & it : checksums.files) - { - const String & file_name = it.first; - String file_path_prefix = fs::path(part->getDataPartStorage().getRelativePath()) / file_name; - - /// Just some additional checks - String metadata_file_path = fs::path(data_part_storage->getDiskPath()) / file_path_prefix; - fs::path metadata(metadata_file_path); - if (!fs::exists(metadata)) - throw Exception(ErrorCodes::CORRUPTED_DATA, "Remote metadata '{}' is not exists", file_name); - if (!fs::is_regular_file(metadata)) - throw Exception(ErrorCodes::CORRUPTED_DATA, "Remote metadata '{}' is not a file", file_name); - - /// Actual metadata send - auto metadata_str = metadatas[file_path_prefix]; - UInt64 file_size = metadata_str.size(); - ReadBufferFromString buf(metadata_str); - - writeStringBinary(it.first, out); - writeBinary(file_size, out); - - HashingWriteBuffer hashing_out(out); - copyDataWithThrottler(buf, hashing_out, blocker.getCounter(), data.getSendsThrottler()); - if (blocker.isCancelled()) - throw Exception("Transferring part to replica was cancelled", ErrorCodes::ABORTED); - - if (hashing_out.count() != file_size) - throw Exception(ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, "Unexpected size of file {}", metadata_file_path); - - writePODBinary(hashing_out.getHash(), out); - } -} - MergeTreeData::DataPartPtr Service::findPart(const String & name) { /// It is important to include PreActive and Outdated parts here because remote replicas cannot reliably @@ -602,9 +539,14 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchSelectedPart( bool sync = (data_settings->min_compressed_bytes_to_fsync_after_fetch && sum_files_size >= data_settings->min_compressed_bytes_to_fsync_after_fetch); - String part_type = "Wide"; + using PartType = 
MergeTreeDataPartType; + PartType part_type = PartType::Wide; if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_TYPE) - readStringBinary(part_type, *in); + { + String part_type_str; + readStringBinary(part_type_str, *in); + part_type.fromString(part_type_str); + } UUID part_uuid = UUIDHelpers::Nil; if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_UUID) @@ -616,29 +558,33 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchSelectedPart( if (server_protocol_version >= REPLICATION_PROTOCOL_VERSION_WITH_PARTS_PROJECTION) readBinary(projections, *in); - MergeTreeData::DataPart::Checksums checksums; if (!remote_fs_metadata.empty()) { if (!try_zero_copy) - throw Exception("Got unexpected 'remote_fs_metadata' cookie", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got unexpected 'remote_fs_metadata' cookie"); if (std::find(capability.begin(), capability.end(), remote_fs_metadata) == capability.end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Got 'remote_fs_metadata' cookie {}, expect one from {}", remote_fs_metadata, fmt::join(capability, ", ")); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got 'remote_fs_metadata' cookie {}, expect one from {}", + remote_fs_metadata, fmt::join(capability, ", ")); if (server_protocol_version < REPLICATION_PROTOCOL_VERSION_WITH_PARTS_ZERO_COPY) throw Exception(ErrorCodes::LOGICAL_ERROR, "Got 'remote_fs_metadata' cookie with old protocol version {}", server_protocol_version); - if (part_type == "InMemory") - throw Exception("Got 'remote_fs_metadata' cookie for in-memory part", ErrorCodes::INCORRECT_PART_TYPE); + if (part_type == PartType::InMemory) + throw Exception(ErrorCodes::INCORRECT_PART_TYPE, "Got 'remote_fs_metadata' cookie for in-memory part"); try { - return downloadPartToDiskRemoteMeta(part_name, replica_path, to_detached, tmp_prefix, disk, *in, projections, checksums, throttler); - } + auto output_buffer_getter = [](IDataPartStorage & part_storage, const auto & file_name, size_t file_size) + { + auto full_path = fs::path(part_storage.getFullPath()) / file_name; + return std::make_unique(full_path, std::min(DBMS_DEFAULT_BUFFER_SIZE, file_size)); + }; + return downloadPartToDisk(part_name, replica_path, to_detached, tmp_prefix, disk, true, *in, output_buffer_getter, projections, throttler, sync); + } catch (const Exception & e) { if (e.code() != ErrorCodes::S3_ERROR && e.code() != ErrorCodes::ZERO_COPY_REPLICATION_ERROR) throw; - #if USE_AWS_S3 if (const auto * s3_exception = dynamic_cast(&e)) { @@ -673,7 +619,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchSelectedPart( } auto storage_id = data.getStorageID(); - String new_part_path = part_type == "InMemory" ? "memory" : fs::path(data.getFullPathOnDisk(disk)) / part_name / ""; + String new_part_path = part_type == PartType::InMemory ? 
"memory" : fs::path(data.getFullPathOnDisk(disk)) / part_name / ""; auto entry = data.getContext()->getReplicatedFetchList().insert( storage_id.getDatabaseName(), storage_id.getTableName(), part_info.partition_id, part_name, new_part_path, @@ -681,11 +627,11 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchSelectedPart( in->setNextCallback(ReplicatedFetchReadCallback(*entry)); - if (part_type == "InMemory") + if (part_type == PartType::InMemory) { auto volume = std::make_shared("volume_" + part_name, disk, 0); - auto data_part_storage = std::make_shared( + auto data_part_storage = std::make_shared( volume, data.getRelativeDataPath(), part_name); @@ -697,9 +643,15 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchSelectedPart( projections, false, throttler); } + auto output_buffer_getter = [](IDataPartStorage & part_storage, const String & file_name, size_t file_size) + { + return part_storage.writeFile(file_name, std::min(file_size, DBMS_DEFAULT_BUFFER_SIZE), {}); + }; + return downloadPartToDisk( part_name, replica_path, to_detached, tmp_prefix, - sync, disk, *in, projections, checksums, throttler); + disk, false, *in, output_buffer_getter, + projections, throttler, sync); } MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory( @@ -734,7 +686,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory( MergeTreeData::DataPart::Checksums checksums; if (!checksums.read(in)) - throw Exception("Cannot deserialize checksums", ErrorCodes::CORRUPTED_DATA); + throw Exception(ErrorCodes::CORRUPTED_DATA, "Cannot deserialize checksums"); NativeReader block_in(in, 0); auto block = block_in.read(); @@ -762,71 +714,20 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory( return new_data_part; } -void Fetcher::downloadBasePartOrProjectionPartToDiskRemoteMeta( - const String & replica_path, - const MutableDataPartStoragePtr & data_part_storage, - PooledReadWriteBufferFromHTTP & in, - MergeTreeData::DataPart::Checksums & checksums, - ThrottlerPtr throttler) const -{ - size_t files; - readBinary(files, in); - - for (size_t i = 0; i < files; ++i) - { - String file_name; - UInt64 file_size; - - readStringBinary(file_name, in); - readBinary(file_size, in); - - String metadata_file = fs::path(data_part_storage->getFullPath()) / file_name; - - { - auto file_out = std::make_unique(metadata_file, DBMS_DEFAULT_BUFFER_SIZE, -1, 0666, nullptr, 0); - - HashingWriteBuffer hashing_out(*file_out); - - copyDataWithThrottler(in, hashing_out, file_size, blocker.getCounter(), throttler); - - if (blocker.isCancelled()) - { - /// NOTE The is_cancelled flag also makes sense to check every time you read over the network, - /// performing a poll with a not very large timeout. - /// And now we check it only between read chunks (in the `copyData` function). 
- throw Exception(ErrorCodes::ABORTED, "Fetching of part was cancelled"); - } - - MergeTreeDataPartChecksum::uint128 expected_hash; - readPODBinary(expected_hash, in); - - if (expected_hash != hashing_out.getHash()) - { - throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, - "Checksum mismatch for file {} transferred from {}", - metadata_file, replica_path); - } - - if (file_name != "checksums.txt" && - file_name != "columns.txt" && - file_name != IMergeTreeDataPart::DEFAULT_COMPRESSION_CODEC_FILE_NAME) - checksums.addFile(file_name, file_size, expected_hash); - } - } -} - - void Fetcher::downloadBaseOrProjectionPartToDisk( const String & replica_path, const MutableDataPartStoragePtr & data_part_storage, - bool sync, PooledReadWriteBufferFromHTTP & in, + OutputBufferGetter output_buffer_getter, MergeTreeData::DataPart::Checksums & checksums, - ThrottlerPtr throttler) const + ThrottlerPtr throttler, + bool sync) const { size_t files; readBinary(files, in); + std::vector> written_files; + for (size_t i = 0; i < files; ++i) { String file_name; @@ -844,8 +745,8 @@ void Fetcher::downloadBaseOrProjectionPartToDisk( "This may happen if we are trying to download part from malicious replica or logical error.", absolute_file_path, data_part_storage->getRelativePath()); - auto file_out = data_part_storage->writeFile(file_name, std::min(file_size, DBMS_DEFAULT_BUFFER_SIZE), {}); - HashingWriteBuffer hashing_out(*file_out); + written_files.emplace_back(output_buffer_getter(*data_part_storage, file_name, file_size)); + HashingWriteBuffer hashing_out(*written_files.back()); copyDataWithThrottler(in, hashing_out, file_size, blocker.getCounter(), throttler); if (blocker.isCancelled()) @@ -869,9 +770,14 @@ void Fetcher::downloadBaseOrProjectionPartToDisk( file_name != "columns.txt" && file_name != IMergeTreeDataPart::DEFAULT_COMPRESSION_CODEC_FILE_NAME) checksums.addFile(file_name, file_size, expected_hash); + } + /// Call fsync for all files at once in attempt to decrease the latency + for (auto & file : written_files) + { + file->finalize(); if (sync) - hashing_out.sync(); + file->sync(); } } @@ -880,54 +786,68 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDisk( const String & replica_path, bool to_detached, const String & tmp_prefix, - bool sync, DiskPtr disk, + bool to_remote_disk, PooledReadWriteBufferFromHTTP & in, + OutputBufferGetter output_buffer_getter, size_t projections, - MergeTreeData::DataPart::Checksums & checksums, - ThrottlerPtr throttler) + ThrottlerPtr throttler, + bool sync) { - assert(!tmp_prefix.empty()); + String part_id; const auto data_settings = data.getSettings(); + MergeTreeData::DataPart::Checksums data_checksums; + + if (to_remote_disk) + { + readStringBinary(part_id, in); + + if (!disk->supportZeroCopyReplication() || !disk->checkUniqueId(part_id)) + throw Exception(ErrorCodes::ZERO_COPY_REPLICATION_ERROR, "Part {} unique id {} doesn't exist on {} (with type {}).", part_name, part_id, disk->getName(), toString(disk->getDataSourceDescription().type)); + + LOG_DEBUG(log, "Downloading part {} unique id {} metadata onto disk {}.", part_name, part_id, disk->getName()); + data.lockSharedDataTemporary(part_name, part_id, disk); + } + else + { + LOG_DEBUG(log, "Downloading part {} onto disk {}.", part_name, disk->getName()); + } /// We will remove directory if it's already exists. Make precautions. 
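Two details of the rewritten `downloadBaseOrProjectionPartToDisk` above are easy to miss: each file is streamed through a hashing wrapper and compared against the checksum announced by the sender, and `finalize()`/`sync()` of all written buffers is deferred to a single pass after the loop to reduce fsync latency. A minimal stand-alone sketch of both ideas, with FNV-1a standing in for the real hash and raw POSIX files standing in for `IDataPartStorage` (all names here are hypothetical):

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <fcntl.h>
#include <stdexcept>
#include <string>
#include <unistd.h>
#include <vector>

/// FNV-1a stands in for the hash that HashingWriteBuffer would compute.
static uint64_t fnv1a(uint64_t hash, const char * data, size_t len)
{
    for (size_t i = 0; i < len; ++i)
    {
        hash ^= static_cast<unsigned char>(data[i]);
        hash *= 1099511628211ULL;
    }
    return hash;
}

/// Copy `size` bytes from the network stream into a new file, verify the hash the sender
/// announced, and return the descriptor so the caller can fsync it later.
int downloadOneFile(std::FILE * in, const std::string & path, size_t size, uint64_t expected_hash)
{
    int fd = ::open(path.c_str(), O_CREAT | O_WRONLY | O_TRUNC, 0644);
    if (fd < 0)
        throw std::runtime_error("cannot create " + path);

    uint64_t hash = 14695981039346656037ULL;
    char buf[8192];
    for (size_t left = size; left > 0;)
    {
        size_t chunk = std::min(left, sizeof(buf));
        if (std::fread(buf, 1, chunk, in) != chunk || ::write(fd, buf, chunk) != static_cast<ssize_t>(chunk))
            throw std::runtime_error("short read/write for " + path);
        hash = fnv1a(hash, buf, chunk);
        left -= chunk;
    }

    if (hash != expected_hash)
        throw std::runtime_error("checksum mismatch for " + path);
    return fd;
}

/// Deferred durability: one finalize/fsync pass over every file, after all of them are
/// written, instead of syncing inside the per-file loop.
void syncAll(std::vector<int> & fds)
{
    for (int fd : fds)
    {
        ::fsync(fd);
        ::close(fd);
    }
}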
if (tmp_prefix.empty() //-V560 || part_name.empty() || std::string::npos != tmp_prefix.find_first_of("/.") || std::string::npos != part_name.find_first_of("/.")) - throw Exception("Logical error: tmp_prefix and part_name cannot be empty or contain '.' or '/' characters.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: tmp_prefix and part_name cannot be empty or contain '.' or '/' characters."); - String part_dir = tmp_prefix + part_name; - String part_relative_path = data.getRelativeDataPath() + String(to_detached ? "detached/" : ""); + auto part_dir = tmp_prefix + part_name; + auto part_relative_path = data.getRelativeDataPath() + String(to_detached ? "detached/" : ""); + auto volume = std::make_shared("volume_" + part_name, disk); - auto volume = std::make_shared("volume_" + part_name, disk, 0); + /// Create temporary part storage to write sent files. + /// Actual part storage will be initialized later from metadata. + auto part_storage_for_loading = std::make_shared(volume, part_relative_path, part_dir); + part_storage_for_loading->beginTransaction(); - auto data_part_storage = std::make_shared( - volume, - part_relative_path, - part_dir); - - data_part_storage->beginTransaction(); - - if (data_part_storage->exists()) + if (part_storage_for_loading->exists()) { LOG_WARNING(log, "Directory {} already exists, probably result of a failed fetch. Will remove it before fetching part.", - data_part_storage->getFullPath()); + part_storage_for_loading->getFullPath()); /// Even if it's a temporary part it could be downloaded with zero copy replication and this function /// is executed as a callback. /// /// We don't control the amount of refs for temporary parts so we cannot decide can we remove blobs /// or not. So we are not doing it - bool keep_shared = disk->supportZeroCopyReplication() && data_settings->allow_remote_fs_zero_copy_replication; - data_part_storage->removeSharedRecursive(keep_shared); + bool keep_shared = part_storage_for_loading->supportZeroCopyReplication() && data_settings->allow_remote_fs_zero_copy_replication; + part_storage_for_loading->removeSharedRecursive(keep_shared); } - data_part_storage->createDirectories(); + part_storage_for_loading->createDirectories(); SyncGuardPtr sync_guard; if (data.getSettings()->fsync_part_directory) - sync_guard = data_part_storage->getDirectorySyncGuard(); + sync_guard = part_storage_for_loading->getDirectorySyncGuard(); CurrentMetrics::Increment metric_increment{CurrentMetrics::ReplicatedFetch}; @@ -939,16 +859,18 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDisk( readStringBinary(projection_name, in); MergeTreeData::DataPart::Checksums projection_checksum; - auto projection_part_storage = data_part_storage->getProjection(projection_name + ".proj"); + auto projection_part_storage = part_storage_for_loading->getProjection(projection_name + ".proj"); projection_part_storage->createDirectories(); + downloadBaseOrProjectionPartToDisk( - replica_path, projection_part_storage, sync, in, projection_checksum, throttler); - checksums.addFile( + replica_path, projection_part_storage, in, output_buffer_getter, projection_checksum, throttler, sync); + + data_checksums.addFile( projection_name + ".proj", projection_checksum.getTotalSizeOnDisk(), projection_checksum.getTotalChecksumUInt128()); } - // Download the base part - downloadBaseOrProjectionPartToDisk(replica_path, data_part_storage, sync, in, checksums, throttler); + downloadBaseOrProjectionPartToDisk( + replica_path, 
part_storage_for_loading, in, output_buffer_getter, data_checksums, throttler, sync); } catch (const Exception & e) { @@ -956,96 +878,8 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDisk( /// part or fetch of any projection was stopped. if (e.code() == ErrorCodes::ABORTED) { - data_part_storage->removeRecursive(); - data_part_storage->commitTransaction(); - } - throw; - } - - assertEOF(in); - data_part_storage->commitTransaction(); - MergeTreeData::MutableDataPartPtr new_data_part = data.createPart(part_name, data_part_storage); - new_data_part->version.setCreationTID(Tx::PrehistoricTID, nullptr); - new_data_part->is_temp = true; - new_data_part->modification_time = time(nullptr); - new_data_part->loadColumnsChecksumsIndexes(true, false); - new_data_part->checksums.checkEqual(checksums, false); - return new_data_part; -} - -MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDiskRemoteMeta( - const String & part_name, - const String & replica_path, - bool to_detached, - const String & tmp_prefix, - DiskPtr disk, - PooledReadWriteBufferFromHTTP & in, - size_t projections, - MergeTreeData::DataPart::Checksums & checksums, - ThrottlerPtr throttler) -{ - String part_id; - readStringBinary(part_id, in); - - if (!disk->supportZeroCopyReplication() || !disk->checkUniqueId(part_id)) - { - throw Exception(ErrorCodes::ZERO_COPY_REPLICATION_ERROR, "Part {} unique id {} doesn't exist on {} (with type {}).", part_name, part_id, disk->getName(), toString(disk->getDataSourceDescription().type)); - } - - LOG_DEBUG(log, "Downloading Part {} unique id {} metadata onto disk {}.", - part_name, part_id, disk->getName()); - - data.lockSharedDataTemporary(part_name, part_id, disk); - - assert(!tmp_prefix.empty()); - - String part_dir = tmp_prefix + part_name; - String part_relative_path = data.getRelativeDataPath() + String(to_detached ? "detached/" : ""); - - auto volume = std::make_shared("volume_" + part_name, disk); - - auto data_part_storage = std::make_shared( - volume, - part_relative_path, - part_dir); - - data_part_storage->beginTransaction(); - - if (data_part_storage->exists()) - throw Exception(ErrorCodes::DIRECTORY_ALREADY_EXISTS, "Directory {} already exists.", data_part_storage->getFullPath()); - - CurrentMetrics::Increment metric_increment{CurrentMetrics::ReplicatedFetch}; - - data_part_storage->createDirectories(); - - try - { - for (size_t i = 0; i < projections; ++i) - { - String projection_name; - readStringBinary(projection_name, in); - MergeTreeData::DataPart::Checksums projection_checksum; - - auto projection_part_storage = data_part_storage->getProjection(projection_name + ".proj"); - projection_part_storage->createDirectories(); - downloadBasePartOrProjectionPartToDiskRemoteMeta( - replica_path, projection_part_storage, in, projection_checksum, throttler); - - checksums.addFile( - projection_name + ".proj", projection_checksum.getTotalSizeOnDisk(), projection_checksum.getTotalChecksumUInt128()); - } - - downloadBasePartOrProjectionPartToDiskRemoteMeta( - replica_path, data_part_storage, in, checksums, throttler); - } - catch (const Exception & e) - { - if (e.code() == ErrorCodes::ABORTED) - { - /// Remove the whole part directory if fetch of base - /// part or fetch of any projection was stopped. 
- data_part_storage->removeSharedRecursive(true); - data_part_storage->commitTransaction(); + part_storage_for_loading->removeSharedRecursive(true); + part_storage_for_loading->commitTransaction(); } throw; } @@ -1054,9 +888,11 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDiskRemoteMeta( MergeTreeData::MutableDataPartPtr new_data_part; try { - data_part_storage->commitTransaction(); + part_storage_for_loading->commitTransaction(); + + MergeTreeDataPartBuilder builder(data, part_name, volume, part_relative_path, part_dir); + new_data_part = builder.withPartFormatFromDisk().build(); - new_data_part = data.createPart(part_name, data_part_storage); new_data_part->version.setCreationTID(Tx::PrehistoricTID, nullptr); new_data_part->is_temp = true; new_data_part->modification_time = time(nullptr); @@ -1077,10 +913,17 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDiskRemoteMeta( throw; } - data.lockSharedData(*new_data_part, /* replace_existing_lock = */ true, {}); - - LOG_DEBUG(log, "Download of part {} unique id {} metadata onto disk {} finished.", - part_name, part_id, disk->getName()); + if (to_remote_disk) + { + data.lockSharedData(*new_data_part, /* replace_existing_lock = */ true, {}); + LOG_DEBUG(log, "Download of part {} unique id {} metadata onto disk {} finished.", part_name, part_id, disk->getName()); + } + else + { + if (isFullPartStorage(new_data_part->getDataPartStorage())) + new_data_part->checksums.checkEqual(data_checksums, false); + LOG_DEBUG(log, "Download of part {} onto disk {} finished.", part_name, disk->getName()); + } return new_data_part; } diff --git a/src/Storages/MergeTree/DataPartsExchange.h b/src/Storages/MergeTree/DataPartsExchange.h index 6c92fad4092..20c15039a2d 100644 --- a/src/Storages/MergeTree/DataPartsExchange.h +++ b/src/Storages/MergeTree/DataPartsExchange.h @@ -49,12 +49,7 @@ private: const MergeTreeData::DataPartPtr & part, WriteBuffer & out, int client_protocol_version, - bool send_projections); - - void sendPartFromDiskRemoteMeta( - const MergeTreeData::DataPartPtr & part, - WriteBuffer & out, - bool send_part_id, + bool from_remote_disk, bool send_projections); /// StorageReplicatedMergeTree::shutdown() waits for all parts exchange handlers to finish, @@ -93,32 +88,29 @@ public: ActionBlocker blocker; private: + using OutputBufferGetter = std::function(IDataPartStorage &, const String &, size_t)>; + void downloadBaseOrProjectionPartToDisk( const String & replica_path, const MutableDataPartStoragePtr & data_part_storage, - bool sync, PooledReadWriteBufferFromHTTP & in, + OutputBufferGetter output_buffer_getter, MergeTreeData::DataPart::Checksums & checksums, - ThrottlerPtr throttler) const; - - void downloadBasePartOrProjectionPartToDiskRemoteMeta( - const String & replica_path, - const MutableDataPartStoragePtr & data_part_storage, - PooledReadWriteBufferFromHTTP & in, - MergeTreeData::DataPart::Checksums & checksums, - ThrottlerPtr throttler) const; + ThrottlerPtr throttler, + bool sync) const; MergeTreeData::MutableDataPartPtr downloadPartToDisk( const String & part_name, const String & replica_path, bool to_detached, const String & tmp_prefix_, - bool sync, DiskPtr disk, + bool to_remote_disk, PooledReadWriteBufferFromHTTP & in, + OutputBufferGetter output_buffer_getter, size_t projections, - MergeTreeData::DataPart::Checksums & checksums, - ThrottlerPtr throttler); + ThrottlerPtr throttler, + bool sync); MergeTreeData::MutableDataPartPtr downloadPartToMemory( MutableDataPartStoragePtr data_part_storage, diff --git 
a/src/Storages/MergeTree/DropPartsRanges.cpp b/src/Storages/MergeTree/DropPartsRanges.cpp index bc4f20a3471..19b44c7e173 100644 --- a/src/Storages/MergeTree/DropPartsRanges.cpp +++ b/src/Storages/MergeTree/DropPartsRanges.cpp @@ -44,7 +44,9 @@ void DropPartsRanges::addDropRange(const ReplicatedMergeTreeLogEntryPtr & entry) void DropPartsRanges::removeDropRange(const ReplicatedMergeTreeLogEntryPtr & entry) { if (entry->type != ReplicatedMergeTreeLogEntry::DROP_RANGE) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to remove entry of type {} from drop ranges, expected DROP_RANGE", entry->typeToString()); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Trying to remove entry of type {} from drop ranges, expected DROP_RANGE", + entry->typeToString()); auto it = drop_ranges.find(entry->znode_name); assert(it != drop_ranges.end()); diff --git a/src/Storages/MergeTree/EphemeralLockInZooKeeper.cpp b/src/Storages/MergeTree/EphemeralLockInZooKeeper.cpp index ca81578c5c6..996d2bc46a5 100644 --- a/src/Storages/MergeTree/EphemeralLockInZooKeeper.cpp +++ b/src/Storages/MergeTree/EphemeralLockInZooKeeper.cpp @@ -17,7 +17,7 @@ EphemeralLockInZooKeeper::EphemeralLockInZooKeeper(const String & path_prefix_, : zookeeper(zookeeper_), path_prefix(path_prefix_), path(path_), conflict_path(conflict_path_) { if (conflict_path.empty() && path.size() <= path_prefix.size()) - throw Exception("Logical error: name of the main node is shorter than prefix.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: name of the main node is shorter than prefix."); } template @@ -179,8 +179,7 @@ EphemeralLocksInAllPartitions::EphemeralLocksInAllPartitions( size_t prefix_size = block_numbers_path.size() + 1 + partitions[i].size() + 1 + path_prefix.size(); const String & path = dynamic_cast(*lock_responses[i]).path_created; if (path.size() <= prefix_size) - throw Exception("Logical error: name of the sequential node is shorter than prefix.", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: name of the sequential node is shorter than prefix."); UInt64 number = parse(path.c_str() + prefix_size, path.size() - prefix_size); locks.push_back(LockInfo{path, partitions[i], number}); diff --git a/src/Storages/MergeTree/EphemeralLockInZooKeeper.h b/src/Storages/MergeTree/EphemeralLockInZooKeeper.h index a8b60c6ef8a..f84f9ebb46c 100644 --- a/src/Storages/MergeTree/EphemeralLockInZooKeeper.h +++ b/src/Storages/MergeTree/EphemeralLockInZooKeeper.h @@ -97,7 +97,7 @@ public: void checkCreated() const { if (!isLocked()) - throw Exception("EphemeralLock is not created", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "EphemeralLock is not created"); } ~EphemeralLockInZooKeeper(); diff --git a/src/Storages/MergeTree/FutureMergedMutatedPart.cpp b/src/Storages/MergeTree/FutureMergedMutatedPart.cpp index ffd444b7135..959a0e50bca 100644 --- a/src/Storages/MergeTree/FutureMergedMutatedPart.cpp +++ b/src/Storages/MergeTree/FutureMergedMutatedPart.cpp @@ -16,20 +16,23 @@ void FutureMergedMutatedPart::assign(MergeTreeData::DataPartsVector parts_) size_t sum_rows = 0; size_t sum_bytes_uncompressed = 0; - MergeTreeDataPartType future_part_type = MergeTreeDataPartType::Unknown; + MergeTreeDataPartType future_part_type; + MergeTreeDataPartStorageType future_part_storage_type; for (const auto & part : parts_) { sum_rows += part->rows_count; sum_bytes_uncompressed += part->getTotalColumnsSize().data_uncompressed; future_part_type = 
std::min(future_part_type, part->getType()); + future_part_storage_type = std::min(future_part_storage_type, part->getDataPartStorage().getType()); } - auto chosen_type = parts_.front()->storage.choosePartTypeOnDisk(sum_bytes_uncompressed, sum_rows); - future_part_type = std::min(future_part_type, chosen_type); - assign(std::move(parts_), future_part_type); + auto chosen_format = parts_.front()->storage.choosePartFormatOnDisk(sum_bytes_uncompressed, sum_rows); + future_part_type = std::min(future_part_type, chosen_format.part_type); + future_part_storage_type = std::min(future_part_storage_type, chosen_format.storage_type); + assign(std::move(parts_), {future_part_type, future_part_storage_type}); } -void FutureMergedMutatedPart::assign(MergeTreeData::DataPartsVector parts_, MergeTreeDataPartType future_part_type) +void FutureMergedMutatedPart::assign(MergeTreeData::DataPartsVector parts_, MergeTreeDataPartFormat future_part_format) { if (parts_.empty()) return; @@ -39,9 +42,8 @@ void FutureMergedMutatedPart::assign(MergeTreeData::DataPartsVector parts_, Merg const MergeTreeData::DataPartPtr & first_part = parts_.front(); if (part->partition.value != first_part->partition.value) - throw Exception( - "Attempting to merge parts " + first_part->name + " and " + part->name + " that are in different partitions", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempting to merge parts {} and {} that are in different partitions", + first_part->name, part->name); } parts = std::move(parts_); @@ -54,7 +56,7 @@ void FutureMergedMutatedPart::assign(MergeTreeData::DataPartsVector parts_, Merg max_mutation = std::max(max_mutation, part->info.mutation); } - type = future_part_type; + part_format = future_part_format; part_info.partition_id = parts.front()->info.partition_id; part_info.min_block = parts.front()->info.min_block; part_info.max_block = parts.back()->info.max_block; diff --git a/src/Storages/MergeTree/FutureMergedMutatedPart.h b/src/Storages/MergeTree/FutureMergedMutatedPart.h index 4447687c3d9..09fb7b01678 100644 --- a/src/Storages/MergeTree/FutureMergedMutatedPart.h +++ b/src/Storages/MergeTree/FutureMergedMutatedPart.h @@ -19,7 +19,7 @@ struct FutureMergedMutatedPart String name; UUID uuid = UUIDHelpers::Nil; String path; - MergeTreeDataPartType type; + MergeTreeDataPartFormat part_format; MergeTreePartInfo part_info; MergeTreeData::DataPartsVector parts; MergeType merge_type = MergeType::Regular; @@ -33,13 +33,13 @@ struct FutureMergedMutatedPart assign(std::move(parts_)); } - FutureMergedMutatedPart(MergeTreeData::DataPartsVector parts_, MergeTreeDataPartType future_part_type) + FutureMergedMutatedPart(MergeTreeData::DataPartsVector parts_, MergeTreeDataPartFormat future_part_format) { - assign(std::move(parts_), future_part_type); + assign(std::move(parts_), future_part_format); } void assign(MergeTreeData::DataPartsVector parts_); - void assign(MergeTreeData::DataPartsVector parts_, MergeTreeDataPartType future_part_type); + void assign(MergeTreeData::DataPartsVector parts_, MergeTreeDataPartFormat future_part_format); void updatePath(const MergeTreeData & storage, const IReservation * reservation); }; diff --git a/src/Storages/MergeTree/GinIndexStore.cpp b/src/Storages/MergeTree/GinIndexStore.cpp index 995f4f9f88c..0904855755c 100644 --- a/src/Storages/MergeTree/GinIndexStore.cpp +++ b/src/Storages/MergeTree/GinIndexStore.cpp @@ -1,115 +1,84 @@ #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include 
#include #include #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include namespace DB { -using TokenPostingsBuilderPair = std::pair; -using TokenPostingsBuilderPairs = std::vector; - namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int UNKNOWN_FORMAT_VERSION; }; -GinIndexStore::GinIndexStore(const String & name_, DataPartStoragePtr storage_) - : name(name_) - , storage(storage_) -{ -} -GinIndexStore::GinIndexStore(const String& name_, DataPartStoragePtr storage_, MutableDataPartStoragePtr data_part_storage_builder_, UInt64 max_digestion_size_) - : name(name_) - , storage(storage_) - , data_part_storage_builder(data_part_storage_builder_) - , max_digestion_size(max_digestion_size_) -{ -} - -GinIndexPostingsBuilder::GinIndexPostingsBuilder(UInt64 limit) : rowid_lst{}, size_limit(limit) +GinIndexPostingsBuilder::GinIndexPostingsBuilder(UInt64 limit) + : rowid_lst{} + , size_limit(limit) {} bool GinIndexPostingsBuilder::contains(UInt32 row_id) const { if (useRoaring()) return rowid_bitmap.contains(row_id); - - const auto * const it = std::find(rowid_lst.begin(), rowid_lst.begin()+rowid_lst_length, row_id); - return it != rowid_lst.begin() + rowid_lst_length; + else + { + const auto * const it = std::find(rowid_lst.begin(), rowid_lst.begin()+rowid_lst_length, row_id); + return it != rowid_lst.begin() + rowid_lst_length; + } } void GinIndexPostingsBuilder::add(UInt32 row_id) { if (containsAllRows()) - { return; - } + if (useRoaring()) { if (rowid_bitmap.cardinality() == size_limit) { - //reset the postings list with MATCH ALWAYS; - rowid_lst_length = 1; //makes sure useRoaring() returns false; - rowid_lst[0] = UINT32_MAX; //set CONTAINS ALL flag; + /// reset the postings list with MATCH ALWAYS; + rowid_lst_length = 1; /// makes sure useRoaring() returns false; + rowid_lst[0] = CONTAINS_ALL; /// set CONTAINS_ALL flag; } else - { rowid_bitmap.add(row_id); - } - return; } - assert(rowid_lst_length < MIN_SIZE_FOR_ROARING_ENCODING); - rowid_lst[rowid_lst_length] = row_id; - rowid_lst_length++; - - if (rowid_lst_length == MIN_SIZE_FOR_ROARING_ENCODING) + else { - for (size_t i = 0; i < rowid_lst_length; i++) - rowid_bitmap.add(rowid_lst[i]); + assert(rowid_lst_length < MIN_SIZE_FOR_ROARING_ENCODING); + rowid_lst[rowid_lst_length] = row_id; + rowid_lst_length++; - rowid_lst_length = UsesBitMap; + if (rowid_lst_length == MIN_SIZE_FOR_ROARING_ENCODING) + { + for (size_t i = 0; i < rowid_lst_length; i++) + rowid_bitmap.add(rowid_lst[i]); + + rowid_lst_length = USES_BIT_MAP; + } } } -bool GinIndexPostingsBuilder::useRoaring() const -{ - return rowid_lst_length == UsesBitMap; -} - -bool GinIndexPostingsBuilder::containsAllRows() const -{ - return rowid_lst[0] == UINT32_MAX; -} - -UInt64 GinIndexPostingsBuilder::serialize(WriteBuffer &buffer) const +UInt64 GinIndexPostingsBuilder::serialize(WriteBuffer & buffer) const { UInt64 written_bytes = 0; buffer.write(rowid_lst_length); written_bytes += 1; - if (!useRoaring()) - { - for (size_t i = 0; i < rowid_lst_length; ++i) - { - writeVarUInt(rowid_lst[i], buffer); - written_bytes += getLengthOfVarUInt(rowid_lst[i]); - } - } - else + if (useRoaring()) { auto size = rowid_bitmap.getSizeInBytes(); @@ -121,65 +90,85 @@ UInt64 GinIndexPostingsBuilder::serialize(WriteBuffer &buffer) const buffer.write(buf.get(), size); written_bytes += size; } + else + { + for (size_t i = 0; i < rowid_lst_length; ++i) + { + writeVarUInt(rowid_lst[i], buffer); + written_bytes += 
getLengthOfVarUInt(rowid_lst[i]); + } + } + return written_bytes; } -GinIndexPostingsListPtr GinIndexPostingsBuilder::deserialize(ReadBuffer &buffer) +GinIndexPostingsListPtr GinIndexPostingsBuilder::deserialize(ReadBuffer & buffer) { UInt8 postings_list_size = 0; - buffer.readStrict(reinterpret_cast(postings_list_size)); + buffer.readStrict(reinterpret_cast(postings_list_size)); - if (postings_list_size != UsesBitMap) + if (postings_list_size == USES_BIT_MAP) + { + size_t size = 0; + readVarUInt(size, buffer); + auto buf = std::make_unique(size); + buffer.readStrict(reinterpret_cast(buf.get()), size); + + GinIndexPostingsListPtr postings_list = std::make_shared(GinIndexPostingsList::read(buf.get())); + + return postings_list; + } + else { assert(postings_list_size < MIN_SIZE_FOR_ROARING_ENCODING); GinIndexPostingsListPtr postings_list = std::make_shared(); UInt32 row_ids[MIN_SIZE_FOR_ROARING_ENCODING]; for (auto i = 0; i < postings_list_size; ++i) - { readVarUInt(row_ids[i], buffer); - } postings_list->addMany(postings_list_size, row_ids); return postings_list; } - else - { - size_t size{0}; - readVarUInt(size, buffer); - auto buf = std::make_unique(size); - buffer.readStrict(reinterpret_cast(buf.get()), size); +} - GinIndexPostingsListPtr postings_list = std::make_shared(GinIndexPostingsList::read(buf.get())); - - return postings_list; - } +GinIndexStore::GinIndexStore(const String & name_, DataPartStoragePtr storage_) + : name(name_) + , storage(storage_) +{ +} +GinIndexStore::GinIndexStore(const String & name_, DataPartStoragePtr storage_, MutableDataPartStoragePtr data_part_storage_builder_, UInt64 max_digestion_size_) + : name(name_) + , storage(storage_) + , data_part_storage_builder(data_part_storage_builder_) + , max_digestion_size(max_digestion_size_) +{ } bool GinIndexStore::exists() const { - String id_file_name = getName() + GIN_SEGMENT_ID_FILE_TYPE; - return storage->exists(id_file_name); + String segment_id_file_name = getName() + GIN_SEGMENT_ID_FILE_TYPE; + return storage->exists(segment_id_file_name); } -UInt32 GinIndexStore::getNextSegmentIDRange(const String& file_name, size_t n) +UInt32 GinIndexStore::getNextSegmentIDRange(const String & file_name, size_t n) { - std::lock_guard guard(gin_index_store_mutex); + std::lock_guard guard(mutex); - /// When the method is called for the first time, the file doesn't exist yet, need to create it - /// and write segment ID 1. + /// When the method is called for the first time, the file doesn't exist yet, need to create it and write segment ID 1. 
if (!storage->exists(file_name)) { - /// Create file and write initial segment id = 1 + /// Create file std::unique_ptr ostr = this->data_part_storage_builder->writeFile(file_name, DBMS_DEFAULT_BUFFER_SIZE, {}); /// Write version writeChar(static_cast(CURRENT_GIN_FILE_FORMAT_VERSION), *ostr); + /// Write segment ID 1 writeVarUInt(1, *ostr); ostr->sync(); } - /// read id in file + /// Read id in file UInt32 result = 0; { std::unique_ptr istr = this->storage->readFile(file_name, {}, std::nullopt, std::nullopt); @@ -189,7 +178,8 @@ UInt32 GinIndexStore::getNextSegmentIDRange(const String& file_name, size_t n) readVarUInt(result, *istr); } - //save result+n + + /// Save result + n { std::unique_ptr ostr = this->data_part_storage_builder->writeFile(file_name, DBMS_DEFAULT_BUFFER_SIZE, {}); @@ -204,15 +194,15 @@ UInt32 GinIndexStore::getNextSegmentIDRange(const String& file_name, size_t n) UInt32 GinIndexStore::getNextRowIDRange(size_t numIDs) { - UInt32 result =current_segment.next_row_id; + UInt32 result = current_segment.next_row_id; current_segment.next_row_id += numIDs; return result; } UInt32 GinIndexStore::getNextSegmentID() { - String sid_file_name = getName() + GIN_SEGMENT_ID_FILE_TYPE; - return getNextSegmentIDRange(sid_file_name, 1); + String segment_id_file_name = getName() + GIN_SEGMENT_ID_FILE_TYPE; + return getNextSegmentIDRange(segment_id_file_name, 1); } UInt32 GinIndexStore::getNumOfSegments() @@ -220,18 +210,18 @@ UInt32 GinIndexStore::getNumOfSegments() if (cached_segment_num) return cached_segment_num; - String sid_file_name = getName() + GIN_SEGMENT_ID_FILE_TYPE; - if (!storage->exists(sid_file_name)) + String segment_id_file_name = getName() + GIN_SEGMENT_ID_FILE_TYPE; + if (!storage->exists(segment_id_file_name)) return 0; UInt32 result = 0; { - std::unique_ptr istr = this->storage->readFile(sid_file_name, {}, std::nullopt, std::nullopt); + std::unique_ptr istr = this->storage->readFile(segment_id_file_name, {}, std::nullopt, std::nullopt); uint8_t version = 0; readBinary(version, *istr); - if (version > CURRENT_GIN_FILE_FORMAT_VERSION) + if (version > static_cast>(CURRENT_GIN_FILE_FORMAT_VERSION)) throw Exception(ErrorCodes::UNKNOWN_FORMAT_VERSION, "Unsupported inverted index version {}", version); readVarUInt(result, *istr); @@ -250,88 +240,84 @@ bool GinIndexStore::needToWrite() const void GinIndexStore::finalize() { if (!current_postings.empty()) - { writeSegment(); - } } void GinIndexStore::initFileStreams() { - String segment_file_name = getName() + GIN_SEGMENT_FILE_TYPE; - String term_dict_file_name = getName() + GIN_DICTIONARY_FILE_TYPE; + String metadata_file_name = getName() + GIN_SEGMENT_METADATA_FILE_TYPE; + String dict_file_name = getName() + GIN_DICTIONARY_FILE_TYPE; String postings_file_name = getName() + GIN_POSTINGS_FILE_TYPE; - segment_file_stream = data_part_storage_builder->writeFile(segment_file_name, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append, {}); - term_dict_file_stream = data_part_storage_builder->writeFile(term_dict_file_name, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append, {}); + metadata_file_stream = data_part_storage_builder->writeFile(metadata_file_name, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append, {}); + dict_file_stream = data_part_storage_builder->writeFile(dict_file_name, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append, {}); postings_file_stream = data_part_storage_builder->writeFile(postings_file_name, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append, {}); } void GinIndexStore::writeSegment() { - if (segment_file_stream == nullptr) - { + if 
(metadata_file_stream == nullptr) initFileStreams(); - } + + using TokenPostingsBuilderPair = std::pair; + using TokenPostingsBuilderPairs = std::vector; /// Write segment - segment_file_stream->write(reinterpret_cast(¤t_segment), sizeof(GinIndexSegment)); + metadata_file_stream->write(reinterpret_cast(¤t_segment), sizeof(GinIndexSegment)); TokenPostingsBuilderPairs token_postings_list_pairs; token_postings_list_pairs.reserve(current_postings.size()); - for (const auto& [token, postings_list] : current_postings) - { + for (const auto & [token, postings_list] : current_postings) token_postings_list_pairs.push_back({token, postings_list}); - } /// Sort token-postings list pairs since all tokens have to be added in FST in sorted order std::sort(token_postings_list_pairs.begin(), token_postings_list_pairs.end(), - [](const TokenPostingsBuilderPair& a, const TokenPostingsBuilderPair & b) + [](const TokenPostingsBuilderPair & x, const TokenPostingsBuilderPair & y) { - return a.first < b.first; + return x.first < y.first; }); - ///write postings + /// Write postings std::vector posting_list_byte_sizes(current_postings.size(), 0); - for (size_t current_index = 0; const auto& [token, postings_list] : token_postings_list_pairs) + for (size_t i = 0; const auto & [token, postings_list] : token_postings_list_pairs) { auto posting_list_byte_size = postings_list->serialize(*postings_file_stream); - posting_list_byte_sizes[current_index] = posting_list_byte_size; - current_index++; + posting_list_byte_sizes[i] = posting_list_byte_size; + i++; current_segment.postings_start_offset += posting_list_byte_size; } ///write item dictionary std::vector buffer; WriteBufferFromVector> write_buf(buffer); - FST::FSTBuilder builder(write_buf); + FST::FstBuilder fst_builder(write_buf); UInt64 offset = 0; - for (size_t current_index = 0; const auto& [token, postings_list] : token_postings_list_pairs) + for (size_t i = 0; const auto & [token, postings_list] : token_postings_list_pairs) { - String str_token{token}; - builder.add(str_token, offset); - offset += posting_list_byte_sizes[current_index]; - current_index++; + fst_builder.add(token, offset); + offset += posting_list_byte_sizes[i]; + i++; } - builder.build(); + fst_builder.build(); write_buf.finalize(); /// Write FST size - writeVarUInt(buffer.size(), *term_dict_file_stream); - current_segment.term_dict_start_offset += getLengthOfVarUInt(buffer.size()); + writeVarUInt(buffer.size(), *dict_file_stream); + current_segment.dict_start_offset += getLengthOfVarUInt(buffer.size()); - /// Write FST content - term_dict_file_stream->write(reinterpret_cast(buffer.data()), buffer.size()); - current_segment.term_dict_start_offset += buffer.size(); + /// Write FST blob + dict_file_stream->write(reinterpret_cast(buffer.data()), buffer.size()); + current_segment.dict_start_offset += buffer.size(); current_size = 0; current_postings.clear(); current_segment.segment_id = getNextSegmentID(); - segment_file_stream->sync(); - term_dict_file_stream->sync(); + metadata_file_stream->sync(); + dict_file_stream->sync(); postings_file_stream->sync(); } @@ -343,82 +329,79 @@ GinIndexStoreDeserializer::GinIndexStoreDeserializer(const GinIndexStorePtr & st void GinIndexStoreDeserializer::initFileStreams() { - String segment_file_name = store->getName() + GinIndexStore::GIN_SEGMENT_FILE_TYPE; - String term_dict_file_name = store->getName() + GinIndexStore::GIN_DICTIONARY_FILE_TYPE; + String metadata_file_name = store->getName() + GinIndexStore::GIN_SEGMENT_METADATA_FILE_TYPE; + String 
dict_file_name = store->getName() + GinIndexStore::GIN_DICTIONARY_FILE_TYPE; String postings_file_name = store->getName() + GinIndexStore::GIN_POSTINGS_FILE_TYPE; - segment_file_stream = store->storage->readFile(segment_file_name, {}, std::nullopt, std::nullopt); - term_dict_file_stream = store->storage->readFile(term_dict_file_name, {}, std::nullopt, std::nullopt); + metadata_file_stream = store->storage->readFile(metadata_file_name, {}, std::nullopt, std::nullopt); + dict_file_stream = store->storage->readFile(dict_file_name, {}, std::nullopt, std::nullopt); postings_file_stream = store->storage->readFile(postings_file_name, {}, std::nullopt, std::nullopt); } void GinIndexStoreDeserializer::readSegments() { - auto num_segments = store->getNumOfSegments(); + UInt32 num_segments = store->getNumOfSegments(); if (num_segments == 0) return; + using GinIndexSegments = std::vector; GinIndexSegments segments (num_segments); - assert(segment_file_stream != nullptr); + assert(metadata_file_stream != nullptr); - segment_file_stream->readStrict(reinterpret_cast(segments.data()), num_segments * sizeof(GinIndexSegment)); - for (size_t i = 0; i < num_segments; ++i) + metadata_file_stream->readStrict(reinterpret_cast(segments.data()), num_segments * sizeof(GinIndexSegment)); + for (UInt32 i = 0; i < num_segments; ++i) { auto seg_id = segments[i].segment_id; - auto term_dict = std::make_shared(); - term_dict->postings_start_offset = segments[i].postings_start_offset; - term_dict->term_dict_start_offset = segments[i].term_dict_start_offset; - store->term_dicts[seg_id] = term_dict; + auto seg_dict = std::make_shared(); + seg_dict->postings_start_offset = segments[i].postings_start_offset; + seg_dict->dict_start_offset = segments[i].dict_start_offset; + store->segment_dictionaries[seg_id] = seg_dict; } } -void GinIndexStoreDeserializer::readSegmentTermDictionaries() +void GinIndexStoreDeserializer::readSegmentDictionaries() { for (UInt32 seg_index = 0; seg_index < store->getNumOfSegments(); ++seg_index) - { - readSegmentTermDictionary(seg_index); - } + readSegmentDictionary(seg_index); } -void GinIndexStoreDeserializer::readSegmentTermDictionary(UInt32 segment_id) +void GinIndexStoreDeserializer::readSegmentDictionary(UInt32 segment_id) { /// Check validity of segment_id - auto it = store->term_dicts.find(segment_id); - if (it == store->term_dicts.end()) - { + auto it = store->segment_dictionaries.find(segment_id); + if (it == store->segment_dictionaries.end()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid segment id {}", segment_id); - } - assert(term_dict_file_stream != nullptr); + assert(dict_file_stream != nullptr); - /// Set file pointer of term dictionary file - term_dict_file_stream->seek(it->second->term_dict_start_offset, SEEK_SET); + /// Set file pointer of dictionary file + dict_file_stream->seek(it->second->dict_start_offset, SEEK_SET); it->second->offsets.getData().clear(); /// Read FST size - size_t fst_size{0}; - readVarUInt(fst_size, *term_dict_file_stream); + size_t fst_size = 0; + readVarUInt(fst_size, *dict_file_stream); - /// Read FST content + /// Read FST blob it->second->offsets.getData().resize(fst_size); - term_dict_file_stream->readStrict(reinterpret_cast(it->second->offsets.getData().data()), fst_size); + dict_file_stream->readStrict(reinterpret_cast(it->second->offsets.getData().data()), fst_size); } -SegmentedPostingsListContainer GinIndexStoreDeserializer::readSegmentedPostingsLists(const String& term) +GinSegmentedPostingsListContainer 
GinIndexStoreDeserializer::readSegmentedPostingsLists(const String & term) { assert(postings_file_stream != nullptr); - SegmentedPostingsListContainer container; - for (auto const& seg_term_dict : store->term_dicts) + GinSegmentedPostingsListContainer container; + for (auto const & seg_dict : store->segment_dictionaries) { - auto segment_id = seg_term_dict.first; + auto segment_id = seg_dict.first; - auto [offset, found] = seg_term_dict.second->offsets.getOutput(term); + auto [offset, found] = seg_dict.second->offsets.getOutput(term); if (!found) continue; // Set postings file pointer for reading postings list - postings_file_stream->seek(seg_term_dict.second->postings_start_offset + offset, SEEK_SET); + postings_file_stream->seek(seg_dict.second->postings_start_offset + offset, SEEK_SET); // Read posting list auto postings_list = GinIndexPostingsBuilder::deserialize(*postings_file_stream); @@ -427,10 +410,10 @@ SegmentedPostingsListContainer GinIndexStoreDeserializer::readSegmentedPostingsL return container; } -PostingsCachePtr GinIndexStoreDeserializer::createPostingsCacheFromTerms(const std::vector& terms) +GinPostingsCachePtr GinIndexStoreDeserializer::createPostingsCacheFromTerms(const std::vector & terms) { - auto postings_cache = std::make_shared(); - for (const auto& term : terms) + auto postings_cache = std::make_shared(); + for (const auto & term : terms) { // Make sure don't read for duplicated terms if (postings_cache->find(term) != postings_cache->end()) @@ -442,18 +425,26 @@ PostingsCachePtr GinIndexStoreDeserializer::createPostingsCacheFromTerms(const s return postings_cache; } -GinIndexStoreFactory& GinIndexStoreFactory::instance() +GinPostingsCachePtr PostingsCacheForStore::getPostings(const String & query_string) const +{ + auto it = cache.find(query_string); + if (it == cache.end()) + return nullptr; + return it->second; +} + +GinIndexStoreFactory & GinIndexStoreFactory::instance() { static GinIndexStoreFactory instance; return instance; } -GinIndexStorePtr GinIndexStoreFactory::get(const String& name, DataPartStoragePtr storage) +GinIndexStorePtr GinIndexStoreFactory::get(const String & name, DataPartStoragePtr storage) { - const String& part_path = storage->getRelativePath(); + const String & part_path = storage->getRelativePath(); String key = name + ":" + part_path; - std::lock_guard lock(stores_mutex); + std::lock_guard lock(mutex); GinIndexStores::const_iterator it = stores.find(key); if (it == stores.end()) @@ -464,7 +455,7 @@ GinIndexStorePtr GinIndexStoreFactory::get(const String& name, DataPartStoragePt GinIndexStoreDeserializer deserializer(store); deserializer.readSegments(); - deserializer.readSegmentTermDictionaries(); + deserializer.readSegmentDictionaries(); stores[key] = store; @@ -473,9 +464,9 @@ GinIndexStorePtr GinIndexStoreFactory::get(const String& name, DataPartStoragePt return it->second; } -void GinIndexStoreFactory::remove(const String& part_path) +void GinIndexStoreFactory::remove(const String & part_path) { - std::lock_guard lock(stores_mutex); + std::lock_guard lock(mutex); for (auto it = stores.begin(); it != stores.end();) { if (it->first.find(part_path) != String::npos) @@ -484,4 +475,5 @@ void GinIndexStoreFactory::remove(const String& part_path) ++it; } } + } diff --git a/src/Storages/MergeTree/GinIndexStore.h b/src/Storages/MergeTree/GinIndexStore.h index c326322191f..e075a7e73bf 100644 --- a/src/Storages/MergeTree/GinIndexStore.h +++ b/src/Storages/MergeTree/GinIndexStore.h @@ -1,18 +1,18 @@ #pragma once -#include -#include -#include 
-#include +#include #include #include #include #include -#include -#include #include +#include +#include +#include +#include +#include -/// GinIndexStore manages the inverted index for a data part, and it is made up of one or more immutable +/// GinIndexStore manages the generalized inverted index ("gin") for a data part, and it is made up of one or more immutable /// index segments. /// /// There are 4 types of index files in a store: @@ -20,40 +20,30 @@ /// 2. Segment Metadata file(.gin_seg): it contains index segment metadata. /// - Its file format is an array of GinIndexSegment as defined in this file. /// - postings_start_offset points to the file(.gin_post) starting position for the segment's postings list. -/// - term_dict_start_offset points to the file(.gin_dict) starting position for the segment's term dictionaries. -/// 3. Term Dictionary file(.gin_dict): it contains term dictionaries. +/// - dict_start_offset points to the file(.gin_dict) starting position for the segment's dictionaries. +/// 3. Dictionary file(.gin_dict): it contains dictionaries. /// - It contains an array of (FST_size, FST_blob) which has size and actual data of FST. /// 4. Postings Lists(.gin_post): it contains postings lists data. /// - It contains an array of serialized postings lists. /// /// During the searching in the segment, the segment's meta data can be found in .gin_seg file. From the meta data, -/// the starting position of its term dictionary is used to locate its FST. Then FST is read into memory. +/// the starting position of its dictionary is used to locate its FST. Then FST is read into memory. /// By using the term and FST, the offset("output" in FST) of the postings list for the term /// in FST is found. The offset plus the postings_start_offset is the file location in .gin_post file /// for its postings list. namespace DB { -enum : uint8_t -{ - GIN_VERSION_0 = 0, - GIN_VERSION_1 = 1, /// Initial version -}; - -static constexpr auto CURRENT_GIN_FILE_FORMAT_VERSION = GIN_VERSION_1; /// GinIndexPostingsList which uses 32-bit Roaring using GinIndexPostingsList = roaring::Roaring; - using GinIndexPostingsListPtr = std::shared_ptr; -/// Gin Index Postings List Builder. +/// Build a postings list for a term class GinIndexPostingsBuilder { public: - constexpr static int MIN_SIZE_FOR_ROARING_ENCODING = 16; - - GinIndexPostingsBuilder(UInt64 limit); + explicit GinIndexPostingsBuilder(UInt64 limit); /// Check whether a row_id is already added bool contains(UInt32 row_id) const; @@ -61,91 +51,88 @@ public: /// Add a row_id into the builder void add(UInt32 row_id); - /// Check whether the builder is using roaring bitmap - bool useRoaring() const; - - /// Check whether the postings list has been flagged to contain all row ids - bool containsAllRows() const; - /// Serialize the content of builder to given WriteBuffer, returns the bytes of serialized data - UInt64 serialize(WriteBuffer &buffer) const; + UInt64 serialize(WriteBuffer & buffer) const; /// Deserialize the postings list data from given ReadBuffer, return a pointer to the GinIndexPostingsList created by deserialization - static GinIndexPostingsListPtr deserialize(ReadBuffer &buffer); + static GinIndexPostingsListPtr deserialize(ReadBuffer & buffer); + private: + constexpr static int MIN_SIZE_FOR_ROARING_ENCODING = 16; + /// When the list length is no greater than MIN_SIZE_FOR_ROARING_ENCODING, array 'rowid_lst' is used + /// As a special case, rowid_lst[0] == CONTAINS_ALL encodes that all rowids are set. 
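The storage strategy spelled out in the comments above (a 16-slot array that is promoted to a roaring bitmap, with `rowid_lst[0] == CONTAINS_ALL` used as a "matches every row" escape hatch once the bitmap reaches its size limit) is compact enough to restate as a toy. The sketch below mirrors only the control flow; `std::set` stands in for `roaring::Roaring`, and every identifier is illustrative rather than ClickHouse's:

#include <array>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <set>

class TinyPostingsBuilder
{
    static constexpr size_t MIN_SIZE_FOR_SET_ENCODING = 16;
    static constexpr uint32_t CONTAINS_ALL = std::numeric_limits<uint32_t>::max();
    static constexpr uint8_t USES_SET = 0xFF;

    std::array<uint32_t, MIN_SIZE_FOR_SET_ENCODING> rowid_lst{};
    std::set<uint32_t> rowid_set;      /// stands in for the roaring bitmap
    uint8_t rowid_lst_length = 0;      /// 0xFF means "rowid_set is in use"
    size_t size_limit;

    bool usesSet() const { return rowid_lst_length == USES_SET; }
    bool containsAllRows() const { return rowid_lst[0] == CONTAINS_ALL; }

public:
    explicit TinyPostingsBuilder(size_t limit) : size_limit(limit) {}

    void add(uint32_t row_id)
    {
        if (containsAllRows())
            return;

        if (usesSet())
        {
            if (rowid_set.size() == size_limit)
            {
                rowid_lst_length = 1;           /// fall back to array mode ...
                rowid_lst[0] = CONTAINS_ALL;    /// ... flagged as "matches every row"
            }
            else
                rowid_set.insert(row_id);
        }
        else
        {
            rowid_lst[rowid_lst_length++] = row_id;
            if (rowid_lst_length == MIN_SIZE_FOR_SET_ENCODING)
            {
                rowid_set.insert(rowid_lst.begin(), rowid_lst.end());   /// promote the small array
                rowid_lst_length = USES_SET;
            }
        }
    }

    bool contains(uint32_t row_id) const
    {
        if (usesSet())
            return rowid_set.count(row_id) > 0;
        for (uint8_t i = 0; i < rowid_lst_length; ++i)
            if (rowid_lst[i] == row_id)
                return true;
        return false;
    }
};

The apparent intent of the two-tier encoding is to keep short postings lists allocation-free and compactly serializable, switching to the bitmap only for frequent terms.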
std::array rowid_lst; - /// When the list length is greater than MIN_SIZE_FOR_ROARING_ENCODING, Roaring bitmap 'rowid_bitmap' is used + /// When the list length is greater than MIN_SIZE_FOR_ROARING_ENCODING, roaring bitmap 'rowid_bitmap' is used roaring::Roaring rowid_bitmap; /// rowid_lst_length stores the number of row IDs in 'rowid_lst' array, can also be a flag(0xFF) indicating that roaring bitmap is used - UInt8 rowid_lst_length{0}; + UInt8 rowid_lst_length = 0; + + /// Indicates that all rowids are contained, see 'rowid_lst' + static constexpr UInt32 CONTAINS_ALL = std::numeric_limits::max(); + + /// Indicates that roaring bitmap is used, see 'rowid_lst_length'. + static constexpr UInt8 USES_BIT_MAP = 0xFF; - static constexpr UInt8 UsesBitMap = 0xFF; /// Clear the postings list and reset it with MATCHALL flags when the size of the postings list is beyond the limit UInt64 size_limit; + + /// Check whether the builder is using roaring bitmap + bool useRoaring() const { return rowid_lst_length == USES_BIT_MAP; } + + /// Check whether the postings list has been flagged to contain all row ids + bool containsAllRows() const { return rowid_lst[0] == CONTAINS_ALL; } }; -/// Container for postings lists for each segment -using SegmentedPostingsListContainer = std::unordered_map; +using GinIndexPostingsBuilderPtr = std::shared_ptr; -/// Postings lists and terms built from query string -using PostingsCache = std::unordered_map; -using PostingsCachePtr = std::shared_ptr; - -/// Gin Index Segment information, which contains: +/// Gin index segment descriptor, which contains: struct GinIndexSegment { /// Segment ID retrieved from next available ID from file .gin_sid UInt32 segment_id = 0; - /// Next row ID for this segment + /// Start row ID for this segment UInt32 next_row_id = 1; /// .gin_post file offset of this segment's postings lists UInt64 postings_start_offset = 0; - /// .term_dict file offset of this segment's term dictionaries - UInt64 term_dict_start_offset = 0; + /// .gin_dict file offset of this segment's dictionaries + UInt64 dict_start_offset = 0; }; -using GinIndexSegments = std::vector; - -struct SegmentTermDictionary +struct GinSegmentDictionary { /// .gin_post file offset of this segment's postings lists UInt64 postings_start_offset; - /// .gin_dict file offset of this segment's term dictionaries - UInt64 term_dict_start_offset; + /// .gin_dict file offset of this segment's dictionaries + UInt64 dict_start_offset; - /// Finite State Transducer, which can be viewed as a map of , where offset is the + /// (Minimized) Finite State Transducer, which can be viewed as a map of , where offset is the /// offset to the term's posting list in postings list file FST::FiniteStateTransducer offsets; }; -using SegmentTermDictionaryPtr = std::shared_ptr; +using GinSegmentDictionaryPtr = std::shared_ptr; -/// Term dictionaries indexed by segment ID -using SegmentTermDictionaries = std::unordered_map; - -/// Gin Index Store which has Gin Index meta data for the corresponding Data Part +/// Gin index store which has gin index meta data for the corresponding column data part class GinIndexStore { public: - using GinIndexPostingsBuilderPtr = std::shared_ptr; /// Container for all term's Gin Index Postings List Builder using GinIndexPostingsBuilderContainer = std::unordered_map; - explicit GinIndexStore(const String & name_, DataPartStoragePtr storage_); - - GinIndexStore(const String& name_, DataPartStoragePtr storage_, MutableDataPartStoragePtr data_part_storage_builder_, UInt64 
max_digestion_size_); + GinIndexStore(const String & name_, DataPartStoragePtr storage_); + GinIndexStore(const String & name_, DataPartStoragePtr storage_, MutableDataPartStoragePtr data_part_storage_builder_, UInt64 max_digestion_size_); /// Check existence by checking the existence of file .gin_sid bool exists() const; - /// Get a range of next 'numIDs' available row IDs + /// Get a range of next 'numIDs'-many available row IDs UInt32 getNextRowIDRange(size_t numIDs); /// Get next available segment ID by updating file .gin_sid @@ -155,25 +142,26 @@ public: UInt32 getNumOfSegments(); /// Get current postings list builder - const GinIndexPostingsBuilderContainer& getPostings() const { return current_postings; } + const GinIndexPostingsBuilderContainer & getPostingsListBuilder() const { return current_postings; } /// Set postings list builder for given term void setPostingsBuilder(const String & term, GinIndexPostingsBuilderPtr builder) { current_postings[term] = builder; } + /// Check if we need to write segment to Gin index files bool needToWrite() const; /// Accumulate the size of text data which has been digested void incrementCurrentSizeBy(UInt64 sz) { current_size += sz; } - UInt32 getCurrentSegmentID() const { return current_segment.segment_id;} + UInt32 getCurrentSegmentID() const { return current_segment.segment_id; } /// Do last segment writing void finalize(); - /// method for writing segment data to Gin index files + /// Method for writing segment data to Gin index files void writeSegment(); - const String & getName() const {return name;} + const String & getName() const { return name; } private: friend class GinIndexStoreDeserializer; @@ -182,7 +170,7 @@ private: void initFileStreams(); /// Get a range of next available segment IDs by updating file .gin_sid - UInt32 getNextSegmentIDRange(const String &file_name, size_t n); + UInt32 getNextSegmentIDRange(const String & file_name, size_t n); String name; DataPartStoragePtr storage; @@ -190,37 +178,89 @@ private: UInt32 cached_segment_num = 0; - std::mutex gin_index_store_mutex; + std::mutex mutex; - /// Terms dictionaries which are loaded from .gin_dict files - SegmentTermDictionaries term_dicts; + /// Dictionaries indexed by segment ID + using GinSegmentDictionaries = std::unordered_map; - /// container for building postings lists during index construction + /// Term's dictionaries which are loaded from .gin_dict files + GinSegmentDictionaries segment_dictionaries; + + /// Container for building postings lists during index construction GinIndexPostingsBuilderContainer current_postings; - /// The following is for segmentation of Gin index - GinIndexSegment current_segment{}; + /// For the segmentation of Gin indexes + GinIndexSegment current_segment; UInt64 current_size = 0; const UInt64 max_digestion_size = 0; - /// File streams for segment, term dictionaries and postings lists - std::unique_ptr segment_file_stream; - std::unique_ptr term_dict_file_stream; + /// File streams for segment, dictionaries and postings lists + std::unique_ptr metadata_file_stream; + std::unique_ptr dict_file_stream; std::unique_ptr postings_file_stream; static constexpr auto GIN_SEGMENT_ID_FILE_TYPE = ".gin_sid"; - static constexpr auto GIN_SEGMENT_FILE_TYPE = ".gin_seg"; + static constexpr auto GIN_SEGMENT_METADATA_FILE_TYPE = ".gin_seg"; static constexpr auto GIN_DICTIONARY_FILE_TYPE = ".gin_dict"; static constexpr auto GIN_POSTINGS_FILE_TYPE = ".gin_post"; + + enum class Format : uint8_t + { + v0 = 0, + v1 = 1, /// Initial version + }; + + static 
constexpr auto CURRENT_GIN_FILE_FORMAT_VERSION = Format::v0; }; using GinIndexStorePtr = std::shared_ptr; -/// GinIndexStores indexed by part file path -using GinIndexStores = std::unordered_map; +/// Container for postings lists for each segment +using GinSegmentedPostingsListContainer = std::unordered_map; + +/// Postings lists and terms built from query string +using GinPostingsCache = std::unordered_map; +using GinPostingsCachePtr = std::shared_ptr; + +/// Gin index store reader which helps to read segments, dictionaries and postings list +class GinIndexStoreDeserializer : private boost::noncopyable +{ +public: + explicit GinIndexStoreDeserializer(const GinIndexStorePtr & store_); + + /// Read segment information from .gin_seg files + void readSegments(); + + /// Read all dictionaries from .gin_dict files + void readSegmentDictionaries(); + + /// Read dictionary for given segment id + void readSegmentDictionary(UInt32 segment_id); + + /// Read postings lists for the term + GinSegmentedPostingsListContainer readSegmentedPostingsLists(const String & term); + + /// Read postings lists for terms (which are created by tokenzing query string) + GinPostingsCachePtr createPostingsCacheFromTerms(const std::vector & terms); + +private: + /// Initialize gin index files + void initFileStreams(); + + /// The store for the reader + GinIndexStorePtr store; + + /// File streams for reading Gin Index + std::unique_ptr metadata_file_stream; + std::unique_ptr dict_file_stream; + std::unique_ptr postings_file_stream; + + /// Current segment, used in building index + GinIndexSegment current_segment; +}; /// PostingsCacheForStore contains postings lists from 'store' which are retrieved from Gin index files for the terms in query strings -/// PostingsCache is per query string(one query can have multiple query strings): when skipping index(row ID ranges) is used for the part during the +/// GinPostingsCache is per query string (one query can have multiple query strings): when skipping index (row ID ranges) is used for the part during the /// query, the postings cache is created and associated with the store where postings lists are read /// for the tokenized query string. The postings caches are released automatically when the query is done. 
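`GinIndexStoreDeserializer` above is the read side of the layout described at the top of this header: segment metadata supplies `dict_start_offset` and `postings_start_offset`, the dictionary (an FST) maps a term to an offset, and the postings list is then deserialized from `postings_start_offset + offset` in the .gin_post file. A toy model of that indirection, with an ordinary `std::map` playing the role of the FST and a string playing the role of the postings file (nothing below is ClickHouse API):

#include <cstdint>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>

/// One segment, reduced to its essentials: a term -> offset dictionary and a postings blob
/// where each list is stored as <count><row_id...>.
struct ToySegment
{
    std::map<std::string, uint64_t> dictionary;   /// stands in for the FST from .gin_dict
    std::string postings_blob;                    /// stands in for this segment's slice of .gin_post
};

void appendPostings(ToySegment & segment, const std::string & term, const std::vector<uint32_t> & row_ids)
{
    segment.dictionary[term] = segment.postings_blob.size();   /// record the offset before writing the list
    uint32_t count = static_cast<uint32_t>(row_ids.size());
    segment.postings_blob.append(reinterpret_cast<const char *>(&count), sizeof(count));
    segment.postings_blob.append(reinterpret_cast<const char *>(row_ids.data()), count * sizeof(uint32_t));
}

std::vector<uint32_t> readPostings(const ToySegment & segment, const std::string & term)
{
    auto it = segment.dictionary.find(term);
    if (it == segment.dictionary.end())
        return {};

    std::istringstream in(segment.postings_blob);
    in.seekg(static_cast<std::streamoff>(it->second));   /// postings_start_offset + offset in the real store

    uint32_t count = 0;
    in.read(reinterpret_cast<char *>(&count), sizeof(count));

    std::vector<uint32_t> row_ids(count);
    in.read(reinterpret_cast<char *>(row_ids.data()), count * sizeof(uint32_t));
    return row_ids;
}

int main()
{
    ToySegment segment;
    appendPostings(segment, "clickhouse", {1, 7, 42});
    appendPostings(segment, "inverted", {3});

    for (uint32_t row_id : readPostings(segment, "clickhouse"))
        std::cout << row_id << '\n';
}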
struct PostingsCacheForStore @@ -229,76 +269,31 @@ struct PostingsCacheForStore GinIndexStorePtr store; /// map of - std::unordered_map cache; + std::unordered_map cache; /// Get postings lists for query string, return nullptr if not found - PostingsCachePtr getPostings(const String &query_string) const - { - auto it {cache.find(query_string)}; - - if (it == cache.cend()) - { - return nullptr; - } - return it->second; - } + GinPostingsCachePtr getPostings(const String & query_string) const; }; -/// GinIndexStore Factory, which is a singleton for storing GinIndexStores +/// A singleton for storing GinIndexStores class GinIndexStoreFactory : private boost::noncopyable { public: /// Get singleton of GinIndexStoreFactory - static GinIndexStoreFactory& instance(); + static GinIndexStoreFactory & instance(); /// Get GinIndexStore by using index name, disk and part_path (which are combined to create key in stores) - GinIndexStorePtr get(const String& name, DataPartStoragePtr storage); + GinIndexStorePtr get(const String & name, DataPartStoragePtr storage); /// Remove all Gin index files which are under the same part_path - void remove(const String& part_path); + void remove(const String & part_path); + + /// GinIndexStores indexed by part file path + using GinIndexStores = std::unordered_map; private: GinIndexStores stores; - std::mutex stores_mutex; -}; - -/// Term dictionary information, which contains: - -/// Gin Index Store Reader which helps to read segments, term dictionaries and postings list -class GinIndexStoreDeserializer : private boost::noncopyable -{ -public: - explicit GinIndexStoreDeserializer(const GinIndexStorePtr & store_); - - /// Read all segment information from .gin_seg files - void readSegments(); - - /// Read all term dictionaries from .gin_dict files - void readSegmentTermDictionaries(); - - /// Read term dictionary for given segment id - void readSegmentTermDictionary(UInt32 segment_id); - - /// Read postings lists for the term - SegmentedPostingsListContainer readSegmentedPostingsLists(const String& term); - - /// Read postings lists for terms(which are created by tokenzing query string) - PostingsCachePtr createPostingsCacheFromTerms(const std::vector& terms); - -private: - /// Initialize Gin index files - void initFileStreams(); - - /// The store for the reader - GinIndexStorePtr store; - - /// File streams for reading Gin Index - std::unique_ptr segment_file_stream; - std::unique_ptr term_dict_file_stream; - std::unique_ptr postings_file_stream; - - /// Current segment, used in building index - GinIndexSegment current_segment; + std::mutex mutex; }; } diff --git a/src/Storages/MergeTree/IDataPartStorage.h b/src/Storages/MergeTree/IDataPartStorage.h index 70cc4d3fe70..f92784cb0da 100644 --- a/src/Storages/MergeTree/IDataPartStorage.h +++ b/src/Storages/MergeTree/IDataPartStorage.h @@ -1,10 +1,11 @@ #pragma once #include #include +#include #include #include #include -#include +#include #include #include #include @@ -22,7 +23,6 @@ struct CanRemoveDescription { bool can_remove_anything; NameSet files_not_to_remove; - }; using CanRemoveCallback = std::function; @@ -79,6 +79,8 @@ class IDataPartStorage : public boost::noncopyable public: virtual ~IDataPartStorage() = default; + virtual MergeTreeDataPartStorageType getType() const = 0; + /// Methods to get path components of a data part. 
virtual std::string getFullPath() const = 0; /// '/var/lib/clickhouse/data/database/table/moving/all_1_5_1' virtual std::string getRelativePath() const = 0; /// 'database/table/moving/all_1_5_1' @@ -117,8 +119,6 @@ public: std::optional read_hint, std::optional file_size) const = 0; - virtual void checkConsistency(const MergeTreeDataPartChecksums & checksums) const = 0; - struct ProjectionChecksums { const std::string & name; @@ -133,7 +133,6 @@ public: const MergeTreeDataPartChecksums & checksums, std::list projections, bool is_temp, - MergeTreeDataPartState state, Poco::Logger * log) = 0; /// Get a name like 'prefix_partdir_tryN' which does not exist in a root dir. @@ -157,7 +156,6 @@ public: virtual void syncRevision(UInt64 revision) const = 0; virtual UInt64 getRevision() const = 0; - virtual std::unordered_map getSerializedMetadata(const std::vector & paths) const = 0; /// Get a path for internal disk if relevant. It is used mainly for logging. virtual std::string getDiskPath() const = 0; @@ -172,6 +170,28 @@ public: /// Required for distinguish different copies of the same part on remote FS. virtual String getUniqueId() const = 0; + + /// Represents metadata which is required for fetching of part. + struct ReplicatedFilesDescription + { + using InputBufferGetter = std::function()>; + + struct ReplicatedFileDescription + { + InputBufferGetter input_buffer_getter; + size_t file_size; + }; + + std::map files; + + /// Unique string that is used to distinguish different + /// copies of the same part on remote disk + String unique_id; + }; + + virtual ReplicatedFilesDescription getReplicatedFilesDescription(const NameSet & file_names) const = 0; + virtual ReplicatedFilesDescription getReplicatedFilesDescriptionForRemoteDisk(const NameSet & file_names) const = 0; + /// Create a backup of a data part. /// This method adds a new entry to backup_entries. /// Also creates a new tmp_dir for internal disk (if disk is mentioned the first time). @@ -215,8 +235,16 @@ public: virtual std::unique_ptr writeFile( const String & name, size_t buf_size, + WriteMode mode, const WriteSettings & settings) = 0; - virtual std::unique_ptr writeFile(const String & name, size_t buf_size, WriteMode mode, const WriteSettings & settings) = 0; + + std::unique_ptr writeFile( + const String & name, + size_t buf_size, + const WriteSettings & settings) + { + return writeFile(name, buf_size, WriteMode::Rewrite, settings); + } /// A special const method to write transaction file. /// It's const, because file with transaction metadata @@ -252,6 +280,9 @@ public: virtual void beginTransaction() = 0; /// Commits a transaction of mutable operations. virtual void commitTransaction() = 0; + /// Prepares transaction to commit. + /// It may be flush of buffered data or similar. 
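The precommitTransaction() hook declared right after this note splits a storage transaction into three phases: begin, precommit (flush whatever is buffered while errors can still be handled per file), and a cheap final commit. The toy FileStorage below is an invented class, not IDataPartStorage; it only shows how a writer such as writeMetadata() might drive those phases.

```cpp
#include <iostream>
#include <string>
#include <vector>

/// Invented toy storage with the same three-phase transaction shape.
class FileStorage
{
public:
    void beginTransaction() { buffered.clear(); }

    /// Write into the transaction buffer instead of touching final files directly.
    void writeFile(const std::string & name) { buffered.push_back(name); }

    /// Flush buffered writes; failures here can still be reported and handled.
    void precommitTransaction()
    {
        for (const auto & file : buffered)
            std::cout << "flush " << file << '\n';
    }

    /// Publish the result; by now only cheap, hard-to-fail work remains.
    void commitTransaction()
    {
        buffered.clear();
        std::cout << "committed\n";
    }

private:
    std::vector<std::string> buffered;
};

int main()
{
    FileStorage storage;
    storage.beginTransaction();
    storage.writeFile("columns.txt");
    storage.writeFile("checksums.txt");
    storage.precommitTransaction();
    storage.commitTransaction();
}
```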
+ virtual void precommitTransaction() = 0; virtual bool hasActiveTransaction() const = 0; }; @@ -279,4 +310,9 @@ private: MutableDataPartStoragePtr storage; }; +inline bool isFullPartStorage(const IDataPartStorage & storage) +{ + return storage.getType() == MergeTreeDataPartStorageType::Full; +} + } diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index ec2ea448290..5de13020a1d 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -299,33 +299,6 @@ static void decrementTypeMetric(MergeTreeDataPartType type) } } -IMergeTreeDataPart::IMergeTreeDataPart( - const MergeTreeData & storage_, - const String & name_, - const MutableDataPartStoragePtr & data_part_storage_, - Type part_type_, - const IMergeTreeDataPart * parent_part_) - : DataPartStorageHolder(data_part_storage_) - , storage(storage_) - , name(name_) - , info(MergeTreePartInfo::fromPartName(name_, storage.format_version)) - , index_granularity_info(storage_, part_type_) - , part_type(part_type_) - , parent_part(parent_part_) - , use_metadata_cache(storage.use_metadata_cache) -{ - if (parent_part) - state = MergeTreeDataPartState::Active; - - incrementStateMetric(state); - incrementTypeMetric(part_type); - - minmax_idx = std::make_shared(); - - initializeIndexGranularityInfo(); - initializePartMetadataManager(); -} - IMergeTreeDataPart::IMergeTreeDataPart( const MergeTreeData & storage_, const String & name_, @@ -471,6 +444,7 @@ void IMergeTreeDataPart::setColumns(const NamesAndTypesList & new_columns, const } columns_description = ColumnsDescription(columns); + columns_description_with_collected_nested = ColumnsDescription(Nested::collect(columns)); } NameAndTypePair IMergeTreeDataPart::getColumn(const String & column_name) const @@ -518,7 +492,8 @@ void IMergeTreeDataPart::removeIfNeeded() String file_name = fileName(getDataPartStorage().getPartDirectory()); if (file_name.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "relative_path {} of part {} is invalid or not set", getDataPartStorage().getPartDirectory(), name); + throw Exception(ErrorCodes::LOGICAL_ERROR, "relative_path {} of part {} is invalid or not set", + getDataPartStorage().getPartDirectory(), name); if (!startsWith(file_name, "tmp") && !endsWith(file_name, ".tmp_proj")) { @@ -579,15 +554,15 @@ void IMergeTreeDataPart::assertState(const std::initializer_list && projection_part) +{ + /// Here should be a check that projection we are trying to add + /// does not exist, but unfortunately this check fails in tests. + /// TODO: fix. 
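The assignment that follows deliberately uses operator[], so re-adding a projection overwrites the previous entry (the duplicate check mentioned in the TODO above fails in tests). Purely as an illustration of what such a check could look like, not what the diff does, a contains-then-emplace variant would reject clashes:

```cpp
#include <map>
#include <memory>
#include <stdexcept>
#include <string>

struct ProjectionPart {};  /// stand-in for the projection's IMergeTreeDataPart

/// Illustrative only: refuse to overwrite an already registered projection.
void addProjectionPartChecked(
    std::map<std::string, std::shared_ptr<ProjectionPart>> & projection_parts,
    const std::string & projection_name,
    std::shared_ptr<ProjectionPart> && projection_part)
{
    if (projection_parts.contains(projection_name))
        throw std::runtime_error("Projection " + projection_name + " already exists");

    projection_parts.emplace(projection_name, std::move(projection_part));
}
```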
+ projection_parts[projection_name] = std::move(projection_part); +} + void IMergeTreeDataPart::loadProjections(bool require_columns_checksums, bool check_consistency) { auto metadata_snapshot = storage.getInMemoryMetadataPtr(); for (const auto & projection : metadata_snapshot->projections) { - String path = /*getRelativePath() + */ projection.name + ".proj"; + auto path = projection.name + ".proj"; if (getDataPartStorage().exists(path)) { - auto projection_part_storage = getDataPartStorage().getProjection(projection.name + ".proj"); - auto part = storage.createPart(projection.name, {"all", 0, 0, 0}, projection_part_storage, this); + auto part = getProjectionPartBuilder(projection.name).withPartFormatFromDisk().build(); part->loadColumnsChecksumsIndexes(require_columns_checksums, check_consistency); - projection_parts.emplace(projection.name, std::move(part)); + addProjectionPart(projection.name, std::move(part)); } } } void IMergeTreeDataPart::loadIndexGranularity() { - throw Exception("Method 'loadIndexGranularity' is not implemented for part with type " + getType().toString(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Method 'loadIndexGranularity' is not implemented for part with type {}", getType().toString()); } /// Currently we don't cache mark files of part, because cache other meta files is enough to speed up loading. @@ -728,7 +722,7 @@ void IMergeTreeDataPart::loadIndex() { /// It can be empty in case of mutations if (!index_granularity.isInitialized()) - throw Exception("Index granularity is not loaded before index loading", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Index granularity is not loaded before index loading"); auto metadata_snapshot = storage.getInMemoryMetadataPtr(); if (parent_part) @@ -764,13 +758,12 @@ void IMergeTreeDataPart::loadIndex() { loaded_index[i]->protect(); if (loaded_index[i]->size() != marks_count) - throw Exception("Cannot read all data from index file " + index_path - + "(expected size: " + toString(marks_count) + ", read: " + toString(loaded_index[i]->size()) + ")", - ErrorCodes::CANNOT_READ_ALL_DATA); + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Cannot read all data from index file {}(expected size: " + "{}, read: {})", index_path, marks_count, loaded_index[i]->size()); } if (!index_file->eof()) - throw Exception("Index file " + index_path + " is unexpectedly long", ErrorCodes::EXPECTED_END_OF_FILE); + throw Exception(ErrorCodes::EXPECTED_END_OF_FILE, "Index file {} is unexpectedly long", index_path); index.assign(std::make_move_iterator(loaded_index.begin()), std::make_move_iterator(loaded_index.end())); } @@ -863,6 +856,8 @@ void IMergeTreeDataPart::writeMetadata(const String & filename, const WriteSetti auto & data_part_storage = getDataPartStorage(); auto tmp_filename = filename + ".tmp"; + data_part_storage.beginTransaction(); + try { { @@ -878,15 +873,20 @@ void IMergeTreeDataPart::writeMetadata(const String & filename, const WriteSetti try { if (data_part_storage.exists(tmp_filename)) + { data_part_storage.removeFile(tmp_filename); + data_part_storage.commitTransaction(); + } } catch (...) 
{ - tryLogCurrentException("DataPartStorageOnDisk"); + tryLogCurrentException("DataPartStorageOnDiskFull"); } throw; } + + data_part_storage.commitTransaction(); } void IMergeTreeDataPart::writeChecksums(const MergeTreeDataPartChecksums & checksums_, const WriteSettings & settings) @@ -939,7 +939,7 @@ void IMergeTreeDataPart::writeVersionMetadata(const VersionMetadata & version_, } catch (...) { - tryLogCurrentException("DataPartStorageOnDisk"); + tryLogCurrentException("DataPartStorageOnDiskFull"); } throw; @@ -1055,10 +1055,9 @@ void IMergeTreeDataPart::loadPartitionAndMinMaxIndex() auto metadata_snapshot = storage.getInMemoryMetadataPtr(); String calculated_partition_id = partition.getID(metadata_snapshot->getPartitionKey().sample_block); if (calculated_partition_id != info.partition_id) - throw Exception( - "While loading part " + getDataPartStorage().getFullPath() + ": calculated partition ID: " + calculated_partition_id - + " differs from partition ID in part name: " + info.partition_id, - ErrorCodes::CORRUPTED_DATA); + throw Exception(ErrorCodes::CORRUPTED_DATA, "While loading part {}: " + "calculated partition ID: {} differs from partition ID in part name: {}", + getDataPartStorage().getFullPath(), calculated_partition_id, info.partition_id); } void IMergeTreeDataPart::appendFilesOfPartitionAndMinMaxIndex(Strings & files) const @@ -1125,7 +1124,7 @@ void IMergeTreeDataPart::loadRowsCount() { bool exists = metadata_manager->exists("count.txt"); if (!exists) - throw Exception("No count.txt in part " + name, ErrorCodes::NO_FILE_IN_DATA_PART); + throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No count.txt in part {}", name); read_rows_count(); @@ -1147,9 +1146,10 @@ void IMergeTreeDataPart::loadRowsCount() if (rows_in_column != rows_count) { throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Column {} has rows count {} according to size in memory " - "and size of single value, but data part {} has {} rows", backQuote(column.name), rows_in_column, name, rows_count); + ErrorCodes::LOGICAL_ERROR, + "Column {} has rows count {} according to size in memory " + "and size of single value, but data part {} has {} rows", + backQuote(column.name), rows_in_column, name, rows_count); } size_t last_possibly_incomplete_mark_rows = index_granularity.getLastNonFinalMarkRows(); @@ -1159,20 +1159,25 @@ void IMergeTreeDataPart::loadRowsCount() if (rows_in_column < index_granularity_without_last_mark) { throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Column {} has rows count {} according to size in memory " - "and size of single value, but index granularity in part {} without last mark has {} rows, which is more than in column", - backQuote(column.name), rows_in_column, name, index_granularity.getTotalRows()); + ErrorCodes::LOGICAL_ERROR, + "Column {} has rows count {} according to size in memory " + "and size of single value, " + "but index granularity in part {} without last mark has {} rows, which " + "is more than in column", + backQuote(column.name), rows_in_column, name, index_granularity.getTotalRows()); } /// In last mark we actually written less or equal rows than stored in last mark of index granularity if (rows_in_column - index_granularity_without_last_mark > last_possibly_incomplete_mark_rows) { throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Column {} has rows count {} in last mark according to size in memory " - "and size of single value, but index granularity in part {} in last mark has {} rows which is less than in column", - backQuote(column.name), rows_in_column - 
index_granularity_without_last_mark, name, last_possibly_incomplete_mark_rows); + ErrorCodes::LOGICAL_ERROR, + "Column {} has rows count {} in last mark according to size in memory " + "and size of single value, " + "but index granularity in part {} " + "in last mark has {} rows which is less than in column", + backQuote(column.name), rows_in_column - index_granularity_without_last_mark, + name, last_possibly_incomplete_mark_rows); } } } @@ -1201,24 +1206,22 @@ void IMergeTreeDataPart::loadRowsCount() if (column_size % sizeof_field != 0) { - throw Exception( - "Uncompressed size of column " + column.name + "(" + toString(column_size) - + ") is not divisible by the size of value (" + toString(sizeof_field) + ")", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Uncompressed size of column {}({}) is not divisible by the size of value ({})", + column.name, column_size, sizeof_field); } size_t last_mark_index_granularity = index_granularity.getLastNonFinalMarkRows(); size_t rows_approx = index_granularity.getTotalRows(); if (!(rows_count <= rows_approx && rows_approx < rows_count + last_mark_index_granularity)) - throw Exception( - "Unexpected size of column " + column.name + ": " + toString(rows_count) + " rows, expected " - + toString(rows_approx) + "+-" + toString(last_mark_index_granularity) + " rows according to the index", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected size of column {}: " + "{} rows, expected {}+-{} rows according to the index", + column.name, rows_count, rows_approx, toString(last_mark_index_granularity)); return; } - throw Exception("Data part doesn't contain fixed size column (even Date column)", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Data part doesn't contain fixed size column (even Date column)"); } } @@ -1246,11 +1249,11 @@ void IMergeTreeDataPart::loadTTLInfos() } catch (const JSONException &) { - throw Exception("Error while parsing file ttl.txt in part: " + name, ErrorCodes::BAD_TTL_FILE); + throw Exception(ErrorCodes::BAD_TTL_FILE, "Error while parsing file ttl.txt in part: {}", name); } } else - throw Exception("Unknown ttl format version: " + toString(format_version), ErrorCodes::BAD_TTL_FILE); + throw Exception(ErrorCodes::BAD_TTL_FILE, "Unknown ttl format version: {}", toString(format_version)); } } @@ -1268,7 +1271,7 @@ void IMergeTreeDataPart::loadUUID() auto in = metadata_manager->read(UUID_FILE_NAME); readText(uuid, *in); if (uuid == UUIDHelpers::Nil) - throw Exception("Unexpected empty " + String(UUID_FILE_NAME) + " in part: " + name, ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected empty {} in part: {}", String(UUID_FILE_NAME), name); } } @@ -1290,8 +1293,8 @@ void IMergeTreeDataPart::loadColumns(bool require) { /// We can get list of columns only from columns.txt in compact parts. if (require || part_type == Type::Compact) - throw Exception("No columns.txt in part " + name + ", expected path " + path + " on drive " + getDataPartStorage().getDiskName(), - ErrorCodes::NO_FILE_IN_DATA_PART); + throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No columns.txt in part {}, expected path {} on drive {}", + name, path, getDataPartStorage().getDiskName()); /// If there is no file with a list of columns, write it down. 
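Most hunks in this file, and in the rest of the diff, apply one mechanical rewrite: exception messages built by string concatenation (with the error code passed last) become a format string with {} placeholders and the error code passed first. The sketch below shows the two styles side by side using a hypothetical minimal Exception wrapper over the fmt library, not ClickHouse's own Exception class.

```cpp
#include <fmt/format.h>
#include <stdexcept>
#include <string>
#include <utility>

/// Hypothetical minimal stand-in: error code first, then a compile-time checked format string.
struct Exception : std::runtime_error
{
    template <typename... Args>
    Exception(int code_, fmt::format_string<Args...> fmt_str, Args &&... args)
        : std::runtime_error(fmt::format(fmt_str, std::forward<Args>(args)...)), code(code_)
    {
    }

    int code;
};

constexpr int NO_FILE_IN_DATA_PART = 1;  /// arbitrary code value for the sketch

void oldStyle(const std::string & name, const std::string & path)
{
    /// Before: message assembled by concatenation, error code hidden at the end.
    throw std::runtime_error("No columns.txt in part " + name + ", expected path " + path);
}

void newStyle(const std::string & name, const std::string & path)
{
    /// After: code up front, message as a format string, arguments passed separately.
    throw Exception(NO_FILE_IN_DATA_PART, "No columns.txt in part {}, expected path {}", name, path);
}
```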
for (const NameAndTypePair & column : metadata_snapshot->getColumns().getAllPhysical()) @@ -1299,7 +1302,7 @@ void IMergeTreeDataPart::loadColumns(bool require) loaded_columns.push_back(column); if (columns.empty()) - throw Exception("No columns in part " + name, ErrorCodes::NO_FILE_IN_DATA_PART); + throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No columns in part {}", name); writeColumns(loaded_columns, {}); } @@ -1624,9 +1627,9 @@ void IMergeTreeDataPart::initializePartMetadataManager() void IMergeTreeDataPart::initializeIndexGranularityInfo() { - auto mrk_ext = MergeTreeIndexGranularityInfo::getMarksExtensionFromFilesystem(getDataPartStorage()); - if (mrk_ext) - index_granularity_info = MergeTreeIndexGranularityInfo(storage, MarkType{*mrk_ext}); + auto mrk_type = MergeTreeIndexGranularityInfo::getMarksTypeFromFilesystem(getDataPartStorage()); + if (mrk_type) + index_granularity_info = MergeTreeIndexGranularityInfo(storage, *mrk_type); else index_granularity_info = MergeTreeIndexGranularityInfo(storage, part_type); } @@ -1684,7 +1687,8 @@ void IMergeTreeDataPart::remove() projection_checksums.emplace_back(IDataPartStorage::ProjectionChecksums{.name = p_name, .checksums = projection_part->checksums}); } - getDataPartStorage().remove(std::move(can_remove_callback), checksums, projection_checksums, is_temp, getState(), storage.log); + bool is_temporary_part = is_temp || state == MergeTreeDataPartState::Temporary; + getDataPartStorage().remove(std::move(can_remove_callback), checksums, projection_checksums, is_temporary_part, storage.log); } std::optional IMergeTreeDataPart::getRelativePathForPrefix(const String & prefix, bool detached, bool broken) const @@ -1742,10 +1746,10 @@ DataPartStoragePtr IMergeTreeDataPart::makeCloneInDetached(const String & prefix return getDataPartStorage().freeze( storage.relative_data_path, *maybe_path_in_detached, - /*make_source_readonly*/ true, - {}, + /*make_source_readonly=*/ true, + /*save_metadata_callback=*/ {}, copy_instead_of_hardlink, - {}); + /*files_to_copy_instead_of_hardlinks=*/ {}); } MutableDataPartStoragePtr IMergeTreeDataPart::makeCloneOnDisk(const DiskPtr & disk, const String & directory_name) const @@ -1753,9 +1757,9 @@ MutableDataPartStoragePtr IMergeTreeDataPart::makeCloneOnDisk(const DiskPtr & di assertOnDisk(); if (disk->getName() == getDataPartStorage().getDiskName()) - throw Exception("Can not clone data part " + name + " to same disk " + getDataPartStorage().getDiskName(), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can not clone data part {} to same disk {}", name, getDataPartStorage().getDiskName()); if (directory_name.empty()) - throw Exception("Can not clone data part " + name + " to empty directory.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can not clone data part {} to empty directory.", name); String path_to_clone = fs::path(storage.relative_data_path) / directory_name / ""; return getDataPartStorage().clonePart(path_to_clone, getDataPartStorage().getPartDirectory(), disk, storage.log); @@ -1778,28 +1782,28 @@ void IMergeTreeDataPart::checkConsistencyBase() const if (!pk.column_names.empty() && (!checksums.files.contains("primary" + getIndexExtension(false)) && !checksums.files.contains("primary" + getIndexExtension(true)))) - throw Exception("No checksum for " + toString("primary" + getIndexExtension(false)) + " or " + toString("primary" + getIndexExtension(true)), - ErrorCodes::NO_FILE_IN_DATA_PART); + throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No 
checksum for {} or {}", + toString("primary" + getIndexExtension(false)), toString("primary" + getIndexExtension(true))); if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { if (!checksums.files.contains("count.txt")) - throw Exception("No checksum for count.txt", ErrorCodes::NO_FILE_IN_DATA_PART); + throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No checksum for count.txt"); if (metadata_snapshot->hasPartitionKey() && !checksums.files.contains("partition.dat")) - throw Exception("No checksum for partition.dat", ErrorCodes::NO_FILE_IN_DATA_PART); + throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No checksum for partition.dat"); if (!isEmpty() && !parent_part) { for (const String & col_name : storage.getMinMaxColumnsNames(partition_key)) { if (!checksums.files.contains("minmax_" + escapeForFileName(col_name) + ".idx")) - throw Exception("No minmax idx file checksum for column " + col_name, ErrorCodes::NO_FILE_IN_DATA_PART); + throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No minmax idx file checksum for column {}", col_name); } } } - getDataPartStorage().checkConsistency(checksums); + checksums.checkSizes(getDataPartStorage()); } else { @@ -1840,7 +1844,7 @@ void IMergeTreeDataPart::checkConsistencyBase() const void IMergeTreeDataPart::checkConsistency(bool /* require_part_metadata */) const { - throw Exception("Method 'checkConsistency' is not implemented for part with type " + getType().toString(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'checkConsistency' is not implemented for part with type {}", getType().toString()); } void IMergeTreeDataPart::calculateColumnsAndSecondaryIndicesSizesOnDisk() @@ -1852,7 +1856,7 @@ void IMergeTreeDataPart::calculateColumnsAndSecondaryIndicesSizesOnDisk() void IMergeTreeDataPart::calculateColumnsSizesOnDisk() { if (getColumns().empty() || checksums.empty()) - throw Exception("Cannot calculate columns sizes when columns or checksums are not initialized", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot calculate columns sizes when columns or checksums are not initialized"); calculateEachColumnSizes(columns_sizes, total_columns_size); } @@ -1860,7 +1864,7 @@ void IMergeTreeDataPart::calculateColumnsSizesOnDisk() void IMergeTreeDataPart::calculateSecondaryIndicesSizesOnDisk() { if (checksums.empty()) - throw Exception("Cannot calculate secondary indexes sizes when columns or checksums are not initialized", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot calculate secondary indexes sizes when columns or checksums are not initialized"); auto secondary_indices_descriptions = storage.getInMemoryMetadataPtr()->secondary_indices; diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 2c5169a1729..9d0252bd625 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -1,6 +1,6 @@ #pragma once -#include "IO/WriteSettings.h" +#include #include #include #include @@ -17,13 +17,12 @@ #include #include #include +#include #include #include #include #include -#include - namespace zkutil { @@ -87,13 +86,6 @@ public: Type part_type_, const IMergeTreeDataPart * parent_part_); - IMergeTreeDataPart( - const MergeTreeData & storage_, - const String & name_, - const MutableDataPartStoragePtr & data_part_storage_, - Type part_type_, - const IMergeTreeDataPart * parent_part_); - virtual MergeTreeReaderPtr getReader( 
const NamesAndTypesList & columns_, const StorageMetadataPtr & metadata_snapshot, @@ -143,6 +135,7 @@ public: void accumulateColumnSizes(ColumnToSize & /* column_to_size */) const; Type getType() const { return part_type; } + MergeTreeDataPartFormat getFormat() const { return {part_type, getDataPartStorage().getType()}; } String getTypeName() const { return getType().toString(); } @@ -150,6 +143,7 @@ public: const NamesAndTypesList & getColumns() const { return columns; } const ColumnsDescription & getColumnsDescription() const { return columns_description; } + const ColumnsDescription & getColumnsDescriptionWithCollectedNested() const { return columns_description_with_collected_nested; } NameAndTypePair getColumn(const String & name) const; std::optional tryGetColumn(const String & column_name) const; @@ -361,15 +355,11 @@ public: const std::map> & getProjectionParts() const { return projection_parts; } - void addProjectionPart(const String & projection_name, std::shared_ptr && projection_part) - { - projection_parts.emplace(projection_name, std::move(projection_part)); - } + MergeTreeDataPartBuilder getProjectionPartBuilder(const String & projection_name, bool is_temp_projection = false); - bool hasProjection(const String & projection_name) const - { - return projection_parts.find(projection_name) != projection_parts.end(); - } + void addProjectionPart(const String & projection_name, std::shared_ptr && projection_part); + + bool hasProjection(const String & projection_name) const { return projection_parts.contains(projection_name); } void loadProjections(bool require_columns_checksums, bool check_consistency); @@ -533,6 +523,10 @@ private: /// to columns by name and getting subcolumns. ColumnsDescription columns_description; + /// The same as above but after call of Nested::collect(). + /// It is used while reading from wide parts. + ColumnsDescription columns_description_with_collected_nested; + /// Reads part unique identifier (if exists) from uuid.txt void loadUUID(); @@ -606,6 +600,7 @@ using MergeTreeMutableDataPartPtr = std::shared_ptr; bool isCompactPart(const MergeTreeDataPartPtr & data_part); bool isWidePart(const MergeTreeDataPartPtr & data_part); bool isInMemoryPart(const MergeTreeDataPartPtr & data_part); + inline String getIndexExtension(bool is_compressed_primary_key) { return is_compressed_primary_key ? 
".cidx" : ".idx"; } std::optional getIndexExtensionFromFilesystem(const IDataPartStorage & data_part_storage); bool isCompressedFromIndexExtension(const String & index_extension); diff --git a/src/Storages/MergeTree/IMergeTreeDataPartInfoForReader.h b/src/Storages/MergeTree/IMergeTreeDataPartInfoForReader.h index 2e4972c2788..648c3cfbb6b 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartInfoForReader.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartInfoForReader.h @@ -1,6 +1,7 @@ #pragma once #include #include +#include #include namespace DB @@ -41,6 +42,10 @@ public: virtual const NamesAndTypesList & getColumns() const = 0; + virtual const ColumnsDescription & getColumnsDescription() const = 0; + + virtual const ColumnsDescription & getColumnsDescriptionWithCollectedNested() const = 0; + virtual std::optional getColumnPosition(const String & column_name) const = 0; virtual String getColumnNameWithMinimumCompressedSize(bool with_subcolumns) const = 0; diff --git a/src/Storages/MergeTree/IMergeTreeReader.cpp b/src/Storages/MergeTree/IMergeTreeReader.cpp index 8711664d531..d8c3de622cb 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.cpp +++ b/src/Storages/MergeTree/IMergeTreeReader.cpp @@ -41,8 +41,12 @@ IMergeTreeReader::IMergeTreeReader( , alter_conversions(data_part_info_for_read->getAlterConversions()) /// For wide parts convert plain arrays of Nested to subcolumns /// to allow to use shared offset column from cache. - , requested_columns(data_part_info_for_read->isWidePart() ? Nested::convertToSubcolumns(columns_) : columns_) - , part_columns(data_part_info_for_read->isWidePart() ? Nested::collect(data_part_info_for_read->getColumns()) : data_part_info_for_read->getColumns()) + , requested_columns(data_part_info_for_read->isWidePart() + ? Nested::convertToSubcolumns(columns_) + : columns_) + , part_columns(data_part_info_for_read->isWidePart() + ? data_part_info_for_read->getColumnsDescriptionWithCollectedNested() + : data_part_info_for_read->getColumnsDescription()) { columns_to_read.reserve(requested_columns.size()); serializations.reserve(requested_columns.size()); @@ -88,9 +92,8 @@ void IMergeTreeReader::evaluateMissingDefaults(Block additional_columns, Columns size_t num_columns = requested_columns.size(); if (res_columns.size() != num_columns) - throw Exception("invalid number of columns passed to MergeTreeReader::fillMissingColumns. " - "Expected " + toString(num_columns) + ", " - "got " + toString(res_columns.size()), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "invalid number of columns passed to MergeTreeReader::fillMissingColumns. " + "Expected {}, got {}", num_columns, res_columns.size()); /// Convert columns list to block. /// TODO: rewrite with columns interface. It will be possible after changes in ExpressionActions. @@ -171,14 +174,9 @@ void IMergeTreeReader::performRequiredConversions(Columns & res_columns) const if (res_columns.size() != num_columns) { - throw Exception( - "Invalid number of columns passed to MergeTreeReader::performRequiredConversions. " - "Expected " - + toString(num_columns) - + ", " - "got " - + toString(res_columns.size()), - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Invalid number of columns passed to MergeTreeReader::performRequiredConversions. 
" + "Expected {}, got {}", num_columns, res_columns.size()); } Block copy_block; @@ -262,9 +260,8 @@ IMergeTreeReader::ColumnPosition IMergeTreeReader::findColumnForOffsets(const Na void IMergeTreeReader::checkNumberOfColumns(size_t num_columns_to_read) const { if (num_columns_to_read != requested_columns.size()) - throw Exception("invalid number of columns passed to MergeTreeReader::readRows. " - "Expected " + toString(requested_columns.size()) + ", " - "got " + toString(num_columns_to_read), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "invalid number of columns passed to MergeTreeReader::readRows. " + "Expected {}, got {}", requested_columns.size(), num_columns_to_read); } } diff --git a/src/Storages/MergeTree/IMergeTreeReader.h b/src/Storages/MergeTree/IMergeTreeReader.h index 16db13692aa..4a383e4e521 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.h +++ b/src/Storages/MergeTree/IMergeTreeReader.h @@ -104,7 +104,7 @@ private: NamesAndTypesList requested_columns; /// Actual columns description in part. - ColumnsDescription part_columns; + const ColumnsDescription & part_columns; }; } diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 2340bdd99b2..fda1daec3a3 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -739,12 +739,9 @@ KeyCondition::KeyCondition( , single_point(single_point_) , strict(strict_) { - for (size_t i = 0, size = key_column_names.size(); i < size; ++i) - { - const auto & name = key_column_names[i]; + for (const auto & name : key_column_names) if (!key_columns.contains(name)) - key_columns[name] = i; - } + key_columns[name] = key_columns.size(); auto filter_node = buildFilterNode(query, additional_filter_asts); @@ -807,12 +804,9 @@ KeyCondition::KeyCondition( , single_point(single_point_) , strict(strict_) { - for (size_t i = 0, size = key_column_names.size(); i < size; ++i) - { - const auto & name = key_column_names[i]; + for (const auto & name : key_column_names) if (!key_columns.contains(name)) - key_columns[name] = i; - } + key_columns[name] = key_columns.size(); if (!filter_dag) { @@ -1410,10 +1404,8 @@ static void castValueToType(const DataTypePtr & desired_type, Field & src_value, } catch (...) { - throw Exception("Key expression contains comparison between inconvertible types: " + - desired_type->getName() + " and " + src_type->getName() + - " inside " + node_column_name, - ErrorCodes::BAD_TYPE_OF_FIELD); + throw Exception(ErrorCodes::BAD_TYPE_OF_FIELD, "Key expression contains comparison between inconvertible types: " + "{} and {} inside {}", desired_type->getName(), src_type->getName(), node_column_name); } } @@ -1445,7 +1437,7 @@ bool KeyCondition::extractAtomFromTree(const RPNBuilderTreeNode & node, RPNEleme return false; if (key_column_num == static_cast(-1)) - throw Exception("`key_column_num` wasn't initialized. It is a bug.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "`key_column_num` wasn't initialized. It is a bug."); } else if (num_args == 2) { @@ -1544,7 +1536,7 @@ bool KeyCondition::extractAtomFromTree(const RPNBuilderTreeNode & node, RPNEleme return false; if (key_column_num == static_cast(-1)) - throw Exception("`key_column_num` wasn't initialized. It is a bug.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "`key_column_num` wasn't initialized. 
It is a bug."); /// Replace on to <-sign> if (key_arg_pos == 1) @@ -1844,11 +1836,11 @@ KeyCondition::Description KeyCondition::getDescription() const rpn_stack.emplace_back(Frame{.can_be_true = std::move(can_be_true), .can_be_false = std::move(can_be_false)}); } else - throw Exception("Unexpected function type in KeyCondition::RPNElement", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected function type in KeyCondition::RPNElement"); } if (rpn_stack.size() != 1) - throw Exception("Unexpected stack size in KeyCondition::checkInRange", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected stack size in KeyCondition::checkInRange"); std::vector key_names(key_columns.size()); std::vector is_key_used(key_columns.size(), false); @@ -2276,7 +2268,7 @@ BoolMask KeyCondition::checkInHyperrectangle( || element.function == RPNElement::FUNCTION_NOT_IN_SET) { if (!element.set_index) - throw Exception("Set for IN is not created yet", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Set for IN is not created yet"); rpn_stack.emplace_back(element.set_index->checkInRange(hyperrectangle, data_types, single_point)); if (element.function == RPNElement::FUNCTION_NOT_IN_SET) @@ -2315,11 +2307,11 @@ BoolMask KeyCondition::checkInHyperrectangle( rpn_stack.emplace_back(true, false); } else - throw Exception("Unexpected function type in KeyCondition::RPNElement", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected function type in KeyCondition::RPNElement"); } if (rpn_stack.size() != 1) - throw Exception("Unexpected stack size in KeyCondition::checkInRange", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected stack size in KeyCondition::checkInRange"); return rpn_stack[0]; } @@ -2481,11 +2473,11 @@ bool KeyCondition::unknownOrAlwaysTrue(bool unknown_any) const rpn_stack.back() = arg1 | arg2; } else - throw Exception("Unexpected function type in KeyCondition::RPNElement", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected function type in KeyCondition::RPNElement"); } if (rpn_stack.size() != 1) - throw Exception("Unexpected stack size in KeyCondition::unknownOrAlwaysTrue", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected stack size in KeyCondition::unknownOrAlwaysTrue"); return rpn_stack[0]; } @@ -2556,11 +2548,11 @@ bool KeyCondition::alwaysFalse() const rpn_stack.back() = 2; } else - throw Exception("Unexpected function type in KeyCondition::RPNElement", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected function type in KeyCondition::RPNElement"); } if (rpn_stack.size() != 1) - throw Exception("Unexpected stack size in KeyCondition::alwaysFalse", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected stack size in KeyCondition::alwaysFalse"); return rpn_stack[0] == 0; } diff --git a/src/Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h b/src/Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h index bc786ec0428..3363c75dd6f 100644 --- a/src/Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h +++ b/src/Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h @@ -27,6 +27,10 @@ public: const NamesAndTypesList & getColumns() const override { return data_part->getColumns(); } + const ColumnsDescription & getColumnsDescription() const override { return data_part->getColumnsDescription(); } + + const 
ColumnsDescription & getColumnsDescriptionWithCollectedNested() const override { return data_part->getColumnsDescriptionWithCollectedNested(); } + std::optional getColumnPosition(const String & column_name) const override { return data_part->getColumnPosition(column_name); } AlterConversions getAlterConversions() const override { return data_part->storage.getAlterConversionsForPart(data_part); } diff --git a/src/Storages/MergeTree/MarkRange.cpp b/src/Storages/MergeTree/MarkRange.cpp index 903940efa94..0eea0e5afd1 100644 --- a/src/Storages/MergeTree/MarkRange.cpp +++ b/src/Storages/MergeTree/MarkRange.cpp @@ -18,12 +18,18 @@ bool MarkRange::operator<(const MarkRange & rhs) const /// We allow only consecutive non-intersecting ranges /// Here we check whether a beginning of one range lies inside another range /// (ranges are intersect) - const bool is_intersection = (begin <= rhs.begin && rhs.begin < end) || - (rhs.begin <= begin && begin < rhs.end); + if (this != &rhs) + { + const bool is_intersection = (begin <= rhs.begin && rhs.begin < end) || + (rhs.begin <= begin && begin < rhs.end); - if (is_intersection) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Intersecting mark ranges are not allowed, it is a bug! First range ({}, {}), second range ({}, {})", begin, end, rhs.begin, rhs.end); + if (is_intersection) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Intersecting mark ranges are not allowed, it is a bug! " + "First range ({}, {}), second range ({}, {})", + begin, end, rhs.begin, rhs.end); + } return begin < rhs.begin && end <= rhs.begin; } @@ -48,4 +54,15 @@ std::string toString(const MarkRanges & ranges) return result; } +void assertSortedAndNonIntersecting(const MarkRanges & ranges) +{ + MarkRanges ranges_copy(ranges.begin(), ranges.end()); + /// Should also throw an exception if interseting range is found during comparison. + std::sort(ranges_copy.begin(), ranges_copy.end()); + if (ranges_copy != ranges) + throw Exception( + ErrorCodes::LOGICAL_ERROR, "Expected sorted and non intersecting ranges. Ranges: {}", + toString(ranges)); +} + } diff --git a/src/Storages/MergeTree/MarkRange.h b/src/Storages/MergeTree/MarkRange.h index fe02eb056b7..076fc7dfea2 100644 --- a/src/Storages/MergeTree/MarkRange.h +++ b/src/Storages/MergeTree/MarkRange.h @@ -34,4 +34,6 @@ size_t getLastMark(const MarkRanges & ranges); std::string toString(const MarkRanges & ranges); +void assertSortedAndNonIntersecting(const MarkRanges & ranges); + } diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index 97226825629..dd8781691b8 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -94,7 +94,9 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare() if (!source_part_or_covering) { /// We do not have one of source parts locally, try to take some already merged part from someone. - LOG_DEBUG(log, "Don't have all parts (at least part {} is missing) for merge {}; will try to fetch it instead", source_part_name, entry.new_part_name); + LOG_DEBUG(log, "Don't have all parts (at least {} is missing) for merge {}; will try to fetch it instead. 
" + "Either pool for fetches is starving, see background_fetches_pool_size, or none of active replicas has it", + source_part_name, entry.new_part_name); return PrepareResult{ .prepared_successfully = false, .need_to_check_missing_part_in_fetch = true, @@ -113,7 +115,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare() String message; LOG_WARNING(LogToStr(message, log), fmt_string, source_part_name, source_part_or_covering->name, entry.new_part_name); if (!source_part_or_covering->info.contains(MergeTreePartInfo::fromPartName(entry.new_part_name, storage.format_version))) - throw Exception(ErrorCodes::LOGICAL_ERROR, message); + throw Exception::createDeprecated(message, ErrorCodes::LOGICAL_ERROR); return PrepareResult{ .prepared_successfully = false, @@ -171,11 +173,11 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare() StorageMetadataPtr metadata_snapshot = storage.getInMemoryMetadataPtr(); - auto future_merged_part = std::make_shared(parts, entry.new_part_type); + auto future_merged_part = std::make_shared(parts, entry.new_part_format); if (future_merged_part->name != entry.new_part_name) { - throw Exception("Future merged part name " + backQuote(future_merged_part->name) + " differs from part name in log entry: " - + backQuote(entry.new_part_name), ErrorCodes::BAD_DATA_PART_NAME); + throw Exception(ErrorCodes::BAD_DATA_PART_NAME, "Future merged part name {} differs from part name in log entry: {}", + backQuote(future_merged_part->name), backQuote(entry.new_part_name)); } std::optional tagger; diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 1a411f1b2cd..5874c257ad0 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include @@ -113,11 +113,12 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() if (isTTLMergeType(global_ctx->future_part->merge_type) && global_ctx->ttl_merges_blocker->isCancelled()) throw Exception(ErrorCodes::ABORTED, "Cancelled merging parts with TTL"); - LOG_DEBUG(ctx->log, "Merging {} parts: from {} to {} into {}", + LOG_DEBUG(ctx->log, "Merging {} parts: from {} to {} into {} with storage {}", global_ctx->future_part->parts.size(), global_ctx->future_part->parts.front()->name, global_ctx->future_part->parts.back()->name, - global_ctx->future_part->type.toString()); + global_ctx->future_part->part_format.part_type.toString(), + global_ctx->future_part->part_format.storage_type.toString()); if (global_ctx->deduplicate) { @@ -128,31 +129,36 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() } ctx->disk = global_ctx->space_reservation->getDisk(); + auto local_tmp_part_basename = local_tmp_prefix + global_ctx->future_part->name + local_tmp_suffix; - String local_tmp_part_basename = local_tmp_prefix + global_ctx->future_part->name + local_tmp_suffix; - MutableDataPartStoragePtr data_part_storage; - + std::optional builder; if (global_ctx->parent_part) { - data_part_storage = global_ctx->parent_part->getDataPartStorage().getProjection(local_tmp_part_basename); + auto data_part_storage = global_ctx->parent_part->getDataPartStorage().getProjection(local_tmp_part_basename); + builder.emplace(*global_ctx->data, global_ctx->future_part->name, data_part_storage); + builder->withParentPart(global_ctx->parent_part); } else { auto local_single_disk_volume = std::make_shared("volume_" + global_ctx->future_part->name, ctx->disk, 0); - - 
data_part_storage = std::make_shared( - local_single_disk_volume, - global_ctx->data->relative_data_path, - local_tmp_part_basename); - - data_part_storage->beginTransaction(); + builder.emplace(global_ctx->data->getDataPartBuilder(global_ctx->future_part->name, local_single_disk_volume, local_tmp_part_basename)); + builder->withPartStorageType(global_ctx->future_part->part_format.storage_type); } + builder->withPartInfo(global_ctx->future_part->part_info); + builder->withPartType(global_ctx->future_part->part_format.part_type); + + global_ctx->new_data_part = std::move(*builder).build(); + auto data_part_storage = global_ctx->new_data_part->getDataPartStoragePtr(); + if (data_part_storage->exists()) - throw Exception("Directory " + data_part_storage->getFullPath() + " already exists", ErrorCodes::DIRECTORY_ALREADY_EXISTS); + throw Exception(ErrorCodes::DIRECTORY_ALREADY_EXISTS, "Directory {} already exists", data_part_storage->getFullPath()); if (!global_ctx->parent_part) + { + data_part_storage->beginTransaction(); global_ctx->temporary_directory_lock = global_ctx->data->getTemporaryPartDirectoryHolder(local_tmp_part_basename); + } global_ctx->all_column_names = global_ctx->metadata_snapshot->getColumns().getNamesOfPhysical(); global_ctx->storage_columns = global_ctx->metadata_snapshot->getColumns().getAllPhysical(); @@ -171,13 +177,6 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() global_ctx->merging_columns, global_ctx->merging_column_names); - global_ctx->new_data_part = global_ctx->data->createPart( - global_ctx->future_part->name, - global_ctx->future_part->type, - global_ctx->future_part->part_info, - data_part_storage, - global_ctx->parent_part); - global_ctx->new_data_part->uuid = global_ctx->future_part->uuid; global_ctx->new_data_part->partition.assign(global_ctx->future_part->getPartition()); global_ctx->new_data_part->is_temp = global_ctx->parent_part == nullptr; @@ -263,7 +262,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() break; } default : - throw Exception("Merge algorithm must be chosen", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Merge algorithm must be chosen"); } assert(global_ctx->gathering_columns.size() == global_ctx->gathering_column_names.size()); @@ -447,9 +446,10 @@ bool MergeTask::VerticalMergeStage::prepareVerticalMergeForAllColumns() const /// number of input rows. if ((rows_sources_count > 0 || global_ctx->future_part->parts.size() > 1) && sum_input_rows_exact != rows_sources_count + input_rows_filtered) throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Number of rows in source parts ({}) excluding filtered rows ({}) differs from number of bytes written to rows_sources file ({}). It is a bug.", - sum_input_rows_exact, input_rows_filtered, rows_sources_count); + ErrorCodes::LOGICAL_ERROR, + "Number of rows in source parts ({}) excluding filtered rows ({}) differs from number " + "of bytes written to rows_sources file ({}). 
It is a bug.", + sum_input_rows_exact, input_rows_filtered, rows_sources_count); ctx->rows_sources_read_buf = std::make_unique(ctx->tmp_disk->readFile(fileName(ctx->rows_sources_file->path()))); @@ -551,8 +551,8 @@ void MergeTask::VerticalMergeStage::finalizeVerticalMergeForOneColumn() const if (global_ctx->rows_written != ctx->column_elems_written) { - throw Exception("Written " + toString(ctx->column_elems_written) + " elements of column " + column_name + - ", but " + toString(global_ctx->rows_written) + " rows of PK columns", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Written {} elements of column {}, but {} rows of PK columns", + toString(ctx->column_elems_written), column_name, toString(global_ctx->rows_written)); } UInt64 rows = 0; @@ -698,9 +698,9 @@ bool MergeTask::MergeProjectionsStage::finalizeProjectionsAndWholeMerge() const if (global_ctx->chosen_merge_algorithm != MergeAlgorithm::Vertical) global_ctx->to->finalizePart(global_ctx->new_data_part, ctx->need_sync); else - global_ctx->to->finalizePart( - global_ctx->new_data_part, ctx->need_sync, &global_ctx->storage_columns, &global_ctx->checksums_gathered_columns); + global_ctx->to->finalizePart(global_ctx->new_data_part, ctx->need_sync, &global_ctx->storage_columns, &global_ctx->checksums_gathered_columns); + global_ctx->new_data_part->getDataPartStorage().precommitTransaction(); global_ctx->promise.set_value(global_ctx->new_data_part); return false; @@ -818,7 +818,6 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream() global_ctx->horizontal_stage_progress = std::make_unique( ctx->column_sizes ? ctx->column_sizes->keyColumnsWeight() : 1.0); - for (const auto & part : global_ctx->future_part->parts) { Pipe pipe = createMergeTreeSequentialSource( @@ -956,7 +955,7 @@ MergeAlgorithm MergeTask::ExecuteAndFinalizeHorizontalPart::chooseMergeAlgorithm return MergeAlgorithm::Horizontal; for (const auto & part : global_ctx->future_part->parts) - if (!part->supportsVerticalMerge()) + if (!part->supportsVerticalMerge() || !isFullPartStorage(part->getDataPartStorage())) return MergeAlgorithm::Horizontal; bool is_supported_storage = diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index 5b6b0f09bc3..e2997df3bb0 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -406,9 +406,9 @@ IMergeTreeSelectAlgorithm::BlockAndProgress IMergeTreeSelectAlgorithm::readFromP const auto & sample_block = task->range_reader.getSampleBlock(); if (read_result.num_rows != 0 && sample_block.columns() != read_result.columns.size()) - throw Exception("Inconsistent number of columns got from MergeTreeRangeReader. " - "Have " + toString(sample_block.columns()) + " in sample block " - "and " + toString(read_result.columns.size()) + " columns in list", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Inconsistent number of columns got from MergeTreeRangeReader. " + "Have {} in sample block and {} columns in list", + toString(sample_block.columns()), toString(read_result.columns.size())); /// TODO: check columns have the same types as in header. 
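The MergeTask hunk above tightens the precondition for vertical merges: besides per-part support, every source part must now be on full (plain-file) storage, which is what the new isFullPartStorage() helper checks. A condensed decision function with hypothetical SourcePart/StorageType types (not the real MergeTask state) captures the shape of that choice:

```cpp
#include <cstddef>
#include <vector>

enum class MergeAlgorithm { Horizontal, Vertical };
enum class StorageType { Full, Packed };  /// hypothetical: Full = plain local files

struct SourcePart
{
    bool supports_vertical_merge = true;
    StorageType storage_type = StorageType::Full;
};

/// Simplified: vertical merge only pays off when there are columns to gather and every
/// input part both supports it and is stored as full, individually addressable files.
MergeAlgorithm chooseMergeAlgorithm(
    const std::vector<SourcePart> & parts,
    size_t gathering_column_count,
    bool enable_vertical_merge)
{
    if (!enable_vertical_merge || gathering_column_count == 0)
        return MergeAlgorithm::Horizontal;

    for (const auto & part : parts)
        if (!part.supports_vertical_merge || part.storage_type != StorageType::Full)
            return MergeAlgorithm::Horizontal;

    return MergeAlgorithm::Vertical;
}
```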
@@ -545,8 +545,7 @@ static void injectPartConstVirtualColumns( if (!virtual_columns.empty()) { if (unlikely(rows && !task)) - throw Exception("Cannot insert virtual columns to non-empty chunk without specified task.", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot insert virtual columns to non-empty chunk without specified task."); const IMergeTreeDataPart * part = nullptr; if (rows) @@ -627,8 +626,8 @@ Block IMergeTreeSelectAlgorithm::applyPrewhereActions(Block block, const Prewher auto & row_level_column = block.getByName(prewhere_info->row_level_column_name); if (!row_level_column.type->canBeUsedInBooleanContext()) { - throw Exception("Invalid type for filter in PREWHERE: " + row_level_column.type->getName(), - ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, "Invalid type for filter in PREWHERE: {}", + row_level_column.type->getName()); } block.erase(prewhere_info->row_level_column_name); @@ -640,8 +639,8 @@ Block IMergeTreeSelectAlgorithm::applyPrewhereActions(Block block, const Prewher auto & prewhere_column = block.getByName(prewhere_info->prewhere_column_name); if (!prewhere_column.type->canBeUsedInBooleanContext()) { - throw Exception("Invalid type for filter in PREWHERE: " + prewhere_column.type->getName(), - ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, "Invalid type for filter in PREWHERE: {}", + prewhere_column.type->getName()); } if (prewhere_info->remove_prewhere_column) @@ -655,7 +654,8 @@ Block IMergeTreeSelectAlgorithm::applyPrewhereActions(Block block, const Prewher prewhere_column.column = prewhere_column.type->createColumnConst(block.rows(), 1.0f)->convertToFullColumnIfConst(); else throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, "Illegal type {} of column for filter", prewhere_column.type->getName()); + ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, + "Illegal type {} of column for filter", prewhere_column.type->getName()); } } diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp index 525d76d0f0f..6bd8cc60979 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp @@ -233,9 +233,9 @@ void MergeTreeBlockSizePredictor::startBlock() void MergeTreeBlockSizePredictor::update(const Block & sample_block, const Columns & columns, size_t num_rows, double decay) { if (columns.size() != sample_block.columns()) - throw Exception("Inconsistent number of columns passed to MergeTreeBlockSizePredictor. " - "Have " + toString(sample_block.columns()) + " in sample block " - "and " + toString(columns.size()) + " columns in list", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Inconsistent number of columns passed to MergeTreeBlockSizePredictor. 
" + "Have {} in sample block and {} columns in list", + toString(sample_block.columns()), toString(columns.size())); if (!is_initialized_in_update) { @@ -246,8 +246,8 @@ void MergeTreeBlockSizePredictor::update(const Block & sample_block, const Colum if (num_rows < block_size_rows) { - throw Exception("Updated block has less rows (" + toString(num_rows) + ") than previous one (" + toString(block_size_rows) + ")", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Updated block has less rows ({}) than previous one ({})", + num_rows, block_size_rows); } size_t diff_rows = num_rows - block_size_rows; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 9640e2fb366..b2e0c14489a 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1,3 +1,4 @@ +#include "Storages/MergeTree/MergeTreeDataPartBuilder.h" #include #include @@ -44,7 +45,7 @@ #include #include #include -#include +#include #include #include #include @@ -147,7 +148,6 @@ namespace ErrorCodes extern const int BAD_DATA_PART_NAME; extern const int READONLY_SETTING; extern const int ABORTED; - extern const int UNKNOWN_PART_TYPE; extern const int UNKNOWN_DISK; extern const int NOT_ENOUGH_SPACE; extern const int ALTER_OF_COLUMN_IS_FORBIDDEN; @@ -166,11 +166,11 @@ namespace ErrorCodes static void checkSampleExpression(const StorageInMemoryMetadata & metadata, bool allow_sampling_expression_not_in_primary_key, bool check_sample_column_is_correct) { if (metadata.sampling_key.column_names.empty()) - throw Exception("There are no columns in sampling expression", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "There are no columns in sampling expression"); const auto & pk_sample_block = metadata.getPrimaryKey().sample_block; if (!pk_sample_block.has(metadata.sampling_key.column_names[0]) && !allow_sampling_expression_not_in_primary_key) - throw Exception("Sampling expression must be present in the primary key", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Sampling expression must be present in the primary key"); if (!check_sample_column_is_correct) return; @@ -192,10 +192,9 @@ static void checkSampleExpression(const StorageInMemoryMetadata & metadata, bool } if (!is_correct_sample_condition) - throw Exception( - "Invalid sampling column type in storage parameters: " + sampling_column_type->getName() - + ". Must be one unsigned integer type", - ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, + "Invalid sampling column type in storage parameters: {}. 
Must be one unsigned integer type", + sampling_column_type->getName()); } @@ -208,7 +207,7 @@ void MergeTreeData::initializeDirectoriesAndFormatVersion(const std::string & re min_format_version = MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING; if (relative_data_path.empty()) - throw Exception("MergeTree storages require data path", ErrorCodes::INCORRECT_FILE_NAME); + throw Exception(ErrorCodes::INCORRECT_FILE_NAME, "MergeTree storages require data path"); const auto format_version_path = fs::path(relative_data_path) / MergeTreeData::FORMAT_VERSION_FILE_NAME; std::optional read_format_version; @@ -235,7 +234,9 @@ void MergeTreeData::initializeDirectoriesAndFormatVersion(const std::string & re if (!read_format_version.has_value()) read_format_version = current_format_version; else if (*read_format_version != current_format_version) - throw Exception(ErrorCodes::CORRUPTED_DATA, "Version file on {} contains version {} expected version is {}.", fullPath(disk, format_version_path), current_format_version, *read_format_version); + throw Exception(ErrorCodes::CORRUPTED_DATA, + "Version file on {} contains version {} expected version is {}.", + fullPath(disk, format_version_path), current_format_version, *read_format_version); } } @@ -270,9 +271,7 @@ void MergeTreeData::initializeDirectoriesAndFormatVersion(const std::string & re if (format_version < min_format_version) { if (min_format_version == MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING.toUnderType()) - throw Exception( - "MergeTree data format version on disk doesn't support custom partitioning", - ErrorCodes::METADATA_MISMATCH); + throw Exception(ErrorCodes::METADATA_MISMATCH, "MergeTree data format version on disk doesn't support custom partitioning"); } } @@ -319,7 +318,7 @@ MergeTreeData::MergeTreeData( checkPartitionKeyAndInitMinMax(metadata_.partition_key); setProperties(metadata_, metadata_, attach); if (minmax_idx_date_column_pos == -1) - throw Exception("Could not find Date column", ErrorCodes::BAD_TYPE_OF_FIELD); + throw Exception(ErrorCodes::BAD_TYPE_OF_FIELD, "Could not find Date column"); } catch (Exception & e) { @@ -392,7 +391,7 @@ bool MergeTreeData::supportsFinal() const static void checkKeyExpression(const ExpressionActions & expr, const Block & sample_block, const String & key_name, bool allow_nullable_key) { if (expr.hasArrayJoin()) - throw Exception(key_name + " key cannot contain array joins", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "{} key cannot contain array joins", key_name); try { @@ -412,7 +411,9 @@ static void checkKeyExpression(const ExpressionActions & expr, const Block & sam if (!allow_nullable_key && hasNullable(element.type)) throw Exception( - ErrorCodes::ILLEGAL_COLUMN, "{} key contains nullable columns, but merge tree setting `allow_nullable_key` is disabled", key_name); + ErrorCodes::ILLEGAL_COLUMN, + "{} key contains nullable columns, " + "but merge tree setting `allow_nullable_key` is disabled", key_name); } } @@ -420,7 +421,7 @@ void MergeTreeData::checkProperties( const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & old_metadata, bool attach) const { if (!new_metadata.sorting_key.definition_ast) - throw Exception("ORDER BY cannot be empty", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "ORDER BY cannot be empty"); KeyDescription new_sorting_key = new_metadata.sorting_key; KeyDescription new_primary_key = new_metadata.primary_key; @@ -428,9 +429,8 @@ void MergeTreeData::checkProperties( 
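The checkProperties() hunks around this point mostly reflow exception messages, but the invariant they guard is easy to state on its own: the primary key must be a duplicate-free prefix of the sorting key. A standalone check over plain column-name strings (generic exception type, not ClickHouse's) could look like this:

```cpp
#include <stdexcept>
#include <string>
#include <unordered_set>
#include <vector>

/// The primary key must be a (duplicate-free) prefix of the sorting key.
void checkPrimaryKeyIsPrefixOfSortingKey(
    const std::vector<std::string> & primary_key,
    const std::vector<std::string> & sorting_key)
{
    if (primary_key.size() > sorting_key.size())
        throw std::invalid_argument("Primary key is longer than the sorting key");

    std::unordered_set<std::string> seen;
    for (size_t i = 0; i < primary_key.size(); ++i)
    {
        if (primary_key[i] != sorting_key[i])
            throw std::invalid_argument("Primary key differs from sorting key at position " + std::to_string(i));
        if (!seen.insert(primary_key[i]).second)
            throw std::invalid_argument("Primary key contains duplicate column " + primary_key[i]);
    }
}
```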
size_t sorting_key_size = new_sorting_key.column_names.size(); size_t primary_key_size = new_primary_key.column_names.size(); if (primary_key_size > sorting_key_size) - throw Exception("Primary key must be a prefix of the sorting key, but its length: " - + toString(primary_key_size) + " is greater than the sorting key length: " + toString(sorting_key_size), - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Primary key must be a prefix of the sorting key, but its length: " + "{} is greater than the sorting key length: {}", primary_key_size, sorting_key_size); NameSet primary_key_columns_set; @@ -442,12 +442,12 @@ void MergeTreeData::checkProperties( { const String & pk_column = new_primary_key.column_names[i]; if (pk_column != sorting_key_column) - throw Exception("Primary key must be a prefix of the sorting key, but the column in the position " - + toString(i) + " is " + sorting_key_column +", not " + pk_column, - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Primary key must be a prefix of the sorting key, " + "but the column in the position {} is {}", i, sorting_key_column +", not " + pk_column); if (!primary_key_columns_set.emplace(pk_column).second) - throw Exception("Primary key contains duplicate columns", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Primary key contains duplicate columns"); } } @@ -490,14 +490,15 @@ void MergeTreeData::checkProperties( for (const String & col : used_columns) { if (!added_columns.contains(col) || deleted_columns.contains(col)) - throw Exception("Existing column " + backQuoteIfNeed(col) + " is used in the expression that was " - "added to the sorting key. You can add expressions that use only the newly added columns", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Existing column {} is used in the expression that was added to the sorting key. 
" + "You can add expressions that use only the newly added columns", + backQuoteIfNeed(col)); if (new_metadata.columns.getDefaults().contains(col)) - throw Exception("Newly added column " + backQuoteIfNeed(col) + " has a default expression, so adding " - "expressions that use it to the sorting key is forbidden", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Newly added column {} has a default expression, so adding expressions that use " + "it to the sorting key is forbidden", backQuoteIfNeed(col)); } } } @@ -512,9 +513,7 @@ void MergeTreeData::checkProperties( MergeTreeIndexFactory::instance().validate(index, attach); if (indices_names.find(index.name) != indices_names.end()) - throw Exception( - "Index with name " + backQuote(index.name) + " already exists", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Index with name {} already exists", backQuote(index.name)); indices_names.insert(index.name); } @@ -527,9 +526,7 @@ void MergeTreeData::checkProperties( for (const auto & projection : new_metadata.projections) { if (projections_names.find(projection.name) != projections_names.end()) - throw Exception( - "Projection with name " + backQuote(projection.name) + " already exists", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Projection with name {} already exists", backQuote(projection.name)); projections_names.insert(projection.name); } @@ -674,7 +671,7 @@ void MergeTreeData::checkTTLExpressions(const StorageInMemoryMetadata & new_meta for (const auto & [name, ttl_description] : new_column_ttls) { if (columns_ttl_forbidden.contains(name)) - throw Exception("Trying to set TTL for key column " + name, ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Trying to set TTL for key column {}", name); } } auto new_table_ttl = new_metadata.table_ttl; @@ -685,13 +682,12 @@ void MergeTreeData::checkTTLExpressions(const StorageInMemoryMetadata & new_meta { if (!move_ttl.if_exists && !getDestinationForMoveTTL(move_ttl)) { - String message; if (move_ttl.destination_type == DataDestinationType::DISK) - message = "No such disk " + backQuote(move_ttl.destination_name) + " for given storage policy"; + throw Exception(ErrorCodes::BAD_TTL_EXPRESSION, + "No such disk {} for given storage policy", backQuote(move_ttl.destination_name)); else - message = "No such volume " + backQuote(move_ttl.destination_name) + " for given storage policy"; - - throw Exception(message, ErrorCodes::BAD_TTL_EXPRESSION); + throw Exception(ErrorCodes::BAD_TTL_EXPRESSION, + "No such volume {} for given storage policy", backQuote(move_ttl.destination_name)); } } } @@ -710,16 +706,17 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat const auto columns = metadata.getColumns().getAllPhysical(); if (!sign_column.empty() && mode != MergingParams::Collapsing && mode != MergingParams::VersionedCollapsing) - throw Exception("Sign column for MergeTree cannot be specified in modes except Collapsing or VersionedCollapsing.", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Sign column for MergeTree cannot be specified " + "in modes except Collapsing or VersionedCollapsing."); if (!version_column.empty() && mode != MergingParams::Replacing && mode != MergingParams::VersionedCollapsing) - throw Exception("Version column for MergeTree cannot be specified in modes except Replacing or VersionedCollapsing.", - ErrorCodes::LOGICAL_ERROR); + throw 
Exception(ErrorCodes::LOGICAL_ERROR, + "Version column for MergeTree cannot be specified " + "in modes except Replacing or VersionedCollapsing."); if (!columns_to_sum.empty() && mode != MergingParams::Summing) - throw Exception("List of columns to sum for MergeTree cannot be specified in all modes except Summing.", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "List of columns to sum for MergeTree cannot be specified in all modes except Summing."); /// Check that if the sign column is needed, it exists and is of type Int8. auto check_sign_column = [this, & columns](bool is_optional, const std::string & storage) @@ -729,7 +726,7 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat if (is_optional) return; - throw Exception("Logical error: Sign column for storage " + storage + " is empty", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: Sign column for storage {} is empty", storage); } bool miss_column = true; @@ -738,14 +735,14 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat if (column.name == sign_column) { if (!typeid_cast(column.type.get())) - throw Exception("Sign column (" + sign_column + ") for storage " + storage + " must have type Int8." - " Provided column of type " + column.type->getName() + ".", ErrorCodes::BAD_TYPE_OF_FIELD); + throw Exception(ErrorCodes::BAD_TYPE_OF_FIELD, "Sign column ({}) for storage {} must have type Int8. " + "Provided column of type {}.", sign_column, storage, column.type->getName()); miss_column = false; break; } } if (miss_column) - throw Exception("Sign column " + sign_column + " does not exist in table declaration.", ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "Sign column {} does not exist in table declaration.", sign_column); }; /// that if the version_column column is needed, it exists and is of unsigned integer type. 
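Note: the hunks above and below all apply the same mechanical migration, from the string-concatenation constructor `throw Exception("msg" + toString(x), ErrorCodes::X)` to the code-first, fmt-style constructor `throw Exception(ErrorCodes::X, "msg {}", x)`. As a minimal, self-contained sketch of that pattern (assuming {fmt} v8+; `FormattedException` and `BAD_ARGUMENTS` here are placeholders for the sketch, not the real DB::Exception or ClickHouse error codes):

#include <fmt/format.h>
#include <iostream>
#include <stdexcept>
#include <utility>

// Placeholder exception type with a code-first, fmt-style constructor,
// mirroring the calling convention this patch migrates to.
class FormattedException : public std::runtime_error
{
public:
    template <typename... Args>
    FormattedException(int code_, fmt::format_string<Args...> format, Args &&... args)
        : std::runtime_error(fmt::format(format, std::forward<Args>(args)...))
        , code(code_)
    {
    }

    int code;
};

int main()
{
    const int BAD_ARGUMENTS = 36; // placeholder error code for the sketch
    try
    {
        size_t primary_key_size = 3;
        size_t sorting_key_size = 2;
        throw FormattedException(BAD_ARGUMENTS,
            "Primary key must be a prefix of the sorting key, but its length: "
            "{} is greater than the sorting key length: {}",
            primary_key_size, sorting_key_size);
    }
    catch (const FormattedException & e)
    {
        std::cerr << "Code " << e.code << ": " << e.what() << '\n';
    }
}

Passing the error code first lets the format string and its arguments stay together, and the compile-time format check catches placeholder/argument mismatches that the old concatenation style could not.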
@@ -756,7 +753,7 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat if (is_optional) return; - throw Exception("Logical error: Version column for storage " + storage + " is empty", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: Version column for storage {} is empty", storage); } bool miss_column = true; @@ -765,16 +762,16 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat if (column.name == version_column) { if (!column.type->canBeUsedAsVersion()) - throw Exception("The column " + version_column + - " cannot be used as a version column for storage " + storage + - " because it is of type " + column.type->getName() + - " (must be of an integer type or of type Date/DateTime/DateTime64)", ErrorCodes::BAD_TYPE_OF_FIELD); + throw Exception(ErrorCodes::BAD_TYPE_OF_FIELD, + "The column {} cannot be used as a version column for storage {} because it is " + "of type {} (must be of an integer type or of type Date/DateTime/DateTime64)", + version_column, storage, column.type->getName()); miss_column = false; break; } } if (miss_column) - throw Exception("Version column " + version_column + " does not exist in table declaration.", ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "Version column {} does not exist in table declaration.", version_column); }; if (mode == MergingParams::Collapsing) @@ -790,8 +787,9 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat return column_to_sum == Nested::extractTableName(name_and_type.name); }; if (columns.end() == std::find_if(columns.begin(), columns.end(), check_column_to_sum_exists)) - throw Exception( - "Column " + column_to_sum + " listed in columns to sum does not exist in table declaration.", ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, + "Column {} listed in columns to sum does not exist in table declaration.", + column_to_sum); } /// Check that summing columns are not in partition key. @@ -805,8 +803,8 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat std::back_inserter(names_intersection)); if (!names_intersection.empty()) - throw Exception("Columns: " + boost::algorithm::join(names_intersection, ", ") + - " listed both in columns to sum and in partition key. That is not allowed.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Columns: {} listed both in columns to sum and in partition key. 
" + "That is not allowed.", boost::algorithm::join(names_intersection, ", ")); } } @@ -1099,9 +1097,12 @@ MergeTreeData::LoadPartResult MergeTreeData::loadDataPart( LoadPartResult res; auto single_disk_volume = std::make_shared("volume_" + part_name, part_disk_ptr, 0); - auto data_part_storage = std::make_shared(single_disk_volume, relative_data_path, part_name); + auto data_part_storage = std::make_shared(single_disk_volume, relative_data_path, part_name); - res.part = createPart(part_name, part_info, data_part_storage); + res.part = getDataPartBuilder(part_name, single_disk_volume, part_name) + .withPartInfo(part_info) + .withPartFormatFromDisk() + .build(); String part_path = fs::path(relative_data_path) / part_name; String marker_path = fs::path(part_path) / IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME; @@ -1244,7 +1245,7 @@ MergeTreeData::LoadPartResult MergeTreeData::loadDataPart( return res; } else - throw Exception("Part " + res.part->name + " already exists but with different checksums", ErrorCodes::DUPLICATE_DATA_PART); + throw Exception(ErrorCodes::DUPLICATE_DATA_PART, "Part {} already exists but with different checksums", res.part->name); } if (to_state == DataPartState::Active) @@ -1252,7 +1253,7 @@ MergeTreeData::LoadPartResult MergeTreeData::loadDataPart( LOG_TRACE(log, "Finished loading {} part {} on disk {}", magic_enum::enum_name(to_state), part_name, part_disk_ptr->getName()); return res; -}; +} std::vector MergeTreeData::loadDataPartsFromDisk( ThreadPool & pool, @@ -1428,7 +1429,7 @@ void MergeTreeData::loadDataPartsFromWAL(MutableDataPartsVector & parts_from_wal if ((*it)->checksums.getTotalChecksumHex() == part->checksums.getTotalChecksumHex()) LOG_ERROR(log, "Remove duplicate part {}", part->getDataPartStorage().getFullPath()); else - throw Exception("Part " + part->name + " already exists but with different checksums", ErrorCodes::DUPLICATE_DATA_PART); + throw Exception(ErrorCodes::DUPLICATE_DATA_PART, "Part {} already exists but with different checksums", part->name); } else { @@ -1624,9 +1625,9 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) { std::lock_guard lock(wal_init_lock); if (write_ahead_log != nullptr) - throw Exception( - "There are multiple WAL files appeared in current storage policy. You need to resolve this manually", - ErrorCodes::CORRUPTED_DATA); + throw Exception(ErrorCodes::CORRUPTED_DATA, + "There are multiple WAL files appeared in current storage policy. 
" + "You need to resolve this manually"); write_ahead_log = std::make_shared(*this, disk_ptr, it->name()); for (auto && part : write_ahead_log->restore(metadata_snapshot, getContext(), part_lock, is_static_storage)) @@ -1660,9 +1661,9 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) } if (have_non_adaptive_parts && have_adaptive_parts && !settings->enable_mixed_granularity_parts) - throw Exception( - "Table contains parts with adaptive and non adaptive marks, but `setting enable_mixed_granularity_parts` is disabled", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Table contains parts with adaptive and non adaptive marks, " + "but `setting enable_mixed_granularity_parts` is disabled"); has_non_adaptive_index_granularity_parts = have_non_adaptive_parts; has_lightweight_delete_parts = have_lightweight_in_parts; @@ -1670,10 +1671,13 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) if (suspicious_broken_parts > settings->max_suspicious_broken_parts && !skip_sanity_checks) throw Exception(ErrorCodes::TOO_MANY_UNEXPECTED_DATA_PARTS, - "Suspiciously many ({} parts, {} in total) broken parts to remove while maximum allowed broken parts count is {}. You can change the maximum value " - "with merge tree setting 'max_suspicious_broken_parts' in configuration section or in table settings in .sql file " + "Suspiciously many ({} parts, {} in total) broken parts " + "to remove while maximum allowed broken parts count is {}. You can change the maximum value " + "with merge tree setting 'max_suspicious_broken_parts' " + "in configuration section or in table settings in .sql file " "(don't forget to return setting back to default value)", - suspicious_broken_parts, formatReadableSizeWithBinarySuffix(suspicious_broken_parts_bytes), settings->max_suspicious_broken_parts); + suspicious_broken_parts, formatReadableSizeWithBinarySuffix(suspicious_broken_parts_bytes), + settings->max_suspicious_broken_parts); if (suspicious_broken_parts_bytes > settings->max_suspicious_broken_parts_bytes && !skip_sanity_checks) throw Exception(ErrorCodes::TOO_MANY_UNEXPECTED_DATA_PARTS, @@ -1775,7 +1779,7 @@ catch (...) { LOG_ERROR(log, "Loading of outdated parts failed. " "Will terminate to avoid undefined behaviour due to inconsistent set of parts. " - "Exception: ", getCurrentExceptionMessage(true)); + "Exception: {}", getCurrentExceptionMessage(true)); std::terminate(); } @@ -1852,7 +1856,7 @@ size_t MergeTreeData::clearOldTemporaryDirectories(size_t custom_directories_lif size_t cleared_count = 0; - /// Delete temporary directories older than a day. + /// Delete temporary directories older than a the specified age. for (const auto & disk : getDisks()) { if (disk->isBroken()) @@ -2506,7 +2510,7 @@ void MergeTreeData::rename(const String & new_table_path, const StorageID & new_ for (const auto & disk : disks) { if (disk->exists(new_table_path)) - throw Exception{"Target path already exists: " + fullPath(disk, new_table_path), ErrorCodes::DIRECTORY_ALREADY_EXISTS}; + throw Exception(ErrorCodes::DIRECTORY_ALREADY_EXISTS, "Target path already exists: {}", fullPath(disk, new_table_path)); } { @@ -2609,6 +2613,14 @@ void MergeTreeData::dropAllData() if (disk->isBroken()) continue; + /// It can naturally happen if we cannot drop table from the first time + /// i.e. 
get exceptions after remove recursive + if (!disk->exists(relative_data_path)) + { + LOG_INFO(log, "dropAllData: path {} is already removed from disk {}", relative_data_path, disk->getName()); + continue; + } + LOG_INFO(log, "dropAllData: remove format_version.txt, detached, moving and write ahead logs"); disk->removeFileIfExists(fs::path(relative_data_path) / FORMAT_VERSION_FILE_NAME); @@ -2630,8 +2642,9 @@ void MergeTreeData::dropAllData() disk->listFiles(relative_data_path, files_left); throw Exception( - ErrorCodes::ZERO_COPY_REPLICATION_ERROR, "Directory {} with table {} not empty (files [{}]) after drop. Will not drop.", - relative_data_path, getStorageID().getNameForLogs(), fmt::join(files_left, ", ")); + ErrorCodes::ZERO_COPY_REPLICATION_ERROR, + "Directory {} with table {} not empty (files [{}]) after drop. Will not drop.", + relative_data_path, getStorageID().getNameForLogs(), fmt::join(files_left, ", ")); } LOG_INFO(log, "dropAllData: removing table directory recursive to cleanup garbage"); @@ -2729,10 +2742,10 @@ void checkVersionColumnTypesConversion(const IDataType * old_type, const IDataTy { /// Check new type can be used as version if (!new_type->canBeUsedAsVersion()) - throw Exception("Cannot alter version column " + backQuoteIfNeed(column_name) + - " to type " + new_type->getName() + - " because version column must be of an integer type or of type Date or DateTime" - , ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN); + throw Exception(ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN, + "Cannot alter version column {} to type {} because version column must be " + "of an integer type or of type Date or DateTime" , backQuoteIfNeed(column_name), + new_type->getName()); auto which_new_type = WhichDataType(new_type); auto which_old_type = WhichDataType(old_type); @@ -2745,21 +2758,19 @@ void checkVersionColumnTypesConversion(const IDataType * old_type, const IDataTy || (which_old_type.isDateTime() && !which_new_type.isDateTime()) || (which_old_type.isFloat() && !which_new_type.isFloat())) { - throw Exception("Cannot alter version column " + backQuoteIfNeed(column_name) + - " from type " + old_type->getName() + - " to type " + new_type->getName() + " because new type will change sort order of version column." + - " The only possible conversion is expansion of the number of bytes of the current type." - , ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN); + throw Exception(ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN, "Cannot alter version column {} from type {} to type {} " + "because new type will change sort order of version column. " + "The only possible conversion is expansion of the number of bytes of the current type.", + backQuoteIfNeed(column_name), old_type->getName(), new_type->getName()); } /// Check alter to smaller size: UInt64 -> UInt32 and so on if (new_type->getSizeOfValueInMemory() < old_type->getSizeOfValueInMemory()) { - throw Exception("Cannot alter version column " + backQuoteIfNeed(column_name) + - " from type " + old_type->getName() + - " to type " + new_type->getName() + " because new type is smaller than current in the number of bytes." + - " The only possible conversion is expansion of the number of bytes of the current type." - , ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN); + throw Exception(ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN, "Cannot alter version column {} from type {} to type {} " + "because new type is smaller than current in the number of bytes. 
" + "The only possible conversion is expansion of the number of bytes of the current type.", + backQuoteIfNeed(column_name), old_type->getName(), new_type->getName()); } } @@ -2780,15 +2791,16 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context auto mutation_commands = commands.getMutationCommands(new_metadata, settings.materialize_ttl_after_modify, getContext()); if (!mutation_commands.empty()) - throw Exception(ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN, "The following alter commands: '{}' will modify data on disk, but setting `allow_non_metadata_alters` is disabled", queryToString(mutation_commands.ast())); + throw Exception(ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN, + "The following alter commands: '{}' will modify data on disk, " + "but setting `allow_non_metadata_alters` is disabled", + queryToString(mutation_commands.ast())); } - if (commands.hasInvertedIndex(new_metadata, getContext()) && !settings.allow_experimental_inverted_index) - { - throw Exception( - "Experimental Inverted Index feature is not enabled (the setting 'allow_experimental_inverted_index')", - ErrorCodes::SUPPORT_IS_DISABLED); - } + if (commands.hasInvertedIndex(new_metadata) && !settings.allow_experimental_inverted_index) + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, + "Experimental Inverted Index feature is not enabled (turn on setting 'allow_experimental_inverted_index')"); + commands.apply(new_metadata, getContext()); /// Set of columns that shouldn't be altered. @@ -2889,46 +2901,39 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context if (command.type == AlterCommand::MODIFY_ORDER_BY && !is_custom_partitioned) { - throw Exception( - "ALTER MODIFY ORDER BY is not supported for default-partitioned tables created with the old syntax", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "ALTER MODIFY ORDER BY is not supported for default-partitioned tables created with the old syntax"); } if (command.type == AlterCommand::MODIFY_TTL && !is_custom_partitioned) { - throw Exception( - "ALTER MODIFY TTL is not supported for default-partitioned tables created with the old syntax", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "ALTER MODIFY TTL is not supported for default-partitioned tables created with the old syntax"); } if (command.type == AlterCommand::MODIFY_SAMPLE_BY) { if (!is_custom_partitioned) - throw Exception( - "ALTER MODIFY SAMPLE BY is not supported for default-partitioned tables created with the old syntax", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "ALTER MODIFY SAMPLE BY is not supported for default-partitioned tables created with the old syntax"); checkSampleExpression(new_metadata, getSettings()->compatibility_allow_sampling_expression_not_in_primary_key, getSettings()->check_sample_column_is_correct); } if (command.type == AlterCommand::ADD_INDEX && !is_custom_partitioned) { - throw Exception( - "ALTER ADD INDEX is not supported for tables with the old syntax", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "ALTER ADD INDEX is not supported for tables with the old syntax"); } if (command.type == AlterCommand::ADD_PROJECTION) { if (!is_custom_partitioned) - throw Exception( - "ALTER ADD PROJECTION is not supported for tables with the old syntax", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "ALTER ADD PROJECTION is not supported for tables with the old syntax"); } if (command.type == 
AlterCommand::RENAME_COLUMN) { if (columns_in_keys.contains(command.column_name)) { - throw Exception( - "Trying to ALTER RENAME key " + backQuoteIfNeed(command.column_name) + " column which is a part of key expression", - ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN); + throw Exception(ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN, + "Trying to ALTER RENAME key {} column which is a part of key expression", + backQuoteIfNeed(command.column_name)); } } else if (command.type == AlterCommand::DROP_COLUMN) @@ -2967,7 +2972,9 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context for (const auto & reset_setting : command.settings_resets) { if (!settings_from_storage->has(reset_setting)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot reset setting '{}' because it doesn't exist for MergeTree engines family", reset_setting); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Cannot reset setting '{}' because it doesn't exist for MergeTree engines family", + reset_setting); } } else if (command.isRequireMutationStage(getInMemoryMetadata())) @@ -2975,8 +2982,8 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context /// This alter will override data on disk. Let's check that it doesn't /// modify immutable column. if (columns_alter_type_forbidden.contains(command.column_name)) - throw Exception("ALTER of key column " + backQuoteIfNeed(command.column_name) + " is forbidden", - ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN); + throw Exception(ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN, "ALTER of key column {} is forbidden", + backQuoteIfNeed(command.column_name)); if (command.type == AlterCommand::MODIFY_COLUMN) { @@ -2986,10 +2993,11 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context assert(it != old_types.end()); if (!isSafeForKeyConversion(it->second, command.data_type.get())) - throw Exception("ALTER of partition key column " + backQuoteIfNeed(command.column_name) + " from type " - + it->second->getName() + " to type " + command.data_type->getName() - + " is not safe because it can change the representation of partition key", - ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN); + throw Exception(ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN, + "ALTER of partition key column {} from type {} " + "to type {} is not safe because it can change the representation " + "of partition key", backQuoteIfNeed(command.column_name), + it->second->getName(), command.data_type->getName()); } if (columns_alter_type_metadata_only.contains(command.column_name)) @@ -2997,10 +3005,11 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context auto it = old_types.find(command.column_name); assert(it != old_types.end()); if (!isSafeForKeyConversion(it->second, command.data_type.get())) - throw Exception("ALTER of key column " + backQuoteIfNeed(command.column_name) + " from type " - + it->second->getName() + " to type " + command.data_type->getName() - + " is not safe because it can change the representation of primary key", - ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN); + throw Exception(ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN, + "ALTER of key column {} from type {} " + "to type {} is not safe because it can change the representation " + "of primary key", backQuoteIfNeed(command.column_name), + it->second->getName(), command.data_type->getName()); } if (old_metadata.getColumns().has(command.column_name)) @@ -3037,8 +3046,7 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context if ((!current_value || 
*current_value != new_value) && MergeTreeSettings::isReadonlySetting(setting_name)) { - throw Exception{"Setting '" + setting_name + "' is readonly for storage '" + getName() + "'", - ErrorCodes::READONLY_SETTING}; + throw Exception(ErrorCodes::READONLY_SETTING, "Setting '{}' is readonly for storage '{}'", setting_name, getName()); } if (!current_value && MergeTreeSettings::isPartFormatSetting(setting_name)) @@ -3047,7 +3055,7 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context copy.applyChange(changed_setting); String reason; if (!canUsePolymorphicParts(copy, &reason) && !reason.empty()) - throw Exception("Can't change settings. Reason: " + reason, ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Can't change settings. Reason: {}", reason); } if (setting_name == "storage_policy") @@ -3062,8 +3070,7 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context /// Prevent unsetting readonly setting if (MergeTreeSettings::isReadonlySetting(setting_name) && !new_value) { - throw Exception{"Setting '" + setting_name + "' is readonly for storage '" + getName() + "'", - ErrorCodes::READONLY_SETTING}; + throw Exception(ErrorCodes::READONLY_SETTING, "Setting '{}' is readonly for storage '{}'", setting_name, getName()); } if (MergeTreeSettings::isPartFormatSetting(setting_name) && !new_value) @@ -3073,7 +3080,7 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context copy->applyChanges(new_changes); String reason; if (!canUsePolymorphicParts(*copy, &reason) && !reason.empty()) - throw Exception("Can't change settings. Reason: " + reason, ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Can't change settings. Reason: {}", reason); } } @@ -3096,7 +3103,9 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context if (dropped_columns.size() > 1) postfix = "s"; throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Cannot drop or clear column{} '{}', because all columns in part '{}' will be removed from disk. Empty parts are not allowed", postfix, boost::algorithm::join(dropped_columns, ", "), part->name); + "Cannot drop or clear column{} '{}', because all columns " + "in part '{}' will be removed from disk. 
Empty parts are not allowed", + postfix, boost::algorithm::join(dropped_columns, ", "), part->name); } } } @@ -3107,72 +3116,39 @@ void MergeTreeData::checkMutationIsPossible(const MutationCommands & /*commands* /// Some validation will be added } -MergeTreeDataPartType MergeTreeData::choosePartType(size_t bytes_uncompressed, size_t rows_count) const +MergeTreeDataPartFormat MergeTreeData::choosePartFormat(size_t bytes_uncompressed, size_t rows_count, bool only_on_disk) const { - const auto settings = getSettings(); - if (!canUsePolymorphicParts(*settings)) - return MergeTreeDataPartType::Wide; + using PartType = MergeTreeDataPartType; + using PartStorageType = MergeTreeDataPartStorageType; - if (bytes_uncompressed < settings->min_bytes_for_compact_part || rows_count < settings->min_rows_for_compact_part) - return MergeTreeDataPartType::InMemory; + const auto settings = getSettings(); + if (!canUsePolymorphicParts(*settings)) + return {PartType::Wide, PartStorageType::Full}; - if (bytes_uncompressed < settings->min_bytes_for_wide_part || rows_count < settings->min_rows_for_wide_part) - return MergeTreeDataPartType::Compact; - - return MergeTreeDataPartType::Wide; -} - -MergeTreeDataPartType MergeTreeData::choosePartTypeOnDisk(size_t bytes_uncompressed, size_t rows_count) const -{ - const auto settings = getSettings(); - if (!canUsePolymorphicParts(*settings)) - return MergeTreeDataPartType::Wide; - - if (bytes_uncompressed < settings->min_bytes_for_wide_part || rows_count < settings->min_rows_for_wide_part) - return MergeTreeDataPartType::Compact; - - return MergeTreeDataPartType::Wide; -} - - -MergeTreeData::MutableDataPartPtr MergeTreeData::createPart(const String & name, - MergeTreeDataPartType type, const MergeTreePartInfo & part_info, - const MutableDataPartStoragePtr & data_part_storage, const IMergeTreeDataPart * parent_part) const -{ - if (type == MergeTreeDataPartType::Compact) - return std::make_shared(*this, name, part_info, data_part_storage, parent_part); - else if (type == MergeTreeDataPartType::Wide) - return std::make_shared(*this, name, part_info, data_part_storage, parent_part); - else if (type == MergeTreeDataPartType::InMemory) - return std::make_shared(*this, name, part_info, data_part_storage, parent_part); - else - throw Exception("Unknown type of part " + data_part_storage->getRelativePath(), ErrorCodes::UNKNOWN_PART_TYPE); -} - -MergeTreeData::MutableDataPartPtr MergeTreeData::createPart( - const String & name, const MutableDataPartStoragePtr & data_part_storage, const IMergeTreeDataPart * parent_part) const -{ - return createPart(name, MergeTreePartInfo::fromPartName(name, format_version), data_part_storage, parent_part); -} - -MergeTreeData::MutableDataPartPtr MergeTreeData::createPart( - const String & name, const MergeTreePartInfo & part_info, - const MutableDataPartStoragePtr & data_part_storage, const IMergeTreeDataPart * parent_part) const -{ - MergeTreeDataPartType type; - auto mrk_ext = MergeTreeIndexGranularityInfo::getMarksExtensionFromFilesystem(*data_part_storage); - - if (mrk_ext) + auto satisfies = [&](const auto & min_bytes_for, const auto & min_rows_for) { - type = MarkType(*mrk_ext).part_type; - } - else - { - /// Didn't find any mark file, suppose that part is empty. 
- type = choosePartTypeOnDisk(0, 0); - } + return bytes_uncompressed < min_bytes_for || rows_count < min_rows_for; + }; - return createPart(name, type, part_info, data_part_storage, parent_part); + if (!only_on_disk && satisfies(settings->min_bytes_for_compact_part, settings->min_rows_for_compact_part)) + return {PartType::InMemory, PartStorageType::Full}; + + auto part_type = PartType::Wide; + if (satisfies(settings->min_bytes_for_wide_part, settings->min_rows_for_wide_part)) + part_type = PartType::Compact; + + return {part_type, PartStorageType::Full}; +} + +MergeTreeDataPartFormat MergeTreeData::choosePartFormatOnDisk(size_t bytes_uncompressed, size_t rows_count) const +{ + return choosePartFormat(bytes_uncompressed, rows_count, true); +} + +MergeTreeDataPartBuilder MergeTreeData::getDataPartBuilder( + const String & name, const VolumePtr & volume, const String & part_dir) const +{ + return MergeTreeDataPartBuilder(*this, name, volume, relative_data_path, part_dir); } void MergeTreeData::changeSettings( @@ -3207,7 +3183,7 @@ void MergeTreeData::changeSettings( { auto disk = new_storage_policy->getDiskByName(disk_name); if (disk->exists(relative_data_path)) - throw Exception("New storage policy contain disks which already contain data of a table with the same name", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "New storage policy contain disks which already contain data of a table with the same name"); } for (const String & disk_name : all_diff_disk_names) @@ -3252,7 +3228,7 @@ void MergeTreeData::PartsTemporaryRename::tryRenameAll() { const auto & [old_name, new_name, disk] = old_and_new_names[i]; if (old_name.empty() || new_name.empty()) - throw DB::Exception("Empty part name. Most likely it's a bug.", ErrorCodes::LOGICAL_ERROR); + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Empty part name. Most likely it's a bug."); const auto full_path = fs::path(storage.relative_data_path) / source_dir; disk->moveFile(fs::path(full_path) / old_name, fs::path(full_path) / new_name); } @@ -3395,10 +3371,8 @@ void MergeTreeData::checkPartPartition(MutableDataPartPtr & part, DataPartsLock if (DataPartPtr existing_part_in_partition = getAnyPartInPartition(part->info.partition_id, lock)) { if (part->partition.value != existing_part_in_partition->partition.value) - throw Exception( - "Partition value mismatch between two parts with the same partition ID. Existing part: " - + existing_part_in_partition->name + ", newly added part: " + part->name, - ErrorCodes::CORRUPTED_DATA); + throw Exception(ErrorCodes::CORRUPTED_DATA, "Partition value mismatch between two parts with the same partition ID. 
" + "Existing part: {}, newly added part: {}", existing_part_in_partition->name, part->name); } } @@ -3408,15 +3382,14 @@ void MergeTreeData::checkPartDuplicate(MutableDataPartPtr & part, Transaction & if (it_duplicate != data_parts_by_info.end()) { - String message = "Part " + (*it_duplicate)->getNameWithState() + " already exists"; - if ((*it_duplicate)->checkState({DataPartState::Outdated, DataPartState::Deleting})) - throw Exception(message + ", but it will be deleted soon", ErrorCodes::PART_IS_TEMPORARILY_LOCKED); + throw Exception(ErrorCodes::PART_IS_TEMPORARILY_LOCKED, "Part {} already exists, but it will be deleted soon", + (*it_duplicate)->getNameWithState()); if (transaction.txn) - throw Exception(message, ErrorCodes::SERIALIZATION_ERROR); + throw Exception(ErrorCodes::SERIALIZATION_ERROR, "Part {} already exists", (*it_duplicate)->getNameWithState()); - throw Exception(message, ErrorCodes::DUPLICATE_DATA_PART); + throw Exception(ErrorCodes::DUPLICATE_DATA_PART, "Part {} already exists", (*it_duplicate)->getNameWithState()); } } @@ -3447,8 +3420,7 @@ bool MergeTreeData::addTempPart( { LOG_TRACE(log, "Adding temporary part from directory {} with name {}.", part->getDataPartStorage().getPartDirectory(), part->name); if (&out_transaction.data != this) - throw Exception("MergeTreeData::Transaction for one table cannot be used with another. It is a bug.", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeTreeData::Transaction for one table cannot be used with another. It is a bug."); if (part->hasLightweightDelete()) has_lightweight_delete_parts.store(true); @@ -3490,7 +3462,7 @@ bool MergeTreeData::renameTempPartAndReplaceImpl( LOG_TRACE(log, "Renaming temporary part {} to {} with tid {}.", part->getDataPartStorage().getPartDirectory(), part->name, out_transaction.getTID()); if (&out_transaction.data != this) - throw Exception("MergeTreeData::Transaction for one table cannot be used with another. It is a bug.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeTreeData::Transaction for one table cannot be used with another. It is a bug."); part->assertState({DataPartState::Temporary}); checkPartPartition(part, lock); @@ -3500,21 +3472,18 @@ bool MergeTreeData::renameTempPartAndReplaceImpl( if (!hierarchy.intersected_parts.empty()) { - String message = fmt::format("Part {} intersects part {}", part->name, hierarchy.intersected_parts.back()->getNameWithState()); - // Drop part|partition operation inside some transactions sees some stale snapshot from the time when transactions has been started. // So such operation may attempt to delete already outdated part. In this case, this outdated part is most likely covered by the other part and intersection may occur. // Part mayght be outdated due to merge|mutation|update|optimization operations. if (part->isEmpty() || (hierarchy.intersected_parts.size() == 1 && hierarchy.intersected_parts.back()->isEmpty())) { - message += fmt::format(" One of them is empty part. That is a race between drop operation under transaction and a merge/mutation."); - throw Exception(message, ErrorCodes::SERIALIZATION_ERROR); + throw Exception(ErrorCodes::SERIALIZATION_ERROR, "Part {} intersects part {}. One of them is empty part. 
" + "That is a race between drop operation under transaction and a merge/mutation.", + part->name, hierarchy.intersected_parts.back()->getNameWithState()); } - if (hierarchy.intersected_parts.size() > 1) - message += fmt::format(" There are {} intersected parts.", hierarchy.intersected_parts.size()); - - throw Exception(ErrorCodes::LOGICAL_ERROR, message + " It is a bug."); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} intersects part {}. There are {} intersected parts. It is a bug.", + part->name, hierarchy.intersected_parts.back()->getNameWithState(), hierarchy.intersected_parts.size()); } if (part->hasLightweightDelete()) @@ -3564,7 +3533,7 @@ bool MergeTreeData::renameTempPartAndAdd( if (!covered_parts.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Added part {} covers {} existing part(s) (including {})", - part->name, toString(covered_parts.size()), covered_parts[0]->name); + part->name, covered_parts.size(), covered_parts[0]->name); return true; } @@ -3611,7 +3580,7 @@ void MergeTreeData::removePartsFromWorkingSetImmediatelyAndSetTemporaryState(con { auto it_part = data_parts_by_info.find(part->info); if (it_part == data_parts_by_info.end()) - throw Exception("Part " + part->getNameWithState() + " not found in data_parts", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} not found in data_parts", part->getNameWithState()); assert(part->getState() == MergeTreeDataPartState::PreActive); @@ -3629,7 +3598,7 @@ void MergeTreeData::removePartsFromWorkingSet( for (const auto & part : remove) { if (!data_parts_by_info.count(part->info)) - throw Exception("Part " + part->getNameWithState() + " not found in data_parts", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} not found in data_parts", part->getNameWithState()); part->assertState({DataPartState::PreActive, DataPartState::Active, DataPartState::Outdated}); } @@ -3656,7 +3625,7 @@ DataPartsVector MergeTreeData::grabActivePartsToRemoveForDropRange( for (const DataPartPtr & part : partition_range) { if (part->info.partition_id != drop_range.partition_id) - throw Exception("Unexpected partition_id of part " + part->name + ". This is a bug.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected partition_id of part {}. This is a bug.", part->name); /// It's a DROP PART and it's already executed by fetching some covering part bool is_drop_part = !drop_range.isFakeDropRangePart() && drop_range.min_block; @@ -3791,7 +3760,7 @@ void MergeTreeData::forcefullyMovePartToDetachedAndRemoveFromMemory(const MergeT auto it_part = data_parts_by_info.find(part_to_detach->info); if (it_part == data_parts_by_info.end()) - throw Exception("No such data part " + part_to_detach->getNameWithState(), ErrorCodes::NO_SUCH_DATA_PART); + throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "No such data part {}", part_to_detach->getNameWithState()); /// What if part_to_detach is a reference to *it_part? Make a new owner just in case. 
/// Important to own part pointer here (not const reference), because it will be removed from data_parts_indexes @@ -3934,7 +3903,7 @@ void MergeTreeData::tryRemovePartImmediately(DataPartPtr && part) { auto it = data_parts_by_info.find(part->info); if (it == data_parts_by_info.end() || (*it).get() != part.get()) - throw Exception("Part " + part->name + " doesn't exist", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} doesn't exist", part->name); part.reset(); @@ -4068,7 +4037,7 @@ void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until, const Contex ErrorCodes::TOO_MANY_PARTS, "Too many parts ({}) in all partitions in total. This indicates wrong choice of partition key. The threshold can be modified " "with 'max_parts_in_total' setting in element in config.xml or with per-table setting.", - toString(parts_count_in_total)); + parts_count_in_total); } size_t outdated_parts_over_threshold = 0; @@ -4202,7 +4171,7 @@ void MergeTreeData::swapActivePart(MergeTreeData::DataPartPtr part_copy) { auto active_part_it = data_parts_by_info.find(original_active_part->info); if (active_part_it == data_parts_by_info.end()) - throw Exception("Cannot swap part '" + part_copy->name + "', no such active part.", ErrorCodes::NO_SUCH_DATA_PART); + throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "Cannot swap part '{}', no such active part.", part_copy->name); /// We do not check allow_remote_fs_zero_copy_replication here because data may be shared /// when allow_remote_fs_zero_copy_replication turned on and off again @@ -4234,7 +4203,7 @@ void MergeTreeData::swapActivePart(MergeTreeData::DataPartPtr part_copy) return; } } - throw Exception("Cannot swap part '" + part_copy->name + "', no such active part.", ErrorCodes::NO_SUCH_DATA_PART); + throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "Cannot swap part '{}', no such active part.", part_copy->name); } @@ -4455,8 +4424,9 @@ void MergeTreeData::checkAlterPartitionIsPossible( { if (command.type == PartitionCommand::DROP_DETACHED_PARTITION && !settings.allow_drop_detached) - throw DB::Exception("Cannot execute query: DROP DETACHED PART is disabled " - "(see allow_drop_detached setting)", ErrorCodes::SUPPORT_IS_DISABLED); + throw DB::Exception(ErrorCodes::SUPPORT_IS_DISABLED, + "Cannot execute query: DROP DETACHED PART " + "is disabled (see allow_drop_detached setting)"); if (command.partition && command.type != PartitionCommand::DROP_DETACHED_PARTITION) { @@ -4473,7 +4443,7 @@ void MergeTreeData::checkAlterPartitionIsPossible( if (partition_ast && partition_ast->all) { if (command.type != PartitionCommand::DROP_PARTITION) - throw DB::Exception("Only support DETACH PARTITION ALL currently", ErrorCodes::SUPPORT_IS_DISABLED); + throw DB::Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Only support DETACH PARTITION ALL currently"); } else getPartitionIDFromQuery(command.partition, getContext()); @@ -4542,15 +4512,13 @@ void MergeTreeData::movePartitionToDisk(const ASTPtr & partition, const String & { String no_parts_to_move_message; if (moving_part) - no_parts_to_move_message = "Part '" + partition_id + "' is already on disk '" + disk->getName() + "'"; + throw Exception(ErrorCodes::UNKNOWN_DISK, "Part '{}' is already on disk '{}'", partition_id, disk->getName()); else - no_parts_to_move_message = "All parts of partition '" + partition_id + "' are already on disk '" + disk->getName() + "'"; - - throw Exception(no_parts_to_move_message, ErrorCodes::UNKNOWN_DISK); + throw Exception(ErrorCodes::UNKNOWN_DISK, "All parts of partition 
'{}' are already on disk '{}'", partition_id, disk->getName()); } if (!movePartsToSpace(parts, std::static_pointer_cast(disk))) - throw Exception("Cannot move parts because moves are manually disabled", ErrorCodes::ABORTED); + throw Exception(ErrorCodes::ABORTED, "Cannot move parts because moves are manually disabled"); } @@ -4597,15 +4565,13 @@ void MergeTreeData::movePartitionToVolume(const ASTPtr & partition, const String { String no_parts_to_move_message; if (moving_part) - no_parts_to_move_message = "Part '" + partition_id + "' is already on volume '" + volume->getName() + "'"; + throw Exception(ErrorCodes::UNKNOWN_DISK, "Part '{}' is already on volume '{}'", partition_id, volume->getName()); else - no_parts_to_move_message = "All parts of partition '" + partition_id + "' are already on volume '" + volume->getName() + "'"; - - throw Exception(no_parts_to_move_message, ErrorCodes::UNKNOWN_DISK); + throw Exception(ErrorCodes::UNKNOWN_DISK, "All parts of partition '{}' are already on volume '{}'", partition_id, volume->getName()); } if (!movePartsToSpace(parts, std::static_pointer_cast(volume))) - throw Exception("Cannot move parts because moves are manually disabled", ErrorCodes::ABORTED); + throw Exception(ErrorCodes::ABORTED, "Cannot move parts because moves are manually disabled"); } void MergeTreeData::movePartitionToShard(const ASTPtr & /*partition*/, bool /*move_part*/, const String & /*to*/, ContextPtr /*query_context*/) @@ -4756,7 +4722,7 @@ Pipe MergeTreeData::alterPartition( break; default: - throw Exception("Uninitialized partition command", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Uninitialized partition command"); } for (auto & command_result : current_command_results) command_result.command_type = command.typeToString(); @@ -5017,8 +4983,9 @@ void MergeTreeData::restorePartFromBackup(std::shared_ptr r } auto single_disk_volume = std::make_shared(disk->getName(), disk, 0); - auto data_part_storage = std::make_shared(single_disk_volume, temp_part_dir.parent_path(), part_name); - auto part = createPart(part_name, part_info, data_part_storage); + MergeTreeDataPartBuilder builder(*this, part_name, single_disk_volume, temp_part_dir.parent_path(), part_name); + builder.withPartFormatFromDisk(); + auto part = std::move(builder).build(); part->version.setCreationTID(Tx::PrehistoricTID, nullptr); part->loadColumnsChecksumsIndexes(false, true); @@ -5031,7 +4998,7 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, ContextPtr loc const auto & partition_ast = ast->as(); if (partition_ast.all) - throw Exception("Only Support DETACH PARTITION ALL currently", ErrorCodes::SUPPORT_IS_DISABLED); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Only Support DETACH PARTITION ALL currently"); if (!partition_ast.value) { @@ -5357,12 +5324,11 @@ DetachedPartsInfo MergeTreeData::getDetachedParts() const void MergeTreeData::validateDetachedPartName(const String & name) { if (name.find('/') != std::string::npos || name == "." 
|| name == "..") - throw DB::Exception("Invalid part name '" + name + "'", ErrorCodes::INCORRECT_FILE_NAME); + throw DB::Exception(ErrorCodes::INCORRECT_FILE_NAME, "Invalid part name '{}'", name); if (startsWith(name, "attaching_") || startsWith(name, "deleting_")) - throw DB::Exception("Cannot drop part " + name + ": " - "most likely it is used by another DROP or ATTACH query.", - ErrorCodes::BAD_DATA_PART_NAME); + throw DB::Exception(ErrorCodes::BAD_DATA_PART_NAME, "Cannot drop part {}: " + "most likely it is used by another DROP or ATTACH query.", name); } void MergeTreeData::dropDetached(const ASTPtr & partition, bool part, ContextPtr local_context) @@ -5467,8 +5433,9 @@ MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const LOG_DEBUG(log, "Checking part {}", new_name); auto single_disk_volume = std::make_shared("volume_" + old_name, disk); - auto data_part_storage = std::make_shared(single_disk_volume, relative_data_path, source_dir + new_name); - MutableDataPartPtr part = createPart(old_name, data_part_storage); + auto part = getDataPartBuilder(old_name, single_disk_volume, source_dir + new_name) + .withPartFormatFromDisk() + .build(); loadPartAndFixMetadataImpl(part); loaded_parts.push_back(part); @@ -5485,7 +5452,7 @@ inline ReservationPtr checkAndReturnReservation(UInt64 expected_size, Reservatio if (reservation) return reservation; - throw Exception(fmt::format("Cannot reserve {}, not enough space", ReadableSize(expected_size)), ErrorCodes::NOT_ENOUGH_SPACE); + throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Cannot reserve {}, not enough space", ReadableSize(expected_size)); } } @@ -5671,13 +5638,11 @@ bool MergeTreeData::isPartInTTLDestination(const TTLDescription & ttl, const IMe CompressionCodecPtr MergeTreeData::getCompressionCodecForPart(size_t part_size_compressed, const IMergeTreeDataPart::TTLInfos & ttl_infos, time_t current_time) const { - auto metadata_snapshot = getInMemoryMetadataPtr(); const auto & recompression_ttl_entries = metadata_snapshot->getRecompressionTTLs(); auto best_ttl_entry = selectTTLDescriptionForTTLInfos(recompression_ttl_entries, ttl_infos.recompression_ttl, current_time, true); - if (best_ttl_entry) return CompressionCodecFactory::instance().get(best_ttl_entry->recompression_codec, {}); @@ -6106,9 +6071,9 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( ContextPtr query_context) const { if (!metadata_snapshot->minmax_count_projection) - throw Exception( - "Cannot find the definition of minmax_count projection but it's used in current query. It's a bug", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Cannot find the definition of minmax_count projection but it's used in current query. " + "It's a bug"); auto block = metadata_snapshot->minmax_count_projection->sample_block.cloneEmpty(); bool need_primary_key_max_column = false; @@ -6205,7 +6170,7 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( continue; if (!part->minmax_idx->initialized) - throw Exception("Found a non-empty part with uninitialized minmax_idx. It's a bug", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Found a non-empty part with uninitialized minmax_idx. 
It's a bug"); filter_column_data.emplace_back(); @@ -6838,12 +6803,12 @@ MergeTreeData & MergeTreeData::checkStructureAndGetMergeTreeData(IStorage & sour { MergeTreeData * src_data = dynamic_cast(&source_table); if (!src_data) - throw Exception("Table " + source_table.getStorageID().getNameForLogs() + - " supports attachPartitionFrom only for MergeTree family of table engines." - " Got " + source_table.getName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Table {} supports attachPartitionFrom only for MergeTree family of table engines. Got {}", + source_table.getStorageID().getNameForLogs(), source_table.getName()); if (my_snapshot->getColumns().getAllPhysical().sizeOfDifference(src_snapshot->getColumns().getAllPhysical())) - throw Exception("Tables have different structure", ErrorCodes::INCOMPATIBLE_COLUMNS); + throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS, "Tables have different structure"); auto query_to_string = [] (const ASTPtr & ast) { @@ -6851,16 +6816,16 @@ MergeTreeData & MergeTreeData::checkStructureAndGetMergeTreeData(IStorage & sour }; if (query_to_string(my_snapshot->getSortingKeyAST()) != query_to_string(src_snapshot->getSortingKeyAST())) - throw Exception("Tables have different ordering", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Tables have different ordering"); if (query_to_string(my_snapshot->getPartitionKeyAST()) != query_to_string(src_snapshot->getPartitionKeyAST())) - throw Exception("Tables have different partition key", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Tables have different partition key"); if (format_version != src_data->format_version) - throw Exception("Tables have different format_version", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Tables have different format_version"); if (query_to_string(my_snapshot->getPrimaryKeyAST()) != query_to_string(src_snapshot->getPrimaryKeyAST())) - throw Exception("Tables have different primary key", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Tables have different primary key"); return *src_data; } @@ -6919,9 +6884,13 @@ std::pair MergeTreeData::cloneAn src_flushed_tmp_dir_lock = src_part->storage.getTemporaryPartDirectoryHolder(tmp_src_part_file_name); auto flushed_part_storage = src_part_in_memory->flushToDisk(flushed_part_path, metadata_snapshot); - src_flushed_tmp_part = createPart(src_part->name, src_part->info, flushed_part_storage); - src_flushed_tmp_part->is_temp = true; + src_flushed_tmp_part = MergeTreeDataPartBuilder(*this, src_part->name, flushed_part_storage) + .withPartInfo(src_part->info) + .withPartFormatFromDisk() + .build(); + + src_flushed_tmp_part->is_temp = true; src_part_storage = flushed_part_storage; } @@ -6929,7 +6898,13 @@ std::pair MergeTreeData::cloneAn if (copy_instead_of_hardlink) with_copy = " (copying data)"; - auto dst_part_storage = src_part_storage->freeze(relative_data_path, tmp_dst_part_name, /* make_source_readonly */ false, {}, copy_instead_of_hardlink, files_to_copy_instead_of_hardlinks); + auto dst_part_storage = src_part_storage->freeze( + relative_data_path, + tmp_dst_part_name, + /*make_source_readonly=*/ false, + /*save_metadata_callback=*/ {}, + copy_instead_of_hardlink, + files_to_copy_instead_of_hardlinks); LOG_DEBUG(log, "Clone {} part {} to {}{}", src_flushed_tmp_part ? 
"flushed" : "", @@ -6937,7 +6912,9 @@ std::pair MergeTreeData::cloneAn std::string(fs::path(dst_part_storage->getFullRootPath()) / tmp_dst_part_name), with_copy); - auto dst_data_part = createPart(dst_part_name, dst_part_info, dst_part_storage); + auto dst_data_part = MergeTreeDataPartBuilder(*this, dst_part_name, dst_part_storage) + .withPartFormatFromDisk() + .build(); if (!copy_instead_of_hardlink && hardlinked_files) { @@ -7131,15 +7108,18 @@ PartitionCommandsResultInfo MergeTreeData::freezePartitionsByMatcher( src_flushed_tmp_dir_lock = part->storage.getTemporaryPartDirectoryHolder("tmp_freeze" + part->name); auto flushed_part_storage = part_in_memory->flushToDisk(flushed_part_path, metadata_snapshot); - src_flushed_tmp_part = createPart(part->name, part->info, flushed_part_storage); - src_flushed_tmp_part->is_temp = true; + src_flushed_tmp_part = MergeTreeDataPartBuilder(*this, part->name, flushed_part_storage) + .withPartInfo(part->info) + .withPartFormatFromDisk() + .build(); + + src_flushed_tmp_part->is_temp = true; data_part_storage = flushed_part_storage; } auto callback = [this, &part, &backup_part_path](const DiskPtr & disk) { - // Store metadata for replicated table. // Do nothing for non-replicated. createAndStoreFreezeMetadata(disk, part, fs::path(backup_part_path) / part->getDataPartStorage().getPartDirectory()); @@ -7148,10 +7128,10 @@ PartitionCommandsResultInfo MergeTreeData::freezePartitionsByMatcher( auto new_storage = data_part_storage->freeze( backup_part_path, part->getDataPartStorage().getPartDirectory(), - /*make_source_readonly*/ true, + /*make_source_readonly=*/ true, callback, - /*copy_instead_of_hardlink*/ false, - {}); + /*copy_instead_of_hardlink=*/ false, + /*files_to_copy_instead_of_hardlinks=*/ {}); part->is_frozen.store(true, std::memory_order_relaxed); result.push_back(PartitionCommandResultInfo{ @@ -7310,7 +7290,7 @@ MergeTreeData::CurrentlyMovingPartsTagger::CurrentlyMovingPartsTagger(MergeTreeM { for (const auto & moving_part : parts_to_move) if (!data.currently_moving_parts.emplace(moving_part.part).second) - throw Exception("Cannot move part '" + moving_part.part->name + "'. It's already moving.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot move part '{}'. It's already moving.", moving_part.part->name); } MergeTreeData::CurrentlyMovingPartsTagger::~CurrentlyMovingPartsTagger() @@ -7399,19 +7379,17 @@ MergeTreeData::CurrentlyMovingPartsTaggerPtr MergeTreeData::checkPartsForMove(co { auto reservation = space->reserve(part->getBytesOnDisk()); if (!reservation) - throw Exception("Move is not possible. Not enough space on '" + space->getName() + "'", ErrorCodes::NOT_ENOUGH_SPACE); + throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Move is not possible. 
Not enough space on '{}'", space->getName()); auto reserved_disk = reservation->getDisk(); if (reserved_disk->exists(relative_data_path + part->name)) - throw Exception( - "Move is not possible: " + fullPath(reserved_disk, relative_data_path + part->name) + " already exists", - ErrorCodes::DIRECTORY_ALREADY_EXISTS); + throw Exception(ErrorCodes::DIRECTORY_ALREADY_EXISTS, "Move is not possible: {} already exists", + fullPath(reserved_disk, relative_data_path + part->name)); if (currently_moving_parts.contains(part) || partIsAssignedToBackgroundOperation(part)) - throw Exception( - "Cannot move part '" + part->name + "' because it's participating in background process", - ErrorCodes::PART_IS_TEMPORARILY_LOCKED); + throw Exception(ErrorCodes::PART_IS_TEMPORARILY_LOCKED, + "Cannot move part '{}' because it's participating in background process", part->name); parts_to_move.emplace_back(part, std::move(reservation)); } @@ -7513,6 +7491,11 @@ bool MergeTreeData::partsContainSameProjections(const DataPartPtr & left, const return true; } +bool MergeTreeData::canUsePolymorphicParts() const +{ + return canUsePolymorphicParts(*getSettings(), nullptr); +} + bool MergeTreeData::canUsePolymorphicParts(const MergeTreeSettings & settings, String * out_reason) const { if (!canUseAdaptiveGranularity()) @@ -7914,19 +7897,10 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::createEmptyPart( ReservationPtr reservation = reserveSpacePreferringTTLRules(metadata_snapshot, 0, move_ttl_infos, time(nullptr), 0, true); VolumePtr data_part_volume = createVolumeFromReservation(reservation, volume); - auto new_data_part_storage = std::make_shared( - data_part_volume, - getRelativeDataPath(), - EMPTY_PART_TMP_PREFIX + new_part_name); - - auto new_data_part = createPart( - new_part_name, - choosePartTypeOnDisk(0, block.rows()), - new_part_info, - new_data_part_storage - ); - - new_data_part->name = new_part_name; + auto new_data_part = getDataPartBuilder(new_part_name, data_part_volume, EMPTY_PART_TMP_PREFIX + new_part_name) + .withBytesAndRowsOnDisk(0, 0) + .withPartInfo(new_part_info) + .build(); if (settings->assign_part_uuids) new_data_part->uuid = UUIDHelpers::generateV4(); @@ -7939,6 +7913,9 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::createEmptyPart( new_data_part->minmax_idx = std::move(minmax_idx); new_data_part->is_temp = true; + auto new_data_part_storage = new_data_part->getDataPartStoragePtr(); + new_data_part_storage->beginTransaction(); + SyncGuardPtr sync_guard; if (new_data_part->isStoredOnDisk()) { @@ -7972,9 +7949,9 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::createEmptyPart( out.write(block); /// Here is no projections as no data inside - out.finalizePart(new_data_part, sync_on_insert); + new_data_part_storage->precommitTransaction(); return new_data_part; } diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 8ced76b0bc5..7dcd0c40553 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -18,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -223,21 +225,9 @@ public: using OperationDataPartsLock = std::unique_lock; OperationDataPartsLock lockOperationsWithParts() const { return OperationDataPartsLock(operation_with_data_parts_mutex); } - MergeTreeDataPartType choosePartType(size_t bytes_uncompressed, size_t rows_count) const; - MergeTreeDataPartType choosePartTypeOnDisk(size_t bytes_uncompressed, 
size_t rows_count) const; - - /// After this method setColumns must be called - MutableDataPartPtr createPart(const String & name, - MergeTreeDataPartType type, const MergeTreePartInfo & part_info, - const MutableDataPartStoragePtr & data_part_storage, const IMergeTreeDataPart * parent_part = nullptr) const; - - /// Create part, that already exists on filesystem. - /// After this methods 'loadColumnsChecksumsIndexes' must be called. - MutableDataPartPtr createPart(const String & name, - const MutableDataPartStoragePtr & data_part_storage, const IMergeTreeDataPart * parent_part = nullptr) const; - - MutableDataPartPtr createPart(const String & name, const MergeTreePartInfo & part_info, - const MutableDataPartStoragePtr & data_part_storage, const IMergeTreeDataPart * parent_part = nullptr) const; + MergeTreeDataPartFormat choosePartFormat(size_t bytes_uncompressed, size_t rows_count, bool only_on_disk = false) const; + MergeTreeDataPartFormat choosePartFormatOnDisk(size_t bytes_uncompressed, size_t rows_count) const; + MergeTreeDataPartBuilder getDataPartBuilder(const String & name, const VolumePtr & volume, const String & part_dir) const; /// Auxiliary object to add a set of parts into the working set in two steps: /// * First, as PreActive parts (the parts are ready, but not yet in the active set). @@ -1053,6 +1043,7 @@ public: scope_guard getTemporaryPartDirectoryHolder(const String & part_dir_name) const; void waitForOutdatedPartsToBeLoaded() const; + bool canUsePolymorphicParts() const; protected: friend class IMergeTreeDataPart; @@ -1089,7 +1080,7 @@ protected: MultiVersion storage_settings; /// Used to determine which UUIDs to send to root query executor for deduplication. - mutable std::shared_mutex pinned_part_uuids_mutex; + mutable SharedMutex pinned_part_uuids_mutex; PinnedPartUUIDsPtr pinned_part_uuids; /// True if at least one part was created/removed with transaction. @@ -1197,23 +1188,23 @@ protected: void modifyPartState(DataPartIteratorByStateAndInfo it, DataPartState state) { if (!data_parts_by_state_and_info.modify(it, getStateModifier(state))) - throw Exception("Can't modify " + (*it)->getNameWithState(), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't modify {}", (*it)->getNameWithState()); } void modifyPartState(DataPartIteratorByInfo it, DataPartState state) { if (!data_parts_by_state_and_info.modify(data_parts_indexes.project(it), getStateModifier(state))) - throw Exception("Can't modify " + (*it)->getNameWithState(), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't modify {}", (*it)->getNameWithState()); } void modifyPartState(const DataPartPtr & part, DataPartState state) { auto it = data_parts_by_info.find(part->info); if (it == data_parts_by_info.end() || (*it).get() != part.get()) - throw Exception("Part " + part->name + " doesn't exist", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} doesn't exist", part->name); if (!data_parts_by_state_and_info.modify(data_parts_indexes.project(it), getStateModifier(state))) - throw Exception("Can't modify " + (*it)->getNameWithState(), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't modify {}", (*it)->getNameWithState()); } /// Used to serialize calls to grabOldParts. 
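For reference, the three createPart overloads removed above are replaced by MergeTreeDataPartBuilder. A minimal usage sketch of the two call patterns that appear in the hunks, assuming hypothetical local names (data, part_name, part_storage, volume, part_dir, part_info, expected_bytes, rows) standing in for whatever a call site already has:

    // Loading a part that already exists on disk: the format (Wide/Compact)
    // is detected from the mark file extension found in the part directory.
    auto loaded_part = MergeTreeDataPartBuilder(data, part_name, part_storage)
        .withPartInfo(part_info)
        .withPartFormatFromDisk()
        .build();

    // Creating a new part: the format is chosen from the expected size,
    // the same way MergeTreeData::choosePartFormat() does.
    auto new_part = data.getDataPartBuilder(part_name, volume, part_dir)
        .withPartFormat(data.choosePartFormat(expected_bytes, rows))
        .withPartInfo(part_info)
        .build();

build() requires both a part type and a part storage to be set (otherwise it throws LOGICAL_ERROR), which is what the withPartFormat*() helpers arrange before constructing the Wide, Compact or InMemory part object.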
diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index dee3b1d9967..2ffc6dc818e 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -373,7 +373,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( /// Do not allow to "merge" part with itself for regular merges, unless it is a TTL-merge where it is ok to remove some values with expired ttl if (parts_to_merge.size() == 1) - throw Exception("Logical error: merge selector returned only one part to merge", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: merge selector returned only one part to merge"); if (parts_to_merge.empty()) { @@ -631,8 +631,8 @@ MergeTreeData::DataPartPtr MergeTreeDataMergerMutator::renameMergedTemporaryPart { for (size_t i = 0; i < parts.size(); ++i) if (parts[i]->name != replaced_parts[i]->name) - throw Exception("Unexpected part removed when adding " + new_data_part->name + ": " + replaced_parts[i]->name - + " instead of " + parts[i]->name, ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected part removed when adding {}: {} instead of {}", + new_data_part->name, replaced_parts[i]->name, parts[i]->name); } LOG_TRACE(log, "Merged {} parts: [{}, {}] -> {}", parts.size(), parts.front()->name, parts.back()->name, new_data_part->name); diff --git a/src/Storages/MergeTree/MergeTreeDataPartBuilder.cpp b/src/Storages/MergeTree/MergeTreeDataPartBuilder.cpp new file mode 100644 index 00000000000..d55248df0af --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeDataPartBuilder.cpp @@ -0,0 +1,201 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int UNKNOWN_PART_TYPE; +} + +MergeTreeDataPartBuilder::MergeTreeDataPartBuilder( + const MergeTreeData & data_, String name_, VolumePtr volume_, String root_path_, String part_dir_) + : data(data_) + , name(std::move(name_)) + , volume(std::move(volume_)) + , root_path(std::move(root_path_)) + , part_dir(std::move(part_dir_)) +{ +} + +MergeTreeDataPartBuilder::MergeTreeDataPartBuilder( + const MergeTreeData & data_, String name_, MutableDataPartStoragePtr part_storage_) + : data(data_) + , name(std::move(name_)) + , part_storage(std::move(part_storage_)) +{ +} + +std::shared_ptr MergeTreeDataPartBuilder::build() +{ + using PartType = MergeTreeDataPartType; + using PartStorageType = MergeTreeDataPartStorageType; + + if (!part_type) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot create part {}, because part type is not set", name); + + if (!part_storage) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot create part {}, because part storage is not set", name); + + if (parent_part && data.format_version == MERGE_TREE_DATA_OLD_FORMAT_VERSION) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot create projection part in MergeTree table created in old syntax"); + + auto part_storage_type = part_storage->getType(); + if (!data.canUsePolymorphicParts() && + (part_type != PartType::Wide || part_storage_type != PartStorageType::Full)) + { + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Cannot create part with type {} and storage type {} because table does not support polymorphic parts", + part_type->toString(), part_storage_type.toString()); + } + + if (!part_info) + part_info = MergeTreePartInfo::fromPartName(name, data.format_version); + + 
switch (part_type->getValue()) + { + case PartType::Wide: + return std::make_shared(data, name, *part_info, part_storage, parent_part); + case PartType::Compact: + return std::make_shared(data, name, *part_info, part_storage, parent_part); + case PartType::InMemory: + return std::make_shared(data, name, *part_info, part_storage, parent_part); + default: + throw Exception(ErrorCodes::UNKNOWN_PART_TYPE, + "Unknown type of part {}", part_storage->getRelativePath()); + } +} + +MutableDataPartStoragePtr MergeTreeDataPartBuilder::getPartStorageByType( + MergeTreeDataPartStorageType storage_type_, + const VolumePtr & volume_, + const String & root_path_, + const String & part_dir_) +{ + if (!volume_) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot create part storage, because volume is not specified"); + + using Type = MergeTreeDataPartStorageType; + switch (storage_type_.getValue()) + { + case Type::Full: + return std::make_shared(volume_, root_path_, part_dir_); + default: + throw Exception(ErrorCodes::UNKNOWN_PART_TYPE, + "Unknown type of storage for part {}", fs::path(root_path_) / part_dir_); + } +} + +MergeTreeDataPartBuilder & MergeTreeDataPartBuilder::withPartInfo(MergeTreePartInfo part_info_) +{ + part_info = std::move(part_info_); + return *this; +} + +MergeTreeDataPartBuilder & MergeTreeDataPartBuilder::withParentPart(const IMergeTreeDataPart * parent_part_) +{ + if (parent_part_ && parent_part_->isProjectionPart()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Parent part cannot be projection"); + + parent_part = parent_part_; + return *this; +} + +MergeTreeDataPartBuilder & MergeTreeDataPartBuilder::withPartType(MergeTreeDataPartType part_type_) +{ + part_type = part_type_; + return *this; +} + +MergeTreeDataPartBuilder & MergeTreeDataPartBuilder::withPartStorageType(MergeTreeDataPartStorageType storage_type_) +{ + part_storage = getPartStorageByType(storage_type_, volume, root_path, part_dir); + return *this; +} + +MergeTreeDataPartBuilder & MergeTreeDataPartBuilder::withPartFormat(MergeTreeDataPartFormat format_) +{ + withPartType(format_.part_type); + return part_storage ? *this : withPartStorageType(format_.storage_type); +} + +MergeTreeDataPartBuilder::PartStorageAndMarkType +MergeTreeDataPartBuilder::getPartStorageAndMarkType( + const VolumePtr & volume_, + const String & root_path_, + const String & part_dir_) +{ + auto disk = volume_->getDisk(); + auto part_relative_path = fs::path(root_path_) / part_dir_; + + for (auto it = disk->iterateDirectory(part_relative_path); it->isValid(); it->next()) + { + auto it_path = fs::path(it->name()); + auto ext = it_path.extension().string(); + + if (MarkType::isMarkFileExtension(ext)) + { + auto storage = getPartStorageByType(MergeTreeDataPartStorageType::Full, volume_, root_path_, part_dir_); + return {std::move(storage), MarkType(ext)}; + } + } + + return {}; +} + +MergeTreeDataPartBuilder & MergeTreeDataPartBuilder::withPartFormatFromDisk() +{ + if (part_storage) + return withPartFormatFromStorage(); + else + return withPartFormatFromVolume(); +} + +MergeTreeDataPartBuilder & MergeTreeDataPartBuilder::withPartFormatFromVolume() +{ + assert(volume); + auto [storage, mark_type] = getPartStorageAndMarkType(volume, root_path, part_dir); + + if (!storage || !mark_type) + { + /// Didn't find any data or mark file, suppose that part is empty. 
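+    /// withBytesAndRowsOnDisk(0, 0) falls back to choosePartFormatOnDisk(),
+    /// i.e. the default on-disk format for a zero-sized part.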
+ return withBytesAndRowsOnDisk(0, 0); + } + + part_storage = std::move(storage); + part_type = mark_type->part_type; + return *this; +} + +MergeTreeDataPartBuilder & MergeTreeDataPartBuilder::withPartFormatFromStorage() +{ + assert(part_storage); + auto mark_type = MergeTreeIndexGranularityInfo::getMarksTypeFromFilesystem(*part_storage); + + if (!mark_type) + { + /// Didn't find any mark file, suppose that part is empty. + return withBytesAndRowsOnDisk(0, 0); + } + + part_type = mark_type->part_type; + return *this; +} + +MergeTreeDataPartBuilder & MergeTreeDataPartBuilder::withBytesAndRows(size_t bytes_uncompressed, size_t rows_count) +{ + return withPartFormat(data.choosePartFormat(bytes_uncompressed, rows_count)); +} + +MergeTreeDataPartBuilder & MergeTreeDataPartBuilder::withBytesAndRowsOnDisk(size_t bytes_uncompressed, size_t rows_count) +{ + return withPartFormat(data.choosePartFormatOnDisk(bytes_uncompressed, rows_count)); +} + +} diff --git a/src/Storages/MergeTree/MergeTreeDataPartBuilder.h b/src/Storages/MergeTree/MergeTreeDataPartBuilder.h new file mode 100644 index 00000000000..0f54ff0a631 --- /dev/null +++ b/src/Storages/MergeTree/MergeTreeDataPartBuilder.h @@ -0,0 +1,69 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +class IDataPartStorage; +class IMergeTreeDataPart; +class IVolume; +class IDisk; +class MergeTreeData; + +using MutableDataPartStoragePtr = std::shared_ptr; +using VolumePtr = std::shared_ptr; + +/// Class that helps to create a data part with different variations of arguments. +class MergeTreeDataPartBuilder +{ +public: + MergeTreeDataPartBuilder(const MergeTreeData & data_, String name_, VolumePtr volume_, String root_path_, String part_dir_); + MergeTreeDataPartBuilder(const MergeTreeData & data_, String name_, MutableDataPartStoragePtr part_storage_); + + std::shared_ptr build(); + + using Self = MergeTreeDataPartBuilder; + + Self & withPartInfo(MergeTreePartInfo part_info_); + Self & withParentPart(const IMergeTreeDataPart * parent_part_); + Self & withPartType(MergeTreeDataPartType part_type_); + Self & withPartStorageType(MergeTreeDataPartStorageType storage_type_); + Self & withPartFormat(MergeTreeDataPartFormat format_); + Self & withPartFormatFromDisk(); + Self & withBytesAndRows(size_t bytes_uncompressed, size_t rows_count); + Self & withBytesAndRowsOnDisk(size_t bytes_uncompressed, size_t rows_count); + + using PartStorageAndMarkType = std::pair>; + + static PartStorageAndMarkType getPartStorageAndMarkType( + const VolumePtr & volume_, + const String & root_path_, + const String & part_dir_); + +private: + Self & withPartFormatFromVolume(); + Self & withPartFormatFromStorage(); + + static MutableDataPartStoragePtr getPartStorageByType( + MergeTreeDataPartStorageType storage_type_, + const VolumePtr & volume_, + const String & root_path_, + const String & part_dir_); + + const MergeTreeData & data; + const String name; + const VolumePtr volume; + const String root_path; + const String part_dir; + + std::optional part_info; + std::optional part_type; + MutableDataPartStoragePtr part_storage; + const IMergeTreeDataPart * parent_part = nullptr; +}; + +} diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp index 81f884ef45a..719a60b2f31 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB @@ -30,30 +31,33 @@ void 
MergeTreeDataPartChecksum::checkEqual(const MergeTreeDataPartChecksum & rhs if (is_compressed && have_uncompressed) { if (!rhs.is_compressed) - throw Exception("No uncompressed checksum for file " + name, ErrorCodes::CHECKSUM_DOESNT_MATCH); + throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "No uncompressed checksum for file {}", name); if (rhs.uncompressed_size != uncompressed_size) - throw Exception("Unexpected uncompressed size of file " + name + " in data part", ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART); + throw Exception(ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, "Unexpected uncompressed size of file {} in data part", name); if (rhs.uncompressed_hash != uncompressed_hash) - throw Exception("Checksum mismatch for uncompressed file " + name + " in data part", ErrorCodes::CHECKSUM_DOESNT_MATCH); + throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "Checksum mismatch for uncompressed file {} in data part", name); return; } if (rhs.file_size != file_size) - throw Exception("Unexpected size of file " + name + " in data part", ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART); + throw Exception(ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, "Unexpected size of file {} in data part", name); if (rhs.file_hash != file_hash) - throw Exception("Checksum mismatch for file " + name + " in data part", ErrorCodes::CHECKSUM_DOESNT_MATCH); + throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "Checksum mismatch for file {} in data part", name); } -void MergeTreeDataPartChecksum::checkSize(const DiskPtr & disk, const String & path) const +void MergeTreeDataPartChecksum::checkSize(const IDataPartStorage & storage, const String & name) const { - if (!disk->exists(path)) - throw Exception(fullPath(disk, path) + " doesn't exist", ErrorCodes::FILE_DOESNT_EXIST); - if (disk->isDirectory(path)) - // This is a projection, no need to check its size. + if (!storage.exists(name)) + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "{} doesn't exist", fs::path(storage.getRelativePath()) / name); + + // This is a projection, no need to check its size. 
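+    // (Projections live in "<name>.proj" subdirectories of the parent part, so a directory here is expected.)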
+ if (storage.isDirectory(name)) return; - UInt64 size = disk->getFileSize(path); + + UInt64 size = storage.getFileSize(name); if (size != file_size) - throw Exception(fullPath(disk, path) + " has unexpected size: " + toString(size) + " instead of " + toString(file_size), - ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART); + throw Exception(ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, + "{} has unexpected size: {} instead of {}", + fs::path(storage.getRelativePath()) / name, size, file_size); } @@ -64,7 +68,7 @@ void MergeTreeDataPartChecksums::checkEqual(const MergeTreeDataPartChecksums & r const String & name = it.first; if (!files.contains(name)) - throw Exception("Unexpected file " + name + " in data part", ErrorCodes::UNEXPECTED_FILE_IN_DATA_PART); + throw Exception(ErrorCodes::UNEXPECTED_FILE_IN_DATA_PART, "Unexpected file {} in data part", name); } for (const auto & it : files) @@ -73,18 +77,18 @@ void MergeTreeDataPartChecksums::checkEqual(const MergeTreeDataPartChecksums & r auto jt = rhs.files.find(name); if (jt == rhs.files.end()) - throw Exception("No file " + name + " in data part", ErrorCodes::NO_FILE_IN_DATA_PART); + throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No file {} in data part", name); it.second.checkEqual(jt->second, have_uncompressed, name); } } -void MergeTreeDataPartChecksums::checkSizes(const DiskPtr & disk, const String & path) const +void MergeTreeDataPartChecksums::checkSizes(const IDataPartStorage & storage) const { for (const auto & it : files) { const String & name = it.first; - it.second.checkSize(disk, path + name); + it.second.checkSize(storage, name); } } @@ -109,7 +113,7 @@ bool MergeTreeDataPartChecksums::read(ReadBuffer & in, size_t format_version) case 4: return readV4(in); default: - throw Exception("Bad checksums format version: " + DB::toString(format_version), ErrorCodes::UNKNOWN_FORMAT); + throw Exception(ErrorCodes::UNKNOWN_FORMAT, "Bad checksums format version: {}", DB::toString(format_version)); } } @@ -271,7 +275,7 @@ MergeTreeDataPartChecksums MergeTreeDataPartChecksums::deserializeFrom(const Str ReadBufferFromString in(s); MergeTreeDataPartChecksums res; if (!res.read(in)) - throw Exception("Checksums format is too old", ErrorCodes::FORMAT_VERSION_TOO_OLD); + throw Exception(ErrorCodes::FORMAT_VERSION_TOO_OLD, "Checksums format is too old"); assertEOF(in); return res; } @@ -370,7 +374,7 @@ bool MinimalisticDataPartChecksums::deserialize(ReadBuffer & in) } if (format_version > MINIMAL_VERSION_WITH_MINIMALISTIC_CHECKSUMS) - throw Exception("Unknown checksums format version: " + DB::toString(format_version), ErrorCodes::UNKNOWN_FORMAT); + throw Exception(ErrorCodes::UNKNOWN_FORMAT, "Unknown checksums format version: {}", DB::toString(format_version)); deserializeWithoutHeader(in); @@ -482,11 +486,7 @@ void MinimalisticDataPartChecksums::checkEqualImpl(const MinimalisticDataPartChe if (!errors.empty()) { - String error_msg = "Checksums of parts don't match: " + errors.front(); - for (size_t i = 1; i < errors.size(); ++i) - error_msg += ", " + errors[i]; - - throw Exception(error_msg, ErrorCodes::CHECKSUM_DOESNT_MATCH); + throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "Checksums of parts don't match: {}", fmt::join(errors, ", ")); } } diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.h b/src/Storages/MergeTree/MergeTreeDataPartChecksum.h index 15acb88aa0f..db110043b74 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.h +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.h @@ -7,13 +7,13 @@ #include #include - 
class SipHash; - namespace DB { +class IDataPartStorage; + /// Checksum of one file. struct MergeTreeDataPartChecksum { @@ -33,7 +33,7 @@ struct MergeTreeDataPartChecksum uncompressed_size(uncompressed_size_), uncompressed_hash(uncompressed_hash_) {} void checkEqual(const MergeTreeDataPartChecksum & rhs, bool have_uncompressed, const String & name) const; - void checkSize(const DiskPtr & disk, const String & path) const; + void checkSize(const IDataPartStorage & storage, const String & name) const; }; @@ -54,10 +54,7 @@ struct MergeTreeDataPartChecksums bool has(const String & file_name) const { return files.find(file_name) != files.end(); } - bool empty() const - { - return files.empty(); - } + bool empty() const { return files.empty(); } /// Checks that the set of columns and their checksums are the same. If not, throws an exception. /// If have_uncompressed, for compressed files it compares the checksums of the decompressed data. @@ -67,7 +64,7 @@ struct MergeTreeDataPartChecksums static bool isBadChecksumsErrorCode(int code); /// Checks that the directory contains all the needed files of the correct size. Does not check the checksum. - void checkSizes(const DiskPtr & disk, const String & path) const; + void checkSizes(const IDataPartStorage & storage) const; /// Returns false if the checksum is too old. bool read(ReadBuffer & in); diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index a537b44d9ea..a5fb11244c7 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -18,16 +18,6 @@ namespace ErrorCodes extern const int BAD_SIZE_OF_FILE_IN_DATA_PART; } - -MergeTreeDataPartCompact::MergeTreeDataPartCompact( - MergeTreeData & storage_, - const String & name_, - const MutableDataPartStoragePtr & data_part_storage_, - const IMergeTreeDataPart * parent_part_) - : IMergeTreeDataPart(storage_, name_, data_part_storage_, Type::Compact, parent_part_) -{ -} - MergeTreeDataPartCompact::MergeTreeDataPartCompact( const MergeTreeData & storage_, const String & name_, @@ -99,7 +89,7 @@ void MergeTreeDataPartCompact::loadIndexGranularityImpl( size_t columns_count, const IDataPartStorage & data_part_storage_) { if (!index_granularity_info_.mark_type.adaptive) - throw Exception("MergeTreeDataPartCompact cannot be created with non-adaptive granulary.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeTreeDataPartCompact cannot be created with non-adaptive granulary."); auto marks_file_path = index_granularity_info_.getMarksFilePath("data"); if (!data_part_storage_.exists(marks_file_path)) @@ -137,7 +127,7 @@ void MergeTreeDataPartCompact::loadIndexGranularityImpl( void MergeTreeDataPartCompact::loadIndexGranularity() { if (columns.empty()) - throw Exception("No columns in part " + name, ErrorCodes::NO_FILE_IN_DATA_PART); + throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No columns in part {}", name); loadIndexGranularityImpl(index_granularity, index_granularity_info, columns.size(), getDataPartStorage()); } @@ -162,7 +152,7 @@ void MergeTreeDataPartCompact::checkConsistency(bool require_part_metadata) cons { /// count.txt should be present even in non custom-partitioned parts if (!checksums.files.contains("count.txt")) - throw Exception("No checksum for count.txt", ErrorCodes::NO_FILE_IN_DATA_PART); + throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No checksum for count.txt"); if (require_part_metadata) { @@ -210,7 +200,7 @@ void 
MergeTreeDataPartCompact::checkConsistency(bool require_part_metadata) cons "Part {} is broken: bad size of marks file '{}': {}, must be: {}", getDataPartStorage().getRelativePath(), std::string(fs::path(getDataPartStorage().getFullPath()) / mrk_file_name), - std::to_string(file_size), std::to_string(expected_file_size)); + file_size, expected_file_size); } } } diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.h b/src/Storages/MergeTree/MergeTreeDataPartCompact.h index e275c586cb9..ef553d290f7 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.h @@ -28,12 +28,6 @@ public: const MutableDataPartStoragePtr & data_part_storage_, const IMergeTreeDataPart * parent_part_ = nullptr); - MergeTreeDataPartCompact( - MergeTreeData & storage_, - const String & name_, - const MutableDataPartStoragePtr & data_part_storage_, - const IMergeTreeDataPart * parent_part_ = nullptr); - MergeTreeReaderPtr getReader( const NamesAndTypesList & columns, const StorageMetadataPtr & metadata_snapshot, diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp index ac56868894f..20049976acf 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include @@ -19,17 +19,6 @@ namespace ErrorCodes extern const int DIRECTORY_ALREADY_EXISTS; } - -MergeTreeDataPartInMemory::MergeTreeDataPartInMemory( - MergeTreeData & storage_, - const String & name_, - const MutableDataPartStoragePtr & data_part_storage_, - const IMergeTreeDataPart * parent_part_) - : IMergeTreeDataPart(storage_, name_, data_part_storage_, Type::InMemory, parent_part_) -{ - default_codec = CompressionCodecFactory::instance().get("NONE", {}); -} - MergeTreeDataPartInMemory::MergeTreeDataPartInMemory( const MergeTreeData & storage_, const String & name_, @@ -76,17 +65,13 @@ MutableDataPartStoragePtr MergeTreeDataPartInMemory::flushToDisk(const String & VolumePtr volume = storage.getStoragePolicy()->getVolume(0); VolumePtr data_part_volume = createVolumeFromReservation(reservation, volume); - auto new_data_part_storage = std::make_shared( - data_part_volume, - storage.getRelativeDataPath(), - new_relative_path); + auto new_data_part = storage.getDataPartBuilder(name, data_part_volume, new_relative_path) + .withPartFormat(storage.choosePartFormatOnDisk(block.bytes(), rows_count)) + .build(); + auto new_data_part_storage = new_data_part->getDataPartStoragePtr(); new_data_part_storage->beginTransaction(); - auto current_full_path = getDataPartStorage().getFullPath(); - auto new_type = storage.choosePartTypeOnDisk(block.bytes(), rows_count); - auto new_data_part = storage.createPart(name, new_type, info, new_data_part_storage); - new_data_part->uuid = uuid; new_data_part->setColumns(columns, {}); new_data_part->partition.value = partition.value; @@ -97,7 +82,7 @@ MutableDataPartStoragePtr MergeTreeDataPartInMemory::flushToDisk(const String & throw Exception( ErrorCodes::DIRECTORY_ALREADY_EXISTS, "Could not flush part {}. 
Part in {} already exists", - quoteString(current_full_path), + quoteString(getDataPartStorage().getFullPath()), new_data_part_storage->getFullPath()); } @@ -107,40 +92,42 @@ MutableDataPartStoragePtr MergeTreeDataPartInMemory::flushToDisk(const String & auto indices = MergeTreeIndexFactory::instance().getMany(metadata_snapshot->getSecondaryIndices()); MergedBlockOutputStream out(new_data_part, metadata_snapshot, columns, indices, compression_codec, NO_TRANSACTION_PTR); out.write(block); + const auto & projections = metadata_snapshot->getProjections(); for (const auto & [projection_name, projection] : projection_parts) { if (projections.has(projection_name)) { - auto projection_part_storage = new_data_part_storage->getProjection(projection_name + ".proj"); - if (projection_part_storage->exists()) + auto old_projection_part = asInMemoryPart(projection); + auto new_projection_part = new_data_part->getProjectionPartBuilder(projection_name) + .withPartFormat(storage.choosePartFormatOnDisk(old_projection_part->block.bytes(), rows_count)) + .build(); + + new_projection_part->is_temp = false; // clean up will be done on parent part + new_projection_part->setColumns(projection->getColumns(), {}); + + auto new_projection_part_storage = new_projection_part->getDataPartStoragePtr(); + if (new_projection_part_storage->exists()) { throw Exception( ErrorCodes::DIRECTORY_ALREADY_EXISTS, "Could not flush projection part {}. Projection part in {} already exists", projection_name, - projection_part_storage->getFullPath()); + new_projection_part_storage->getFullPath()); } - auto projection_part = asInMemoryPart(projection); - auto projection_type = storage.choosePartTypeOnDisk(projection_part->block.bytes(), rows_count); - MergeTreePartInfo projection_info("all", 0, 0, 0); - auto projection_data_part - = storage.createPart(projection_name, projection_type, projection_info, projection_part_storage, parent_part); - projection_data_part->is_temp = false; // clean up will be done on parent part - projection_data_part->setColumns(projection->getColumns(), {}); - - projection_part_storage->createDirectories(); + new_projection_part_storage->createDirectories(); const auto & desc = projections.get(name); auto projection_compression_codec = storage.getContext()->chooseCompressionCodec(0, 0); auto projection_indices = MergeTreeIndexFactory::instance().getMany(desc.metadata->getSecondaryIndices()); MergedBlockOutputStream projection_out( - projection_data_part, desc.metadata, projection_part->columns, projection_indices, + new_projection_part, desc.metadata, + new_projection_part->getColumns(), projection_indices, projection_compression_codec, NO_TRANSACTION_PTR); - projection_out.write(projection_part->block); - projection_out.finalizePart(projection_data_part, false); - new_data_part->addProjectionPart(projection_name, std::move(projection_data_part)); + projection_out.write(old_projection_part->block); + projection_out.finalizePart(new_projection_part, false); + new_data_part->addProjectionPart(projection_name, std::move(new_projection_part)); } } diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h index acb1cd8c844..525c966476b 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h @@ -17,12 +17,6 @@ public: const MutableDataPartStoragePtr & data_part_storage_, const IMergeTreeDataPart * parent_part_ = nullptr); - MergeTreeDataPartInMemory( - MergeTreeData & storage_, - const String & 
name_, - const MutableDataPartStoragePtr & data_part_storage_, - const IMergeTreeDataPart * parent_part_ = nullptr); - MergeTreeReaderPtr getReader( const NamesAndTypesList & columns, const StorageMetadataPtr & metadata_snapshot, diff --git a/src/Storages/MergeTree/MergeTreeDataPartType.cpp b/src/Storages/MergeTree/MergeTreeDataPartType.cpp deleted file mode 100644 index 59cea62121b..00000000000 --- a/src/Storages/MergeTree/MergeTreeDataPartType.cpp +++ /dev/null @@ -1,27 +0,0 @@ -#include -#include -#include - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int UNKNOWN_PART_TYPE; -} - -void MergeTreeDataPartType::fromString(const String & str) -{ - auto maybe_value = magic_enum::enum_cast(str); - if (!maybe_value || *maybe_value == Value::Unknown) - throw DB::Exception("Unexpected string for part type: " + str, ErrorCodes::UNKNOWN_PART_TYPE); - - value = *maybe_value; -} - -String MergeTreeDataPartType::toString() const -{ - return String(magic_enum::enum_name(value)); -} - -} diff --git a/src/Storages/MergeTree/MergeTreeDataPartType.h b/src/Storages/MergeTree/MergeTreeDataPartType.h index b4b1a56c161..bd2acb9ef65 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartType.h +++ b/src/Storages/MergeTree/MergeTreeDataPartType.h @@ -1,10 +1,36 @@ #pragma once +#include #include +#include namespace DB { +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +template +requires std::is_enum_v +static E parseEnum(const String & str) +{ + auto value = magic_enum::enum_cast(str); + if (!value || *value == E::Unknown) + throw DB::Exception(ErrorCodes::BAD_ARGUMENTS, + "Unexpected string {} for enum {}", str, magic_enum::enum_type_name()); + + return *value; +} + +/// It's a bug in clang with three-way comparison operator +/// https://github.com/llvm/llvm-project/issues/55919 +#ifdef __clang__ + #pragma clang diagnostic push + #pragma clang diagnostic ignored "-Wzero-as-null-pointer-constant" +#endif + /// Types of data part format. class MergeTreeDataPartType { @@ -24,36 +50,50 @@ public: Unknown, }; - MergeTreeDataPartType() : value(Unknown) {} + MergeTreeDataPartType() : value(Value::Unknown) {} MergeTreeDataPartType(Value value_) : value(value_) {} /// NOLINT - bool operator==(const MergeTreeDataPartType & other) const - { - return value == other.value; - } - - bool operator!=(const MergeTreeDataPartType & other) const - { - return !(*this == other); - } - - bool operator<(const MergeTreeDataPartType & other) const - { - return value < other.value; - } - - bool operator>(const MergeTreeDataPartType & other) const - { - return value > other.value; - } - - void fromString(const String & str); - String toString() const; + auto operator<=>(const MergeTreeDataPartType &) const = default; Value getValue() const { return value; } + String toString() const { return String(magic_enum::enum_name(value)); } + void fromString(const String & str) { value = parseEnum(str); } private: Value value; }; +/// Types of data part storage format. 
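+/// Only Full is implemented so far (see MergeTreeDataPartBuilder::getPartStorageByType); Unknown is the default-constructed value.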
+class MergeTreeDataPartStorageType +{ +public: + enum Value + { + Full, + Unknown, + }; + + MergeTreeDataPartStorageType() : value(Value::Unknown) {} + MergeTreeDataPartStorageType(Value value_) : value(value_) {} /// NOLINT + + auto operator<=>(const MergeTreeDataPartStorageType &) const = default; + + Value getValue() const { return value; } + String toString() const { return String(magic_enum::enum_name(value)); } + void fromString(const String & str) { value = parseEnum(str); } + +private: + Value value; +}; + +#ifdef __clang__ + #pragma clang diagnostic pop +#endif + +struct MergeTreeDataPartFormat +{ + MergeTreeDataPartType part_type; + MergeTreeDataPartStorageType storage_type; +}; + } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index 2418960f992..016aa2b7984 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -17,16 +17,6 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } - -MergeTreeDataPartWide::MergeTreeDataPartWide( - MergeTreeData & storage_, - const String & name_, - const MutableDataPartStoragePtr & data_part_storage_, - const IMergeTreeDataPart * parent_part_) - : IMergeTreeDataPart(storage_, name_, data_part_storage_, Type::Wide, parent_part_) -{ -} - MergeTreeDataPartWide::MergeTreeDataPartWide( const MergeTreeData & storage_, const String & name_, @@ -160,7 +150,7 @@ void MergeTreeDataPartWide::loadIndexGranularityImpl( void MergeTreeDataPartWide::loadIndexGranularity() { if (columns.empty()) - throw Exception("No columns in part " + name, ErrorCodes::NO_FILE_IN_DATA_PART); + throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No columns in part {}", name); loadIndexGranularityImpl(index_granularity, index_granularity_info, getDataPartStorage(), getFileNameForColumn(columns.front())); } @@ -207,7 +197,7 @@ void MergeTreeDataPartWide::checkConsistency(bool require_part_metadata) const if (!checksums.files.contains(bin_file_name)) throw Exception( ErrorCodes::NO_FILE_IN_DATA_PART, - "No {} file checksum for column {} in part ", + "No {} file checksum for column {} in part {}", bin_file_name, name_type.name, getDataPartStorage().getFullPath()); }); } @@ -302,7 +292,8 @@ void MergeTreeDataPartWide::calculateEachColumnSizes(ColumnSizeByName & each_col throw Exception( ErrorCodes::LOGICAL_ERROR, "Column {} has rows count {} according to size in memory " - "and size of single value, but data part {} has {} rows", backQuote(column.name), rows_in_column, name, rows_count); + "and size of single value, but data part {} has {} rows", + backQuote(column.name), rows_in_column, name, rows_count); } } #endif diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.h b/src/Storages/MergeTree/MergeTreeDataPartWide.h index 601bdff51a1..0d68334a623 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.h @@ -23,12 +23,6 @@ public: const MutableDataPartStoragePtr & data_part_storage_, const IMergeTreeDataPart * parent_part_ = nullptr); - MergeTreeDataPartWide( - MergeTreeData & storage_, - const String & name_, - const MutableDataPartStoragePtr & data_part_storage_, - const IMergeTreeDataPart * parent_part_ = nullptr); - MergeTreeReaderPtr getReader( const NamesAndTypesList & columns, const StorageMetadataPtr & metadata_snapshot, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 4c1d117ac73..94c3651f1f9 
100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -9,6 +9,13 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +static CompressionCodecPtr getMarksCompressionCodec(const String & marks_compression_codec) +{ + ParserCodec codec_parser; + auto ast = parseQuery(codec_parser, "(" + Poco::toUpper(marks_compression_codec) + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + return CompressionCodecFactory::instance().get(ast, nullptr); +} + MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( const MergeTreeMutableDataPartPtr & data_part_, const NamesAndTypesList & columns_list_, @@ -38,7 +45,7 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( { marks_compressor = std::make_unique( *marks_file_hashing, - settings_.getMarksCompressionCodec(), + getMarksCompressionCodec(settings_.marks_compression_codec), settings_.marks_compress_block_size); marks_source_hashing = std::make_unique(*marks_compressor); @@ -87,7 +94,9 @@ namespace Granules getGranulesToWrite(const MergeTreeIndexGranularity & index_granularity, size_t block_rows, size_t current_mark, bool last_block) { if (current_mark >= index_granularity.getMarksCount()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Request to get granules from mark {} but index granularity size is {}", current_mark, index_granularity.getMarksCount()); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Request to get granules from mark {} but index granularity size is {}", + current_mark, index_granularity.getMarksCount()); Granules result; size_t current_row = 0; @@ -99,7 +108,9 @@ Granules getGranulesToWrite(const MergeTreeIndexGranularity & index_granularity, { /// Invariant: we always have equal amount of rows for block in compact parts because we accumulate them in buffer. /// The only exclusion is the last block, when we cannot accumulate more rows. 
- throw Exception(ErrorCodes::LOGICAL_ERROR, "Required to write {} rows, but only {} rows was written for the non last granule", expected_rows_in_mark, rows_left_in_block); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Required to write {} rows, but only {} rows was written for the non last granule", + expected_rows_in_mark, rows_left_in_block); } result.emplace_back(Granule{ diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp index 8066a097499..9afa7a1e80d 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp @@ -22,7 +22,7 @@ void MergeTreeDataPartWriterInMemory::write( const Block & block, const IColumn::Permutation * permutation) { if (part_in_memory->block) - throw Exception("DataPartWriterInMemory supports only one write", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "DataPartWriterInMemory supports only one write"); Block primary_key_block; if (settings.rewrite_primary_key) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index e9629f83d09..1f40177d0fa 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include "IO/WriteBufferFromFileDecorator.h" @@ -112,7 +112,8 @@ MergeTreeDataPartWriterOnDisk::MergeTreeDataPartWriterOnDisk( , compress_primary_key(settings.compress_primary_key) { if (settings.blocks_are_granules_size && !index_granularity.empty()) - throw Exception("Can't take information about index granularity from blocks, when non empty index_granularity array specified", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Can't take information about index granularity from blocks, when non empty index_granularity array specified"); if (!data_part->getDataPartStorage().exists()) data_part->getDataPartStorage().createDirectories(); @@ -214,7 +215,7 @@ void MergeTreeDataPartWriterOnDisk::initSkipIndices() settings.query_write_settings)); GinIndexStorePtr store = nullptr; - if (dynamic_cast(&*index_helper) != nullptr) + if (dynamic_cast(&*index_helper) != nullptr) { store = std::make_shared(stream_name, data_part->getDataPartStoragePtr(), data_part->getDataPartStoragePtr(), storage.getSettings()->max_digestion_size_per_segment); gin_index_stores[stream_name] = store; @@ -275,15 +276,13 @@ void MergeTreeDataPartWriterOnDisk::calculateAndSerializeSkipIndices(const Block auto & stream = *skip_indices_streams[i]; WriteBuffer & marks_out = stream.compress_marks ? 
stream.marks_compressed_hashing : stream.marks_hashing; - GinIndexStorePtr store = nullptr; - if (dynamic_cast(&*index_helper) != nullptr) + GinIndexStorePtr store; + if (dynamic_cast(&*index_helper) != nullptr) { String stream_name = index_helper->getFileName(); auto it = gin_index_stores.find(stream_name); - if (it == gin_index_stores.cend()) - { - throw Exception("Index '" + stream_name + "' does not exist", ErrorCodes::LOGICAL_ERROR); - } + if (it == gin_index_stores.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Index '{}' does not exist", stream_name); store = it->second; } @@ -400,9 +399,7 @@ void MergeTreeDataPartWriterOnDisk::finishSkipIndicesSerialization(bool sync) stream->sync(); } for (auto & store: gin_index_stores) - { store.second->finalize(); - } gin_index_stores.clear(); skip_indices_streams.clear(); skip_indices_aggregators.clear(); diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h index 2377a129ac0..b76b74ab717 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h @@ -162,7 +162,7 @@ protected: /// Data is already written up to this mark. size_t current_mark = 0; - GinIndexStores gin_index_stores; + GinIndexStoreFactory::GinIndexStores gin_index_stores; private: void initSkipIndices(); void initPrimaryIndex(); diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index 62917bcb084..cce459c1ba8 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -26,7 +26,9 @@ namespace Granules getGranulesToWrite(const MergeTreeIndexGranularity & index_granularity, size_t block_rows, size_t current_mark, size_t rows_written_in_last_mark) { if (current_mark >= index_granularity.getMarksCount()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Request to get granules from mark {} but index granularity size is {}", current_mark, index_granularity.getMarksCount()); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Request to get granules from mark {} but index granularity size is {}", + current_mark, index_granularity.getMarksCount()); Granules result; size_t current_row = 0; @@ -157,9 +159,9 @@ void MergeTreeDataPartWriterWide::shiftCurrentMark(const Granules & granules_wri { if (settings.can_use_adaptive_granularity && settings.blocks_are_granules_size) throw Exception(ErrorCodes::LOGICAL_ERROR, "Incomplete granules are not allowed while blocks are granules size. 
" - "Mark number {} (rows {}), rows written in last mark {}, rows to write in last mark from block {} (from row {}), total marks currently {}", - last_granule.mark_number, index_granularity.getMarkRows(last_granule.mark_number), rows_written_in_last_mark, - last_granule.rows_to_write, last_granule.start_row, index_granularity.getMarksCount()); + "Mark number {} (rows {}), rows written in last mark {}, rows to write in last mark from block {} (from row {}), " + "total marks currently {}", last_granule.mark_number, index_granularity.getMarkRows(last_granule.mark_number), + rows_written_in_last_mark, last_granule.rows_to_write, last_granule.start_row, index_granularity.getMarksCount()); /// Shift forward except last granule setCurrentMark(getCurrentMark() + granules_written.size() - 1); @@ -345,7 +347,8 @@ void MergeTreeDataPartWriterWide::writeColumn( const Granules & granules) { if (granules.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty granules for column {}, current mark {}", backQuoteIfNeed(name_and_type.name), getCurrentMark()); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty granules for column {}, current mark {}", + backQuoteIfNeed(name_and_type.name), getCurrentMark()); const auto & [name, type] = name_and_type; auto [it, inserted] = serialization_states.emplace(name, nullptr); @@ -371,7 +374,10 @@ void MergeTreeDataPartWriterWide::writeColumn( if (granule.mark_on_start) { if (last_non_written_marks.contains(name)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "We have to add new mark for column, but already have non written mark. Current mark {}, total marks {}, offset {}", getCurrentMark(), index_granularity.getMarksCount(), rows_written_in_last_mark); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "We have to add new mark for column, but already have non written mark. 
" + "Current mark {}, total marks {}, offset {}", + getCurrentMark(), index_granularity.getMarksCount(), rows_written_in_last_mark); last_non_written_marks[name] = getCurrentMarksForColumn(name_and_type, offset_columns); } @@ -442,7 +448,9 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai for (mark_num = 0; !mrk_in->eof(); ++mark_num) { if (mark_num > index_granularity.getMarksCount()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Incorrect number of marks in memory {}, on disk (at least) {}", index_granularity.getMarksCount(), mark_num + 1); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Incorrect number of marks in memory {}, on disk (at least) {}", + index_granularity.getMarksCount(), mark_num + 1); DB::readBinary(offset_in_compressed_file, *mrk_in); DB::readBinary(offset_in_decompressed_block, *mrk_in); @@ -454,7 +462,9 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai if (must_be_last) { if (index_granularity_rows != 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, "We ran out of binary data but still have non empty mark #{} with rows number {}", mark_num, index_granularity_rows); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "We ran out of binary data but still have non empty mark #{} with rows number {}", + mark_num, index_granularity_rows); if (!mrk_in->eof()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Mark #{} must be last, but we still have some to read", mark_num); @@ -469,20 +479,28 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai serialization->deserializeBinaryBulk(*column, bin_in, 1000000000, 0.0); throw Exception(ErrorCodes::LOGICAL_ERROR, - "Still have {} rows in bin stream, last mark #{} index granularity size {}, last rows {}", column->size(), mark_num, index_granularity.getMarksCount(), index_granularity_rows); + "Still have {} rows in bin stream, last mark #{}" + " index granularity size {}, last rows {}", + column->size(), mark_num, index_granularity.getMarksCount(), index_granularity_rows); } if (index_granularity_rows > data_part->index_granularity_info.fixed_index_granularity) { throw Exception(ErrorCodes::LOGICAL_ERROR, "Mark #{} has {} rows, but max fixed granularity is {}, index granularity size {}", - mark_num, index_granularity_rows, data_part->index_granularity_info.fixed_index_granularity, index_granularity.getMarksCount()); + mark_num, index_granularity_rows, data_part->index_granularity_info.fixed_index_granularity, + index_granularity.getMarksCount()); } if (index_granularity_rows != index_granularity.getMarkRows(mark_num)) throw Exception( - ErrorCodes::LOGICAL_ERROR, "Incorrect mark rows for part {} for mark #{} (compressed offset {}, decompressed offset {}), in-memory {}, on disk {}, total marks {}", - data_part->getDataPartStorage().getFullPath(), mark_num, offset_in_compressed_file, offset_in_decompressed_block, index_granularity.getMarkRows(mark_num), index_granularity_rows, index_granularity.getMarksCount()); + ErrorCodes::LOGICAL_ERROR, + "Incorrect mark rows for part {} for mark #{}" + " (compressed offset {}, decompressed offset {}), in-memory {}, on disk {}, total marks {}", + data_part->getDataPartStorage().getFullPath(), + mark_num, offset_in_compressed_file, offset_in_decompressed_block, + index_granularity.getMarkRows(mark_num), index_granularity_rows, + index_granularity.getMarksCount()); auto column = type->createColumn(); @@ -513,14 +531,18 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai } throw 
Exception( - ErrorCodes::LOGICAL_ERROR, "Incorrect mark rows for mark #{} (compressed offset {}, decompressed offset {}), actually in bin file {}, in mrk file {}, total marks {}", - mark_num, offset_in_compressed_file, offset_in_decompressed_block, column->size(), index_granularity.getMarkRows(mark_num), index_granularity.getMarksCount()); + ErrorCodes::LOGICAL_ERROR, "Incorrect mark rows for mark #{} (compressed offset {}, decompressed offset {}), " + "actually in bin file {}, in mrk file {}, total marks {}", + mark_num, offset_in_compressed_file, offset_in_decompressed_block, column->size(), + index_granularity.getMarkRows(mark_num), index_granularity.getMarksCount()); } } if (!mrk_in->eof()) throw Exception(ErrorCodes::LOGICAL_ERROR, - "Still have something in marks stream, last mark #{} index granularity size {}, last rows {}", mark_num, index_granularity.getMarksCount(), index_granularity_rows); + "Still have something in marks stream, last mark #{}" + " index granularity size {}, last rows {}", + mark_num, index_granularity.getMarksCount(), index_granularity_rows); if (!bin_in.eof()) { auto column = type->createColumn(); @@ -528,7 +550,9 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai serialization->deserializeBinaryBulk(*column, bin_in, 1000000000, 0.0); throw Exception(ErrorCodes::LOGICAL_ERROR, - "Still have {} rows in bin stream, last mark #{} index granularity size {}, last rows {}", column->size(), mark_num, index_granularity.getMarksCount(), index_granularity_rows); + "Still have {} rows in bin stream, last mark #{}" + " index granularity size {}, last rows {}", + column->size(), mark_num, index_granularity.getMarksCount(), index_granularity_rows); } } @@ -543,9 +567,11 @@ void MergeTreeDataPartWriterWide::fillDataChecksums(IMergeTreeDataPart::Checksum if (rows_written_in_last_mark > 0) { if (settings.can_use_adaptive_granularity && settings.blocks_are_granules_size) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Incomplete granule is not allowed while blocks are granules size even for last granule. " - "Mark number {} (rows {}), rows written for last mark {}, total marks {}", - getCurrentMark(), index_granularity.getMarkRows(getCurrentMark()), rows_written_in_last_mark, index_granularity.getMarksCount()); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Incomplete granule is not allowed while blocks are granules size even for last granule. 
" + "Mark number {} (rows {}), rows written for last mark {}, total marks {}", + getCurrentMark(), index_granularity.getMarkRows(getCurrentMark()), + rows_written_in_last_mark, index_granularity.getMarksCount()); adjustLastMarkIfNeedAndFlushToDisk(rows_written_in_last_mark); } @@ -656,7 +682,8 @@ static void fillIndexGranularityImpl( void MergeTreeDataPartWriterWide::fillIndexGranularity(size_t index_granularity_for_block, size_t rows_in_block) { if (getCurrentMark() < index_granularity.getMarksCount() && getCurrentMark() != index_granularity.getMarksCount() - 1) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to add marks, while current mark {}, but total marks {}", getCurrentMark(), index_granularity.getMarksCount()); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to add marks, while current mark {}, but total marks {}", + getCurrentMark(), index_granularity.getMarksCount()); size_t index_offset = 0; if (rows_written_in_last_mark != 0) @@ -683,8 +710,10 @@ void MergeTreeDataPartWriterWide::adjustLastMarkIfNeedAndFlushToDisk(size_t new_ if (compute_granularity && settings.can_use_adaptive_granularity) { if (getCurrentMark() != index_granularity.getMarksCount() - 1) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Non last mark {} (with {} rows) having rows offset {}, total marks {}", - getCurrentMark(), index_granularity.getMarkRows(getCurrentMark()), rows_written_in_last_mark, index_granularity.getMarksCount()); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Non last mark {} (with {} rows) having rows offset {}, total marks {}", + getCurrentMark(), index_granularity.getMarkRows(getCurrentMark()), + rows_written_in_last_mark, index_granularity.getMarksCount()); index_granularity.popMark(); index_granularity.appendMark(new_rows_in_last_mark); diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 9cae53c71c7..512f194ea53 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include @@ -171,9 +171,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( if (plan->isInitialized() && settings.allow_experimental_projection_optimization && settings.force_optimize_projection && !metadata_for_reading->projections.empty()) - throw Exception( - "No projection is used when allow_experimental_projection_optimization = 1 and force_optimize_projection = 1", - ErrorCodes::PROJECTION_NOT_USED); + throw Exception(ErrorCodes::PROJECTION_NOT_USED, + "No projection is used when allow_experimental_projection_optimization = 1 and force_optimize_projection = 1"); return plan; } @@ -520,14 +519,14 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( relative_sample_size.assign(sample_size_ratio->numerator, sample_size_ratio->denominator); if (relative_sample_size < 0) - throw Exception("Negative sample size", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Negative sample size"); relative_sample_offset = 0; if (sample_offset_ratio) relative_sample_offset.assign(sample_offset_ratio->numerator, sample_offset_ratio->denominator); if (relative_sample_offset < 0) - throw Exception("Negative sample offset", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Negative sample offset"); /// Convert absolute value of the sampling (in form `SAMPLE 1000000` - how many rows to /// read) 
into the relative `SAMPLE 0.1` (how much data to read). @@ -546,7 +545,7 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( relative_sample_size = 0; if (relative_sample_offset > 0 && RelativeSize(0) == relative_sample_size) - throw Exception("Sampling offset is incorrect because no sampling", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Sampling offset is incorrect because no sampling"); if (relative_sample_offset > 1) { @@ -623,10 +622,9 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( } if (size_of_universum == RelativeSize(0)) - throw Exception( - "Invalid sampling column type in storage parameters: " + sampling_column_type->getName() - + ". Must be one unsigned integer type", - ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, + "Invalid sampling column type in storage parameters: {}. Must be one unsigned integer type", + sampling_column_type->getName()); if (settings.parallel_replicas_count > 1) { @@ -695,7 +693,7 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( { if (!key_condition.addCondition( sampling_key.column_names[0], Range::createLeftBounded(lower, true, sampling_key.data_types[0]->isNullable()))) - throw Exception("Sampling column not in primary key", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Sampling column not in primary key"); ASTPtr args = std::make_shared(); args->children.push_back(sampling_key_ast); @@ -713,7 +711,7 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( { if (!key_condition.addCondition( sampling_key.column_names[0], Range::createRightBounded(upper, false, sampling_key.data_types[0]->isNullable()))) - throw Exception("Sampling column not in primary key", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Sampling column not in primary key"); ASTPtr args = std::make_shared(); args->children.push_back(sampling_key_ast); @@ -1691,10 +1689,8 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex( PostingsCacheForStore cache_in_store; - if (dynamic_cast(&*index_helper) != nullptr) - { + if (dynamic_cast(&*index_helper) != nullptr) cache_in_store.store = GinIndexStoreFactory::instance().get(index_helper->getFileName(), part->getDataPartStoragePtr()); - } for (size_t i = 0; i < ranges.size(); ++i) { @@ -1709,7 +1705,7 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex( { if (index_mark != index_range.begin || !granule || last_index_mark != index_range.begin) granule = reader.read(); - const auto * gin_filter_condition = dynamic_cast(&*condition); + const auto * gin_filter_condition = dynamic_cast(&*condition); // Cast to Ann condition auto ann_condition = std::dynamic_pointer_cast(condition); if (ann_condition != nullptr) @@ -1736,7 +1732,7 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex( continue; } - bool result{false}; + bool result = false; if (!gin_filter_condition) result = condition->mayBeTrueOnGranule(granule); else @@ -1988,7 +1984,7 @@ void MergeTreeDataSelectExecutor::selectPartsToReadWithUUIDFilter( { auto result = temp_part_uuids.insert(part->uuid); if (!result.second) - throw Exception("Found a part with the same UUID on the same replica.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Found a part with the same UUID on the same replica."); } selected_parts.push_back(part_or_projection); @@ -2022,7 +2018,7 @@ 
void MergeTreeDataSelectExecutor::selectPartsToReadWithUUIDFilter( /// Second attempt didn't help, throw an exception if (!select_parts(parts)) - throw Exception("Found duplicate UUIDs while processing query.", ErrorCodes::DUPLICATED_PART_UUIDS); + throw Exception(ErrorCodes::DUPLICATED_PART_UUIDS, "Found duplicate UUIDs while processing query."); } } diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 03a3d4fbd72..93b0abeca35 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include @@ -76,7 +76,15 @@ void buildScatterSelector( if (inserted) { if (max_parts && partitions_count >= max_parts) - throw Exception("Too many partitions for single INSERT block (more than " + toString(max_parts) + "). The limit is controlled by 'max_partitions_per_insert_block' setting. Large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc).", ErrorCodes::TOO_MANY_PARTS); + throw Exception(ErrorCodes::TOO_MANY_PARTS, + "Too many partitions for single INSERT block (more than {}). " + "The limit is controlled by 'max_partitions_per_insert_block' setting. " + "Large number of partitions is a common misconception. " + "It will lead to severe negative performance impact, including slow server startup, " + "slow INSERT queries and slow SELECT queries. Recommended total number of partitions " + "for a table is under 1000..10000. Please note, that partitioning is not intended " + "to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). " + "Partitions are intended for data manipulation (DROP PARTITION, etc).", max_parts); partition_num_to_first_row.push_back(i); it->getMapped() = partitions_count; @@ -129,10 +137,10 @@ void updateTTL( ttl_info.update(column_const->getValue()); } else - throw Exception("Unexpected type of result TTL column", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected type of result TTL column"); } else - throw Exception("Unexpected type of result TTL column", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected type of result TTL column"); if (update_part_min_max_ttls) ttl_infos.updatePartMinMaxTTL(ttl_info.min, ttl_info.max); @@ -144,6 +152,10 @@ void MergeTreeDataWriter::TemporaryPart::finalize() { for (auto & stream : streams) stream.finalizer.finish(); + + part->getDataPartStorage().precommitTransaction(); + for (const auto & [_, projection] : part->getProjectionParts()) + projection->getDataPartStorage().precommitTransaction(); } std::vector scatterOffsetsBySelector(ChunkOffsetsPtr chunk_offsets, const IColumn::Selector & selector, size_t partition_num) @@ -309,13 +321,13 @@ Block MergeTreeDataWriter::mergeBlock( /// Check that after first merge merging_algorithm is waiting for data from input 0. 
if (status.required_source != 0) - throw Exception("Logical error: required source after the first merge is not 0.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: required source after the first merge is not 0."); status = merging_algorithm->merge(); /// Check that merge is finished. if (!status.is_finished) - throw Exception("Logical error: merge is not finished after the second merge.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: merge is not finished after the second merge."); /// Merged Block is sorted and we don't need to use permutation anymore permutation = nullptr; @@ -364,7 +376,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( auto max_month = date_lut.toNumYYYYMM(max_date); if (min_month != max_month) - throw Exception("Logical error: part spans more than one month.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: part spans more than one month."); part_name = new_part_info.getPartNameV0(min_date, max_date); } @@ -384,6 +396,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( { part_dir = part_name; } + temp_part.temporary_directory_lock = data.getTemporaryPartDirectoryHolder(part_dir); /// If we need to calculate some columns to sort. @@ -435,19 +448,14 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( VolumePtr volume = data.getStoragePolicy()->getVolume(0); VolumePtr data_part_volume = createVolumeFromReservation(reservation, volume); - auto data_part_storage = std::make_shared( - data_part_volume, - data.relative_data_path, - part_dir); + auto new_data_part = data.getDataPartBuilder(part_name, data_part_volume, part_dir) + .withPartFormat(data.choosePartFormat(expected_size, block.rows())) + .withPartInfo(new_part_info) + .build(); + auto data_part_storage = new_data_part->getDataPartStoragePtr(); data_part_storage->beginTransaction(); - auto new_data_part = data.createPart( - part_name, - data.choosePartType(expected_size, block.rows()), - new_part_info, - data_part_storage); - if (data.storage_settings.get()->assign_part_uuids) new_data_part->uuid = UUIDHelpers::generateV4(); @@ -550,8 +558,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl( const ProjectionDescription & projection) { TemporaryPart temp_part; - const StorageMetadataPtr & metadata_snapshot = projection.metadata; - MergeTreePartInfo new_part_info("all", 0, 0, 0); + const auto & metadata_snapshot = projection.metadata; MergeTreeDataPartType part_type; if (parent_part->getType() == MergeTreeDataPartType::InMemory) @@ -564,21 +571,15 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeProjectionPartImpl( size_t expected_size = block.bytes(); // just check if there is enough space on parent volume data.reserveSpace(expected_size, parent_part->getDataPartStorage()); - part_type = data.choosePartTypeOnDisk(expected_size, block.rows()); + part_type = data.choosePartFormatOnDisk(expected_size, block.rows()).part_type; } - auto relative_path = part_name + (is_temp ? 
".tmp_proj" : ".proj"); - auto projection_part_storage = parent_part->getDataPartStorage().getProjection(relative_path, !is_temp); + auto new_data_part = parent_part->getProjectionPartBuilder(part_name, is_temp).withPartType(part_type).build(); + auto projection_part_storage = new_data_part->getDataPartStoragePtr(); + if (is_temp) projection_part_storage->beginTransaction(); - auto new_data_part = data.createPart( - part_name, - part_type, - new_part_info, - projection_part_storage, - parent_part); - new_data_part->is_temp = is_temp; NamesAndTypesList columns = metadata_snapshot->getColumns().getAllPhysical().filter(block.getNames()); @@ -687,7 +688,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempProjectionPart( IMergeTreeDataPart * parent_part, size_t block_num) { - String part_name = fmt::format("{}_{}", projection.name, block_num); + auto part_name = fmt::format("{}_{}", projection.name, block_num); return writeProjectionPartImpl( part_name, true /* is_temp */, diff --git a/src/Storages/MergeTree/MergeTreeIOSettings.h b/src/Storages/MergeTree/MergeTreeIOSettings.h index 2020796f925..2f269663cd1 100644 --- a/src/Storages/MergeTree/MergeTreeIOSettings.h +++ b/src/Storages/MergeTree/MergeTreeIOSettings.h @@ -3,8 +3,6 @@ #include #include #include -#include -#include namespace DB @@ -59,13 +57,6 @@ struct MergeTreeWriterSettings { } - CompressionCodecPtr getMarksCompressionCodec() const - { - ParserCodec codec_parser; - auto ast = parseQuery(codec_parser, "(" + Poco::toUpper(marks_compression_codec) + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); - return CompressionCodecFactory::instance().get(ast, nullptr); - } - size_t min_compress_block_size; size_t max_compress_block_size; diff --git a/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.cpp index 9bc0e4e6dc0..fe5a2a861f6 100644 --- a/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.cpp @@ -41,8 +41,8 @@ MergeTreeIndexGranulePtr MergeTreeIndexAggregatorBloomFilter::getGranuleAndReset void MergeTreeIndexAggregatorBloomFilter::update(const Block & block, size_t * pos, size_t limit) { if (*pos >= block.rows()) - throw Exception("The provided position is not less than the number of block rows. Position: " + toString(*pos) + ", Block rows: " + - toString(block.rows()) + ".", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "The provided position is not less than the number of block rows. 
" + "Position: {}, Block rows: {}.", toString(*pos), toString(block.rows())); Block granule_index_block; size_t max_read_rows = std::min(block.rows() - *pos, limit); diff --git a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp index 4dd0614015c..f64d6104ac6 100644 --- a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp @@ -142,7 +142,7 @@ void MergeTreeIndexAggregatorAnnoy::update(const Block & block, size_t return; if (index_sample_block.columns() > 1) - throw Exception("Only one column is supported", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Only one column is supported"); auto index_column_name = index_sample_block.getByPosition(0).name; const auto & column_cut = block.getByName(index_column_name).column->cut(*pos, rows_read); @@ -208,7 +208,7 @@ MergeTreeIndexConditionAnnoy::MergeTreeIndexConditionAnnoy( bool MergeTreeIndexConditionAnnoy::mayBeTrueOnGranule(MergeTreeIndexGranulePtr /* idx_granule */) const { - throw Exception("mayBeTrueOnGranule is not supported for ANN skip indexes", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "mayBeTrueOnGranule is not supported for ANN skip indexes"); } bool MergeTreeIndexConditionAnnoy::alwaysUnknownOrTrue() const @@ -248,13 +248,14 @@ std::vector MergeTreeIndexConditionAnnoy::getUsefulRangesImpl(MergeTreeI auto granule = std::dynamic_pointer_cast >(idx_granule); if (granule == nullptr) - throw Exception("Granule has the wrong type", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Granule has the wrong type"); auto annoy = granule->index; if (condition.getNumOfDimensions() != annoy->getNumOfDimensions()) - throw Exception("The dimension of the space in the request (" + toString(condition.getNumOfDimensions()) + ") " - + "does not match with the dimension in the index (" + toString(annoy->getNumOfDimensions()) + ")", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "The dimension of the space in the request ({}) " + "does not match with the dimension in the index ({})", + toString(condition.getNumOfDimensions()), toString(annoy->getNumOfDimensions())); /// neighbors contain indexes of dots which were closest to target vector std::vector neighbors; @@ -273,7 +274,7 @@ std::vector MergeTreeIndexConditionAnnoy::getUsefulRangesImpl(MergeTreeI } catch (...) 
{ - throw Exception("Setting of the annoy index should be int", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Setting of the annoy index should be int"); } } annoy->get_nns_by_vector(target_vec.data(), limit, k_search, &neighbors, &distances); @@ -333,12 +334,12 @@ MergeTreeIndexPtr annoyIndexCreator(const IndexDescription & index) { if (!index.arguments[0].tryGet(distance_name)) { - throw Exception("Can't parse first argument", ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Can't parse first argument"); } } if (index.arguments.size() > 1 && !index.arguments[1].tryGet(distance_name)) { - throw Exception("Can't parse second argument", ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Can't parse second argument"); } return std::make_shared(index, param, distance_name); } @@ -381,20 +382,20 @@ void annoyIndexValidator(const IndexDescription & index, bool /* attach */) { if (index.arguments.size() > 2) { - throw Exception("Annoy index must not have more than two parameters", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Annoy index must not have more than two parameters"); } if (!index.arguments.empty() && index.arguments[0].getType() != Field::Types::UInt64 && index.arguments[0].getType() != Field::Types::String) { - throw Exception("Annoy index first argument must be UInt64 or String.", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Annoy index first argument must be UInt64 or String."); } if (index.arguments.size() > 1 && index.arguments[1].getType() != Field::Types::String) { - throw Exception("Annoy index second argument must be String.", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Annoy index second argument must be String."); } if (index.column_names.size() != 1 || index.data_types.size() != 1) - throw Exception("Annoy indexes must be created on a single column", ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS); + throw Exception(ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS, "Annoy indexes must be created on a single column"); assertIndexColumnsType(index.sample_block); } diff --git a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp index c2ed081ac00..7a8a28b24aa 100644 --- a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp @@ -78,7 +78,7 @@ MergeTreeIndexConditionPtr MergeTreeIndexBloomFilter::createIndexCondition(const static void assertIndexColumnsType(const Block & header) { if (!header || !header.columns()) - throw Exception("Index must have columns.", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Index must have columns."); const DataTypes & columns_data_types = header.getDataTypes(); @@ -89,8 +89,7 @@ static void assertIndexColumnsType(const Block & header) if (!which.isUInt() && !which.isInt() && !which.isString() && !which.isFixedString() && !which.isFloat() && !which.isDate() && !which.isDateTime() && !which.isDateTime64() && !which.isEnum() && !which.isUUID()) - throw Exception("Unexpected type " + type->getName() + " of bloom filter index.", - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unexpected type {} of bloom filter index.", type->getName()); } } @@ -118,7 +117,7 @@ void bloomFilterIndexValidatorNew(const IndexDescription & index, bool attach) if (index.arguments.size() > 1) { if (!attach) /// This is for backward 
compatibility. - throw Exception("BloomFilter index cannot have more than one parameter.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "BloomFilter index cannot have more than one parameter."); } if (!index.arguments.empty()) @@ -126,7 +125,7 @@ void bloomFilterIndexValidatorNew(const IndexDescription & index, bool attach) const auto & argument = index.arguments[0]; if (!attach && (argument.getType() != Field::Types::Float64 || argument.get() < 0 || argument.get() > 1)) - throw Exception("The BloomFilter false positive must be a double number between 0 and 1.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The BloomFilter false positive must be a double number between 0 and 1."); } } diff --git a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp index 4fd4314e53f..235d90bb974 100644 --- a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp @@ -58,7 +58,7 @@ bool maybeTrueOnBloomFilter(const IColumn * hash_column, const BloomFilterPtr & const auto * non_const_column = typeid_cast(hash_column); if (!const_column && !non_const_column) - throw Exception("LOGICAL ERROR: hash column must be Const Column or UInt64 Column.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "LOGICAL ERROR: hash column must be Const Column or UInt64 Column."); if (const_column) { @@ -174,7 +174,7 @@ bool MergeTreeIndexConditionBloomFilter::alwaysUnknownOrTrue() const rpn_stack.back() = arg1 || arg2; } else - throw Exception("Unexpected function type in KeyCondition::RPNElement", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected function type in KeyCondition::RPNElement"); } return rpn_stack[0]; @@ -245,11 +245,11 @@ bool MergeTreeIndexConditionBloomFilter::mayBeTrueOnGranule(const MergeTreeIndex rpn_stack.emplace_back(false, true); } else - throw Exception("Unexpected function type in KeyCondition::RPNElement", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected function type in KeyCondition::RPNElement"); } if (rpn_stack.size() != 1) - throw Exception("Unexpected stack size in KeyCondition::mayBeTrueInRange", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected stack size in KeyCondition::mayBeTrueInRange"); return rpn_stack[0].can_be_true; } @@ -574,7 +574,7 @@ bool MergeTreeIndexConditionBloomFilter::traverseTreeEquals( if (function_name == "has" || function_name == "indexOf") { if (!array_type) - throw Exception("First argument for function " + function_name + " must be an array.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be an array.", function_name); /// We can treat `indexOf` function similar to `has`. /// But it is little more cumbersome, compare: `has(arr, elem)` and `indexOf(arr, elem) != 0`. @@ -627,7 +627,8 @@ bool MergeTreeIndexConditionBloomFilter::traverseTreeEquals( else { if (array_type) - throw Exception("An array type of bloom_filter supports only has(), indexOf(), and hasAny() functions.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "An array type of bloom_filter supports only has(), indexOf(), and hasAny() functions."); out.function = function_name == "equals" ? 
RPNElement::FUNCTION_EQUALS : RPNElement::FUNCTION_NOT_EQUALS; const DataTypePtr actual_type = BloomFilter::getPrimitiveType(index_type); diff --git a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h index 5d7ea371a83..952948fd582 100644 --- a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h +++ b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h @@ -53,7 +53,7 @@ public: if (const auto & bf_granule = typeid_cast(granule.get())) return mayBeTrueOnGranule(bf_granule); - throw Exception("LOGICAL ERROR: require bloom filter index granule.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "LOGICAL ERROR: require bloom filter index granule."); } private: diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index 411141f028b..35ca484cff0 100644 --- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -91,9 +91,8 @@ MergeTreeIndexGranulePtr MergeTreeIndexAggregatorFullText::getGranuleAndReset() void MergeTreeIndexAggregatorFullText::update(const Block & block, size_t * pos, size_t limit) { if (*pos >= block.rows()) - throw Exception( - "The provided position is not less than the number of block rows. Position: " - + toString(*pos) + ", Block rows: " + toString(block.rows()) + ".", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "The provided position is not less than the number of block rows. " + "Position: {}, Block rows: {}.", toString(*pos), toString(block.rows())); size_t rows_read = std::min(limit, block.rows() - *pos); @@ -225,19 +224,19 @@ bool MergeTreeConditionFullText::alwaysUnknownOrTrue() const rpn_stack.back() = arg1 || arg2; } else - throw Exception("Unexpected function type in KeyCondition::RPNElement", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected function type in KeyCondition::RPNElement"); } return rpn_stack[0]; } +/// Keep in-sync with MergeTreeIndexConditionGin::mayBeTrueOnTranuleInPart bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const { std::shared_ptr granule = std::dynamic_pointer_cast(idx_granule); if (!granule) - throw Exception( - "BloomFilter index condition got a granule with the wrong type.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "BloomFilter index condition got a granule with the wrong type."); /// Check like in KeyCondition. 
std::vector rpn_stack; @@ -314,11 +313,11 @@ bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx rpn_stack.emplace_back(true, false); } else - throw Exception("Unexpected function type in BloomFilterCondition::RPNElement", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected function type in BloomFilterCondition::RPNElement"); } if (rpn_stack.size() != 1) - throw Exception("Unexpected stack size in BloomFilterCondition::mayBeTrueOnGranule", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected stack size in BloomFilterCondition::mayBeTrueOnGranule"); return rpn_stack[0].can_be_true; } @@ -469,6 +468,10 @@ bool MergeTreeConditionFullText::traverseTreeEquals( { key_column_num = map_keys_key_column_num; key_exists = true; + + auto const_data_type = WhichDataType(const_type); + if (!const_data_type.isStringOrFixedString() && !const_data_type.isArray()) + return false; } else { @@ -646,6 +649,8 @@ bool MergeTreeConditionFullText::tryPrepareSetBloomFilter( std::vector key_position; Columns columns = prepared_set->getSetElements(); + size_t prepared_set_total_row_count = prepared_set->getTotalRowCount(); + for (const auto & elem : key_tuple_mapping) { bloom_filters.emplace_back(); @@ -653,7 +658,8 @@ bool MergeTreeConditionFullText::tryPrepareSetBloomFilter( size_t tuple_idx = elem.tuple_index; const auto & column = columns[tuple_idx]; - for (size_t row = 0; row < prepared_set->getTotalRowCount(); ++row) + + for (size_t row = 0; row < prepared_set_total_row_count; ++row) { bloom_filters.back().emplace_back(params); auto ref = column->getDataAt(row); @@ -716,7 +722,7 @@ MergeTreeIndexPtr bloomFilterIndexCreator( } else { - throw Exception("Unknown index type: " + backQuote(index.name), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown index type: {}", backQuote(index.name)); } } @@ -738,29 +744,32 @@ void bloomFilterIndexValidator(const IndexDescription & index, bool /*attach*/) } if (!data_type.isString() && !data_type.isFixedString()) - throw Exception("Bloom filter index can be used only with `String`, `FixedString`, `LowCardinality(String)`, `LowCardinality(FixedString)` column or Array with `String` or `FixedString` values column.", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, + "Bloom filter index can be used only with `String`, `FixedString`, " + "`LowCardinality(String)`, `LowCardinality(FixedString)` column " + "or Array with `String` or `FixedString` values column."); } if (index.type == NgramTokenExtractor::getName()) { if (index.arguments.size() != 4) - throw Exception("`ngrambf` index must have exactly 4 arguments.", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "`ngrambf` index must have exactly 4 arguments."); } else if (index.type == SplitTokenExtractor::getName()) { if (index.arguments.size() != 3) - throw Exception("`tokenbf` index must have exactly 3 arguments.", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "`tokenbf` index must have exactly 3 arguments."); } else { - throw Exception("Unknown index type: " + backQuote(index.name), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown index type: {}", backQuote(index.name)); } assert(index.arguments.size() >= 3); for (const auto & arg : index.arguments) if (arg.getType() != Field::Types::UInt64) - throw Exception("All parameters to *bf_v1 index must be unsigned integers", 
ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "All parameters to *bf_v1 index must be unsigned integers"); /// Just validate BloomFilterParameters params( diff --git a/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp b/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp index 11e1f9efcc2..4e339964de3 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp @@ -89,13 +89,13 @@ std::string MarkType::getFileExtension() const } -std::optional MergeTreeIndexGranularityInfo::getMarksExtensionFromFilesystem(const IDataPartStorage & data_part_storage) +std::optional MergeTreeIndexGranularityInfo::getMarksTypeFromFilesystem(const IDataPartStorage & data_part_storage) { if (data_part_storage.exists()) for (auto it = data_part_storage.iterate(); it->isValid(); it->next()) if (it->isFile()) if (std::string ext = fs::path(it->name()).extension(); MarkType::isMarkFileExtension(ext)) - return ext; + return MarkType(ext); return {}; } @@ -112,8 +112,8 @@ MergeTreeIndexGranularityInfo::MergeTreeIndexGranularityInfo(const MergeTreeData void MergeTreeIndexGranularityInfo::changeGranularityIfRequired(const IDataPartStorage & data_part_storage) { - auto mrk_ext = getMarksExtensionFromFilesystem(data_part_storage); - if (mrk_ext && !MarkType(*mrk_ext).adaptive) + auto mrk_type = getMarksTypeFromFilesystem(data_part_storage); + if (mrk_type && !mrk_type->adaptive) { mark_type.adaptive = false; index_granularity_bytes = 0; @@ -129,7 +129,7 @@ size_t MergeTreeIndexGranularityInfo::getMarkSizeInBytes(size_t columns_num) con else if (mark_type.part_type == MergeTreeDataPartType::InMemory) return 0; else - throw Exception("Unknown part type", ErrorCodes::UNKNOWN_PART_TYPE); + throw Exception(ErrorCodes::UNKNOWN_PART_TYPE, "Unknown part type"); } size_t getAdaptiveMrkSizeCompact(size_t columns_num) diff --git a/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.h b/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.h index aed3081d3d0..4cb35ee64b1 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.h +++ b/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.h @@ -57,7 +57,7 @@ public: size_t getMarkSizeInBytes(size_t columns_num = 1) const; - static std::optional getMarksExtensionFromFilesystem(const IDataPartStorage & data_part_storage); + static std::optional getMarksTypeFromFilesystem(const IDataPartStorage & data_part_storage); }; constexpr inline auto getNonAdaptiveMrkSizeWide() { return sizeof(UInt64) * 2; } diff --git a/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp index 64fa7264738..267708b5312 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp @@ -43,7 +43,7 @@ MergeTreeIndexGranuleBloomFilter::MergeTreeIndexGranuleBloomFilter( : total_rows(total_rows_), bits_per_row(bits_per_row_), hash_functions(hash_functions_) { if (granule_index_blocks_.empty() || !total_rows) - throw Exception("LOGICAL ERROR: granule_index_blocks empty or total_rows is zero.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "LOGICAL ERROR: granule_index_blocks empty or total_rows is zero."); assertGranuleBlocksStructure(granule_index_blocks_); @@ -52,7 +52,7 @@ MergeTreeIndexGranuleBloomFilter::MergeTreeIndexGranuleBloomFilter( Block granule_index_block = granule_index_blocks_[index]; if (unlikely(!granule_index_block 
|| !granule_index_block.rows())) - throw Exception("LOGICAL ERROR: granule_index_block is empty.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "LOGICAL ERROR: granule_index_block is empty."); if (index == 0) { diff --git a/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp b/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp index d8765ddb9bc..2b7a40b429a 100644 --- a/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp @@ -81,7 +81,7 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexHypothesis::createIndexAggregator() co MergeTreeIndexConditionPtr MergeTreeIndexHypothesis::createIndexCondition( const SelectQueryInfo &, ContextPtr) const { - throw Exception("Not supported", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Not supported"); } MergeTreeIndexMergedConditionPtr MergeTreeIndexHypothesis::createIndexMergedCondition( @@ -104,7 +104,7 @@ MergeTreeIndexPtr hypothesisIndexCreator(const IndexDescription & index) void hypothesisIndexValidator(const IndexDescription & index, bool /*attach*/) { if (index.expression_list_ast->children.size() != 1) - throw Exception("Hypothesis index needs exactly one expression", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Hypothesis index needs exactly one expression"); } } diff --git a/src/Storages/MergeTree/MergeTreeIndexHypothesisMergedCondition.cpp b/src/Storages/MergeTree/MergeTreeIndexHypothesisMergedCondition.cpp index c62b5e86c75..1ab64fc84c7 100644 --- a/src/Storages/MergeTree/MergeTreeIndexHypothesisMergedCondition.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexHypothesisMergedCondition.cpp @@ -142,7 +142,7 @@ bool MergeTreeIndexhypothesisMergedCondition::mayBeTrueOnGranule(const MergeTree { const auto granule = std::dynamic_pointer_cast(index_granule); if (!granule) - throw Exception("Only hypothesis index is supported here.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Only hypothesis index is supported here."); values.push_back(granule->met); } diff --git a/src/Storages/MergeTree/MergeTreeIndexGin.cpp b/src/Storages/MergeTree/MergeTreeIndexInverted.cpp similarity index 84% rename from src/Storages/MergeTree/MergeTreeIndexGin.cpp rename to src/Storages/MergeTree/MergeTreeIndexInverted.cpp index 26f3fcb4fb6..02222aa530c 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGin.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexInverted.cpp @@ -1,30 +1,28 @@ - -#include +#include #include -#include +#include +#include +#include #include -#include +#include +#include +#include #include -#include +#include #include #include +#include #include #include -#include -#include -#include -#include #include #include #include -#include - #include -#include -#include -#include -#include +#include +#include +#include +#include namespace DB @@ -35,19 +33,18 @@ namespace ErrorCodes extern const int INCORRECT_QUERY; } -MergeTreeIndexGranuleGinFilter::MergeTreeIndexGranuleGinFilter( +MergeTreeIndexGranuleInverted::MergeTreeIndexGranuleInverted( const String & index_name_, size_t columns_number, const GinFilterParameters & params_) : index_name(index_name_) , params(params_) - , gin_filters( - columns_number, GinFilter(params)) + , gin_filters(columns_number, GinFilter(params)) , has_elems(false) { } -void MergeTreeIndexGranuleGinFilter::serializeBinary(WriteBuffer & ostr) const +void MergeTreeIndexGranuleInverted::serializeBinary(WriteBuffer & ostr) const { if (empty()) throw 
Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to write empty fulltext index {}.", backQuote(index_name)); @@ -59,11 +56,11 @@ void MergeTreeIndexGranuleGinFilter::serializeBinary(WriteBuffer & ostr) const { size_t filter_size = gin_filter.getFilter().size(); size_serialization->serializeBinary(filter_size, ostr, {}); - ostr.write(reinterpret_cast(gin_filter.getFilter().data()), filter_size * sizeof(GinFilter::GinSegmentWithRowIDRanges::value_type)); + ostr.write(reinterpret_cast(gin_filter.getFilter().data()), filter_size * sizeof(GinSegmentWithRowIdRangeVector::value_type)); } } -void MergeTreeIndexGranuleGinFilter::deserializeBinary(ReadBuffer & istr, MergeTreeIndexVersion version) +void MergeTreeIndexGranuleInverted::deserializeBinary(ReadBuffer & istr, MergeTreeIndexVersion version) { if (version != 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown index version {}.", version); @@ -81,13 +78,13 @@ void MergeTreeIndexGranuleGinFilter::deserializeBinary(ReadBuffer & istr, MergeT continue; gin_filter.getFilter().assign(filter_size, {}); - istr.readStrict(reinterpret_cast(gin_filter.getFilter().data()), filter_size * sizeof(GinFilter::GinSegmentWithRowIDRanges::value_type)); + istr.readStrict(reinterpret_cast(gin_filter.getFilter().data()), filter_size * sizeof(GinSegmentWithRowIdRangeVector::value_type)); } has_elems = true; } -MergeTreeIndexAggregatorGinFilter::MergeTreeIndexAggregatorGinFilter( +MergeTreeIndexAggregatorInverted::MergeTreeIndexAggregatorInverted( GinIndexStorePtr store_, const Names & index_columns_, const String & index_name_, @@ -99,37 +96,34 @@ MergeTreeIndexAggregatorGinFilter::MergeTreeIndexAggregatorGinFilter( , params(params_) , token_extractor(token_extractor_) , granule( - std::make_shared( + std::make_shared( index_name, index_columns.size(), params)) { } -MergeTreeIndexGranulePtr MergeTreeIndexAggregatorGinFilter::getGranuleAndReset() +MergeTreeIndexGranulePtr MergeTreeIndexAggregatorInverted::getGranuleAndReset() { - auto new_granule = std::make_shared( + auto new_granule = std::make_shared( index_name, index_columns.size(), params); new_granule.swap(granule); return new_granule; } -void MergeTreeIndexAggregatorGinFilter::addToGinFilter(UInt32 rowID, const char* data, size_t length, GinFilter& gin_filter, UInt64 limit) +void MergeTreeIndexAggregatorInverted::addToGinFilter(UInt32 rowID, const char * data, size_t length, GinFilter & gin_filter, UInt64 limit) { size_t cur = 0; size_t token_start = 0; size_t token_len = 0; while (cur < length && token_extractor->nextInStringPadded(data, length, &cur, &token_start, &token_len)) - { gin_filter.add(data + token_start, token_len, rowID, store, limit); - } } -void MergeTreeIndexAggregatorGinFilter::update(const Block & block, size_t * pos, size_t limit) +void MergeTreeIndexAggregatorInverted::update(const Block & block, size_t * pos, size_t limit) { if (*pos >= block.rows()) - throw Exception( - "The provided position is not less than the number of block rows. Position: " - + toString(*pos) + ", Block rows: " + toString(block.rows()) + ".", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "The provided position is not less than the number of block rows. 
" + "Position: {}, Block rows: {}.", toString(*pos), toString(block.rows())); size_t rows_read = std::min(limit, block.rows() - *pos); auto row_id = store->getNextRowIDRange(rows_read); @@ -189,7 +183,7 @@ void MergeTreeIndexAggregatorGinFilter::update(const Block & block, size_t * pos *pos += rows_read; } -MergeTreeConditionGinFilter::MergeTreeConditionGinFilter( +MergeTreeConditionInverted::MergeTreeConditionInverted( const SelectQueryInfo & query_info, ContextPtr context_, const Block & index_sample_block, @@ -236,7 +230,7 @@ MergeTreeConditionGinFilter::MergeTreeConditionGinFilter( } /// Keep in-sync with MergeTreeConditionFullText::alwaysUnknownOrTrue -bool MergeTreeConditionGinFilter::alwaysUnknownOrTrue() const +bool MergeTreeConditionInverted::alwaysUnknownOrTrue() const { /// Check like in KeyCondition. std::vector rpn_stack; @@ -277,19 +271,18 @@ bool MergeTreeConditionGinFilter::alwaysUnknownOrTrue() const rpn_stack.back() = arg1 || arg2; } else - throw Exception("Unexpected function type in KeyCondition::RPNElement", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected function type in KeyCondition::RPNElement"); } return rpn_stack[0]; } -bool MergeTreeConditionGinFilter::mayBeTrueOnGranuleInPart(MergeTreeIndexGranulePtr idx_granule,[[maybe_unused]] PostingsCacheForStore &cache_store) const +bool MergeTreeConditionInverted::mayBeTrueOnGranuleInPart(MergeTreeIndexGranulePtr idx_granule,[[maybe_unused]] PostingsCacheForStore & cache_store) const { - std::shared_ptr granule - = std::dynamic_pointer_cast(idx_granule); + std::shared_ptr granule + = std::dynamic_pointer_cast(idx_granule); if (!granule) - throw Exception( - "GinFilter index condition got a granule with the wrong type.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "GinFilter index condition got a granule with the wrong type."); /// Check like in KeyCondition. 
std::vector rpn_stack; @@ -366,16 +359,16 @@ bool MergeTreeConditionGinFilter::mayBeTrueOnGranuleInPart(MergeTreeIndexGranule rpn_stack.emplace_back(true, false); } else - throw Exception("Unexpected function type in GinFilterCondition::RPNElement", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected function type in GinFilterCondition::RPNElement"); } if (rpn_stack.size() != 1) - throw Exception("Unexpected stack size in GinFilterCondition::mayBeTrueOnGranule", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected stack size in GinFilterCondition::mayBeTrueOnGranule"); return rpn_stack[0].can_be_true; } -bool MergeTreeConditionGinFilter::traverseAtomAST(const RPNBuilderTreeNode & node, RPNElement & out) +bool MergeTreeConditionInverted::traverseAtomAST(const RPNBuilderTreeNode & node, RPNElement & out) { { Field const_value; @@ -455,7 +448,7 @@ bool MergeTreeConditionGinFilter::traverseAtomAST(const RPNBuilderTreeNode & nod return false; } -bool MergeTreeConditionGinFilter::traverseASTEquals( +bool MergeTreeConditionInverted::traverseASTEquals( const String & function_name, const RPNBuilderTreeNode & key_ast, const DataTypePtr & value_type, @@ -608,7 +601,7 @@ bool MergeTreeConditionGinFilter::traverseASTEquals( out.function = RPNElement::FUNCTION_MULTI_SEARCH; /// 2d vector is not needed here but is used because already exists for FUNCTION_IN - std::vector> gin_filters; + std::vector gin_filters; gin_filters.emplace_back(); for (const auto & element : const_value.get()) { @@ -626,7 +619,7 @@ bool MergeTreeConditionGinFilter::traverseASTEquals( return false; } -bool MergeTreeConditionGinFilter::tryPrepareSetGinFilter( +bool MergeTreeConditionInverted::tryPrepareSetGinFilter( const RPNBuilderTreeNode & lhs, const RPNBuilderTreeNode & rhs, RPNElement & out) @@ -669,7 +662,7 @@ bool MergeTreeConditionGinFilter::tryPrepareSetGinFilter( if (data_type->getTypeId() != TypeIndex::String && data_type->getTypeId() != TypeIndex::FixedString) return false; - std::vector> gin_filters; + std::vector gin_filters; std::vector key_position; Columns columns = prepared_set->getSetElements(); @@ -695,55 +688,55 @@ bool MergeTreeConditionGinFilter::tryPrepareSetGinFilter( return true; } -MergeTreeIndexGranulePtr MergeTreeIndexGinFilter::createIndexGranule() const +MergeTreeIndexGranulePtr MergeTreeIndexInverted::createIndexGranule() const { - return std::make_shared(index.name, index.column_names.size(), params); + return std::make_shared(index.name, index.column_names.size(), params); } -MergeTreeIndexAggregatorPtr MergeTreeIndexGinFilter::createIndexAggregator() const +MergeTreeIndexAggregatorPtr MergeTreeIndexInverted::createIndexAggregator() const { /// should not be called: createIndexAggregatorForPart should be used assert(false); return nullptr; } -MergeTreeIndexAggregatorPtr MergeTreeIndexGinFilter::createIndexAggregatorForPart(const GinIndexStorePtr &store) const +MergeTreeIndexAggregatorPtr MergeTreeIndexInverted::createIndexAggregatorForPart(const GinIndexStorePtr & store) const { - return std::make_shared(store, index.column_names, index.name, params, token_extractor.get()); + return std::make_shared(store, index.column_names, index.name, params, token_extractor.get()); } -MergeTreeIndexConditionPtr MergeTreeIndexGinFilter::createIndexCondition( +MergeTreeIndexConditionPtr MergeTreeIndexInverted::createIndexCondition( const SelectQueryInfo & query, ContextPtr context) const { - return std::make_shared(query, context, 
index.sample_block, params, token_extractor.get()); + return std::make_shared(query, context, index.sample_block, params, token_extractor.get()); }; -bool MergeTreeIndexGinFilter::mayBenefitFromIndexForIn(const ASTPtr & node) const +bool MergeTreeIndexInverted::mayBenefitFromIndexForIn(const ASTPtr & node) const { return std::find(std::cbegin(index.column_names), std::cend(index.column_names), node->getColumnName()) != std::cend(index.column_names); } -MergeTreeIndexPtr ginIndexCreator( +MergeTreeIndexPtr invertedIndexCreator( const IndexDescription & index) { size_t n = index.arguments.empty() ? 0 : index.arguments[0].get(); - Float64 density = index.arguments.size() < 2 ? 1.0f : index.arguments[1].get(); + Float64 density = index.arguments.size() < 2 ? 1.0 : index.arguments[1].get(); GinFilterParameters params(n, density); /// Use SplitTokenExtractor when n is 0, otherwise use NgramTokenExtractor if (n > 0) { auto tokenizer = std::make_unique(n); - return std::make_shared(index, params, std::move(tokenizer)); + return std::make_shared(index, params, std::move(tokenizer)); } else { auto tokenizer = std::make_unique(); - return std::make_shared(index, params, std::move(tokenizer)); + return std::make_shared(index, params, std::move(tokenizer)); } } -void ginIndexValidator(const IndexDescription & index, bool /*attach*/) +void invertedIndexValidator(const IndexDescription & index, bool /*attach*/) { for (const auto & index_data_type : index.data_types) { @@ -761,25 +754,23 @@ void ginIndexValidator(const IndexDescription & index, bool /*attach*/) } if (!data_type.isString() && !data_type.isFixedString()) - throw Exception("Inverted index can be used only with `String`, `FixedString`, `LowCardinality(String)`, `LowCardinality(FixedString)` column or Array with `String` or `FixedString` values column.", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Inverted index can be used only with `String`, `FixedString`," + "`LowCardinality(String)`, `LowCardinality(FixedString)` " + "column or Array with `String` or `FixedString` values column."); } - if (index.type != GinFilter::getName()) - throw Exception("Unknown index type: " + backQuote(index.name), ErrorCodes::LOGICAL_ERROR); - if (index.arguments.size() > 2) - throw Exception("Inverted index must have less than two arguments.", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Inverted index must have less than two arguments."); if (!index.arguments.empty() && index.arguments[0].getType() != Field::Types::UInt64) - throw Exception("The first Inverted index argument must be positive integer.", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "The first Inverted index argument must be positive integer."); if (index.arguments.size() == 2 && (index.arguments[1].getType() != Field::Types::Float64 || index.arguments[1].get() <= 0 || index.arguments[1].get() > 1)) - throw Exception("The second Inverted index argument must be a float between 0 and 1.", ErrorCodes::INCORRECT_QUERY); - - size_t ngrams = index.arguments.empty() ? 0 : index.arguments[0].get(); - Float64 density = index.arguments.size() < 2 ? 1.0f : index.arguments[1].get(); + throw Exception(ErrorCodes::INCORRECT_QUERY, "The second Inverted index argument must be a float between 0 and 1."); /// Just validate + size_t ngrams = index.arguments.empty() ? 0 : index.arguments[0].get(); + Float64 density = index.arguments.size() < 2 ? 
1.0 : index.arguments[1].get(); GinFilterParameters params(ngrams, density); } diff --git a/src/Storages/MergeTree/MergeTreeIndexGin.h b/src/Storages/MergeTree/MergeTreeIndexInverted.h similarity index 79% rename from src/Storages/MergeTree/MergeTreeIndexGin.h rename to src/Storages/MergeTree/MergeTreeIndexInverted.h index d915d493810..baabed09905 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGin.h +++ b/src/Storages/MergeTree/MergeTreeIndexInverted.h @@ -1,24 +1,23 @@ #pragma once -#include -#include -#include - -#include -#include -#include #include +#include +#include +#include +#include +#include +#include namespace DB { -struct MergeTreeIndexGranuleGinFilter final : public IMergeTreeIndexGranule +struct MergeTreeIndexGranuleInverted final : public IMergeTreeIndexGranule { - explicit MergeTreeIndexGranuleGinFilter( + explicit MergeTreeIndexGranuleInverted( const String & index_name_, size_t columns_number, const GinFilterParameters & params_); - ~MergeTreeIndexGranuleGinFilter() override = default; + ~MergeTreeIndexGranuleInverted() override = default; void serializeBinary(WriteBuffer & ostr) const override; void deserializeBinary(ReadBuffer & istr, MergeTreeIndexVersion version) override; @@ -27,30 +26,29 @@ struct MergeTreeIndexGranuleGinFilter final : public IMergeTreeIndexGranule String index_name; GinFilterParameters params; - - std::vector gin_filters; + GinFilters gin_filters; bool has_elems; }; -using MergeTreeIndexGranuleGinFilterPtr = std::shared_ptr; +using MergeTreeIndexGranuleInvertedPtr = std::shared_ptr; -struct MergeTreeIndexAggregatorGinFilter final : IMergeTreeIndexAggregator +struct MergeTreeIndexAggregatorInverted final : IMergeTreeIndexAggregator { - explicit MergeTreeIndexAggregatorGinFilter( + explicit MergeTreeIndexAggregatorInverted( GinIndexStorePtr store_, const Names & index_columns_, const String & index_name_, const GinFilterParameters & params_, TokenExtractorPtr token_extractor_); - ~MergeTreeIndexAggregatorGinFilter() override = default; + ~MergeTreeIndexAggregatorInverted() override = default; bool empty() const override { return !granule || granule->empty(); } MergeTreeIndexGranulePtr getGranuleAndReset() override; void update(const Block & block, size_t * pos, size_t limit) override; - void addToGinFilter(UInt32 rowID, const char* data, size_t length, GinFilter& gin_filter, UInt64 limit); + void addToGinFilter(UInt32 rowID, const char * data, size_t length, GinFilter & gin_filter, UInt64 limit); GinIndexStorePtr store; Names index_columns; @@ -58,21 +56,21 @@ struct MergeTreeIndexAggregatorGinFilter final : IMergeTreeIndexAggregator const GinFilterParameters params; TokenExtractorPtr token_extractor; - MergeTreeIndexGranuleGinFilterPtr granule; + MergeTreeIndexGranuleInvertedPtr granule; }; -class MergeTreeConditionGinFilter final : public IMergeTreeIndexCondition, WithContext +class MergeTreeConditionInverted final : public IMergeTreeIndexCondition, WithContext { public: - MergeTreeConditionGinFilter( + MergeTreeConditionInverted( const SelectQueryInfo & query_info, ContextPtr context, const Block & index_sample_block, const GinFilterParameters & params_, TokenExtractorPtr token_extactor_); - ~MergeTreeConditionGinFilter() override = default; + ~MergeTreeConditionInverted() override = default; bool alwaysUnknownOrTrue() const override; bool mayBeTrueOnGranule([[maybe_unused]]MergeTreeIndexGranulePtr idx_granule) const override @@ -81,7 +79,8 @@ public: assert(false); return false; } - bool mayBeTrueOnGranuleInPart(MergeTreeIndexGranulePtr 
idx_granule, [[maybe_unused]] PostingsCacheForStore& cache_store) const; + bool mayBeTrueOnGranuleInPart(MergeTreeIndexGranulePtr idx_granule, [[maybe_unused]] PostingsCacheForStore & cache_store) const; + private: struct KeyTuplePositionMapping { @@ -124,7 +123,7 @@ private: std::unique_ptr gin_filter; /// For FUNCTION_IN, FUNCTION_NOT_IN and FUNCTION_MULTI_SEARCH - std::vector> set_gin_filters; + std::vector set_gin_filters; /// For FUNCTION_IN and FUNCTION_NOT_IN std::vector set_key_position; @@ -154,10 +153,10 @@ private: PreparedSetsPtr prepared_sets; }; -class MergeTreeIndexGinFilter final : public IMergeTreeIndex +class MergeTreeIndexInverted final : public IMergeTreeIndex { public: - MergeTreeIndexGinFilter( + MergeTreeIndexInverted( const IndexDescription & index_, const GinFilterParameters & params_, std::unique_ptr && token_extractor_) @@ -165,13 +164,12 @@ public: , params(params_) , token_extractor(std::move(token_extractor_)) {} - ~MergeTreeIndexGinFilter() override = default; + ~MergeTreeIndexInverted() override = default; MergeTreeIndexGranulePtr createIndexGranule() const override; MergeTreeIndexAggregatorPtr createIndexAggregator() const override; - MergeTreeIndexAggregatorPtr createIndexAggregatorForPart(const GinIndexStorePtr &store) const override; - MergeTreeIndexConditionPtr createIndexCondition( - const SelectQueryInfo & query, ContextPtr context) const override; + MergeTreeIndexAggregatorPtr createIndexAggregatorForPart(const GinIndexStorePtr & store) const override; + MergeTreeIndexConditionPtr createIndexCondition(const SelectQueryInfo & query, ContextPtr context) const override; bool mayBenefitFromIndexForIn(const ASTPtr & node) const override; diff --git a/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp b/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp index e145ae68e65..d80f7521430 100644 --- a/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp @@ -34,8 +34,7 @@ MergeTreeIndexGranuleMinMax::MergeTreeIndexGranuleMinMax( void MergeTreeIndexGranuleMinMax::serializeBinary(WriteBuffer & ostr) const { if (empty()) - throw Exception( - "Attempt to write empty minmax index " + backQuote(index_name), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to write empty minmax index {}", backQuote(index_name)); for (size_t i = 0; i < index_sample_block.columns(); ++i) { @@ -122,9 +121,8 @@ MergeTreeIndexGranulePtr MergeTreeIndexAggregatorMinMax::getGranuleAndReset() void MergeTreeIndexAggregatorMinMax::update(const Block & block, size_t * pos, size_t limit) { if (*pos >= block.rows()) - throw Exception( - "The provided position is not less than the number of block rows. Position: " - + toString(*pos) + ", Block rows: " + toString(block.rows()) + ".", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "The provided position is not less than the number of block rows. 
" + "Position: {}, Block rows: {}.", toString(*pos), toString(block.rows())); size_t rows_read = std::min(limit, block.rows() - *pos); @@ -191,8 +189,7 @@ bool MergeTreeIndexConditionMinMax::mayBeTrueOnGranule(MergeTreeIndexGranulePtr std::shared_ptr granule = std::dynamic_pointer_cast(idx_granule); if (!granule) - throw Exception( - "Minmax index condition got a granule with the wrong type.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Minmax index condition got a granule with the wrong type."); return condition.checkInHyperrectangle(granule->hyperrectangle, index_data_types).can_be_true; } diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index db99a2f37be..d28272b6d73 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -145,9 +145,8 @@ MergeTreeIndexAggregatorSet::MergeTreeIndexAggregatorSet(const String & index_na void MergeTreeIndexAggregatorSet::update(const Block & block, size_t * pos, size_t limit) { if (*pos >= block.rows()) - throw Exception( - "The provided position is not less than the number of block rows. Position: " - + toString(*pos) + ", Block rows: " + toString(block.rows()) + ".", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "The provided position is not less than the number of block rows. " + "Position: {}, Block rows: {}.", toString(*pos), toString(block.rows())); size_t rows_read = std::min(limit, block.rows() - *pos); @@ -713,9 +712,9 @@ MergeTreeIndexPtr setIndexCreator(const IndexDescription & index) void setIndexValidator(const IndexDescription & index, bool /*attach*/) { if (index.arguments.size() != 1) - throw Exception("Set index must have exactly one argument.", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Set index must have exactly one argument."); else if (index.arguments[0].getType() != Field::Types::UInt64) - throw Exception("Set index argument must be positive integer.", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Set index argument must be positive integer."); } } diff --git a/src/Storages/MergeTree/MergeTreeIndices.cpp b/src/Storages/MergeTree/MergeTreeIndices.cpp index e5e376e7f69..2be9ecd8de3 100644 --- a/src/Storages/MergeTree/MergeTreeIndices.cpp +++ b/src/Storages/MergeTree/MergeTreeIndices.cpp @@ -20,13 +20,13 @@ namespace ErrorCodes void MergeTreeIndexFactory::registerCreator(const std::string & index_type, Creator creator) { if (!creators.emplace(index_type, std::move(creator)).second) - throw Exception("MergeTreeIndexFactory: the Index creator name '" + index_type + "' is not unique", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeTreeIndexFactory: the Index creator name '{}' is not unique", + index_type); } void MergeTreeIndexFactory::registerValidator(const std::string & index_type, Validator validator) { if (!validators.emplace(index_type, std::move(validator)).second) - throw Exception("MergeTreeIndexFactory: the Index validator name '" + index_type + "' is not unique", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeTreeIndexFactory: the Index validator name '{}' is not unique", index_type); } @@ -35,8 +35,8 @@ MergeTreeIndexPtr MergeTreeIndexFactory::get( { auto it = creators.find(index.type); if (it == creators.end()) - throw Exception( - "Unknown Index type '" + index.type + "'. 
Available index types: " + + throw Exception(ErrorCodes::INCORRECT_QUERY, + "Unknown Index type '{}'. Available index types: {}", index.type, std::accumulate(creators.cbegin(), creators.cend(), std::string{}, [] (auto && left, const auto & right) -> std::string { @@ -44,8 +44,8 @@ MergeTreeIndexPtr MergeTreeIndexFactory::get( return right.first; else return left + ", " + right.first; - }), - ErrorCodes::INCORRECT_QUERY); + }) + ); return it->second(index); } @@ -63,9 +63,9 @@ void MergeTreeIndexFactory::validate(const IndexDescription & index, bool attach { auto it = validators.find(index.type); if (it == validators.end()) - throw Exception( - "Unknown Index type '" + index.type + "'. Available index types: " - + std::accumulate( + throw Exception(ErrorCodes::INCORRECT_QUERY, + "Unknown Index type '{}'. Available index types: {}", index.type, + std::accumulate( validators.cbegin(), validators.cend(), std::string{}, @@ -75,8 +75,8 @@ void MergeTreeIndexFactory::validate(const IndexDescription & index, bool attach return right.first; else return left + ", " + right.first; - }), - ErrorCodes::INCORRECT_QUERY); + }) + ); it->second(index, attach); } @@ -105,8 +105,9 @@ MergeTreeIndexFactory::MergeTreeIndexFactory() registerCreator("annoy", annoyIndexCreator); registerValidator("annoy", annoyIndexValidator); #endif - registerCreator("inverted", ginIndexCreator); - registerValidator("inverted", ginIndexValidator); + + registerCreator("inverted", invertedIndexCreator); + registerValidator("inverted", invertedIndexValidator); } diff --git a/src/Storages/MergeTree/MergeTreeIndices.h b/src/Storages/MergeTree/MergeTreeIndices.h index 52cf8c850b3..1ad6b082223 100644 --- a/src/Storages/MergeTree/MergeTreeIndices.h +++ b/src/Storages/MergeTree/MergeTreeIndices.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -16,7 +17,6 @@ #include #include -#include constexpr auto INDEX_FILE_PREFIX = "skp_idx_"; @@ -237,7 +237,8 @@ void hypothesisIndexValidator(const IndexDescription & index, bool attach); MergeTreeIndexPtr annoyIndexCreator(const IndexDescription & index); void annoyIndexValidator(const IndexDescription & index, bool attach); #endif -MergeTreeIndexPtr ginIndexCreator(const IndexDescription& index); -void ginIndexValidator(const IndexDescription& index, bool attach); + +MergeTreeIndexPtr invertedIndexCreator(const IndexDescription& index); +void invertedIndexValidator(const IndexDescription& index, bool attach); } diff --git a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp index 397a9d82655..3fc7ff54c35 100644 --- a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp +++ b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -106,7 +107,7 @@ MarkCache::MappedPtr MergeTreeMarksLoader::loadMarksImpl() ErrorCodes::CORRUPTED_DATA, "Bad size of marks file '{}': {}, must be: {}", std::string(fs::path(data_part_storage->getFullPath()) / mrk_path), - std::to_string(file_size), std::to_string(expected_uncompressed_size)); + file_size, expected_uncompressed_size); auto buffer = data_part_storage->readFile(mrk_path, read_settings.adjustBufferSize(file_size), file_size, std::nullopt); std::unique_ptr reader; @@ -178,29 +179,11 @@ MarkCache::MappedPtr MergeTreeMarksLoader::loadMarks() std::future MergeTreeMarksLoader::loadMarksAsync() { - ThreadGroupStatusPtr thread_group; - if (CurrentThread::isInitialized() && CurrentThread::get().getThreadGroup()) - 
thread_group = CurrentThread::get().getThreadGroup(); - - auto task = std::make_shared>([thread_group, this] - { - setThreadName("loadMarksThread"); - - if (thread_group) - CurrentThread::attachTo(thread_group); - - SCOPE_EXIT_SAFE({ - if (thread_group) - CurrentThread::detachQuery(); - }); - - ProfileEvents::increment(ProfileEvents::BackgroundLoadingMarksTasks); - return loadMarks(); - }); - - auto task_future = task->get_future(); - load_marks_threadpool->scheduleOrThrow([task]{ (*task)(); }); - return task_future; + return scheduleFromThreadPool([this]() -> MarkCache::MappedPtr + { + ProfileEvents::increment(ProfileEvents::BackgroundLoadingMarksTasks); + return loadMarks(); + }, *load_marks_threadpool, "LoadMarksThread"); } } diff --git a/src/Storages/MergeTree/MergeTreeMutationEntry.cpp b/src/Storages/MergeTree/MergeTreeMutationEntry.cpp index 9906ea3d02a..2e30a3f3986 100644 --- a/src/Storages/MergeTree/MergeTreeMutationEntry.cpp +++ b/src/Storages/MergeTree/MergeTreeMutationEntry.cpp @@ -42,7 +42,9 @@ UInt64 MergeTreeMutationEntry::parseFileName(const String & file_name_) { if (UInt64 maybe_block_number = tryParseFileName(file_name_)) return maybe_block_number; - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot parse mutation version from file name, expected 'mutation_.txt', got '{}'", file_name_); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Cannot parse mutation version from file name, expected 'mutation_.txt', got '{}'", + file_name_); } MergeTreeMutationEntry::MergeTreeMutationEntry(MutationCommands commands_, DiskPtr disk_, const String & path_prefix_, UInt64 tmp_number, diff --git a/src/Storages/MergeTree/MergeTreePartInfo.cpp b/src/Storages/MergeTree/MergeTreePartInfo.cpp index 3b1c41f61ba..a6baecee125 100644 --- a/src/Storages/MergeTree/MergeTreePartInfo.cpp +++ b/src/Storages/MergeTree/MergeTreePartInfo.cpp @@ -143,7 +143,7 @@ void MergeTreePartInfo::parseMinMaxDatesFromPartName(const String & part_name, D || !checkChar('_', in) || !tryReadIntText(max_yyyymmdd, in)) { - throw Exception("Unexpected part name: " + part_name, ErrorCodes::BAD_DATA_PART_NAME); + throw Exception(ErrorCodes::BAD_DATA_PART_NAME, "Unexpected part name: {}", part_name); } const auto & date_lut = DateLUT::instance(); @@ -155,7 +155,7 @@ void MergeTreePartInfo::parseMinMaxDatesFromPartName(const String & part_name, D auto max_month = date_lut.toNumYYYYMM(max_date); if (min_month != max_month) - throw Exception("Part name " + part_name + " contains different months", ErrorCodes::BAD_DATA_PART_NAME); + throw Exception(ErrorCodes::BAD_DATA_PART_NAME, "Part name {} contains different months", part_name); } diff --git a/src/Storages/MergeTree/MergeTreePartition.cpp b/src/Storages/MergeTree/MergeTreePartition.cpp index 5d4b4853812..903f467d159 100644 --- a/src/Storages/MergeTree/MergeTreePartition.cpp +++ b/src/Storages/MergeTree/MergeTreePartition.cpp @@ -200,7 +200,7 @@ String MergeTreePartition::getID(const MergeTreeData & storage) const String MergeTreePartition::getID(const Block & partition_key_sample) const { if (value.size() != partition_key_sample.columns()) - throw Exception("Invalid partition key size: " + toString(value.size()), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid partition key size: {}", value.size()); if (value.empty()) return "all"; /// It is tempting to use an empty string here. But that would break directory structure in ZK. 
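(Editorial aside, not part of the patch: nearly every hunk in this change set migrates call sites from string-concatenated messages, `throw Exception("text " + toString(x), ErrorCodes::CODE)`, to the code-first constructor with an fmt-style format string, `throw Exception(ErrorCodes::CODE, "text {}", x)`. The sketch below is a minimal, hypothetical illustration of that pattern using only the standard library; ClickHouse's real DB::Exception is built on the fmt library and its own message machinery, which this does not reproduce.)

    #include <cstddef>
    #include <cstdio>
    #include <format>
    #include <stdexcept>
    #include <string_view>

    // Hypothetical stand-in for an error-code-aware exception: the constructor
    // takes the code first, then a format string with {} placeholders, and defers
    // message building to std::vformat instead of ad-hoc string concatenation.
    class CodedException : public std::runtime_error
    {
    public:
        template <typename... Args>
        CodedException(int code_, std::string_view fmt, Args &&... args)
            : std::runtime_error(std::vformat(fmt, std::make_format_args(args...)))
            , code(code_)
        {
        }

        int code;
    };

    int main()
    {
        constexpr int kLogicalError = 1; // arbitrary code chosen for this sketch
        const std::size_t pos = 10, rows = 5;
        try
        {
            throw CodedException(kLogicalError, "Position {} is not less than block rows {}", pos, rows);
        }
        catch (const CodedException & e)
        {
            // Prints: code 1: Position 10 is not less than block rows 5
            std::printf("code %d: %s\n", e.code, e.what());
        }
        return 0;
    }

Because the formatter handles integral and string arguments directly, the migrated call sites can also drop redundant toString()/std::to_string() wrappers around numeric arguments, as several hunks in this patch do (for example in MergeTreeMarksLoader.cpp and MergeTreeRangeReader.cpp).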
diff --git a/src/Storages/MergeTree/MergeTreePartsMover.cpp b/src/Storages/MergeTree/MergeTreePartsMover.cpp index b618b068769..6f83a82e4e6 100644 --- a/src/Storages/MergeTree/MergeTreePartsMover.cpp +++ b/src/Storages/MergeTree/MergeTreePartsMover.cpp @@ -205,7 +205,7 @@ bool MergeTreePartsMover::selectPartsForMove( MergeTreeMutableDataPartPtr MergeTreePartsMover::clonePart(const MergeTreeMoveEntry & moving_part) const { if (moves_blocker.isCancelled()) - throw Exception("Cancelled moving parts.", ErrorCodes::ABORTED); + throw Exception(ErrorCodes::ABORTED, "Cancelled moving parts."); auto settings = data->getSettings(); auto part = moving_part.part; @@ -240,7 +240,8 @@ MergeTreeMutableDataPartPtr MergeTreePartsMover::clonePart(const MergeTreeMoveEn cloned_part_storage = part->makeCloneOnDisk(disk, MergeTreeData::MOVING_DIR_NAME); } - auto cloned_part = data->createPart(part->name, cloned_part_storage); + MergeTreeDataPartBuilder builder(*data, part->name, cloned_part_storage); + auto cloned_part = std::move(builder).withPartFormatFromDisk().build(); LOG_TRACE(log, "Part {} was cloned to {}", part->name, cloned_part->getDataPartStorage().getFullPath()); cloned_part->loadColumnsChecksumsIndexes(true, true); @@ -253,7 +254,7 @@ MergeTreeMutableDataPartPtr MergeTreePartsMover::clonePart(const MergeTreeMoveEn void MergeTreePartsMover::swapClonedPart(const MergeTreeMutableDataPartPtr & cloned_part) const { if (moves_blocker.isCancelled()) - throw Exception("Cancelled moving parts.", ErrorCodes::ABORTED); + throw Exception(ErrorCodes::ABORTED, "Cancelled moving parts."); auto active_part = data->getActiveContainingPart(cloned_part->name); diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index 6f8da624449..f5afc0b37d6 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -189,24 +189,24 @@ MergeTreeRangeReader::Stream::Stream( { size_t marks_count = index_granularity->getMarksCount(); if (from_mark >= marks_count) - throw Exception("Trying create stream to read from mark №"+ toString(current_mark) + " but total marks count is " - + toString(marks_count), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying create stream to read from mark №{} but total marks count is {}", + toString(current_mark), toString(marks_count)); if (last_mark > marks_count) - throw Exception("Trying create stream to read to mark №"+ toString(current_mark) + " but total marks count is " - + toString(marks_count), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying create stream to read to mark №{} but total marks count is {}", + toString(current_mark), toString(marks_count)); } void MergeTreeRangeReader::Stream::checkNotFinished() const { if (isFinished()) - throw Exception("Cannot read out of marks range.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot read out of marks range."); } void MergeTreeRangeReader::Stream::checkEnoughSpaceInCurrentGranule(size_t num_rows) const { if (num_rows + offset_after_current_mark > current_mark_index_granularity) - throw Exception("Cannot read from granule more than index_granularity.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot read from granule more than index_granularity."); } size_t MergeTreeRangeReader::Stream::readRows(Columns & columns, size_t num_rows) @@ -229,7 +229,8 @@ void MergeTreeRangeReader::Stream::toNextMark() else if 
(current_mark == total_marks_count) current_mark_index_granularity = 0; /// HACK? else - throw Exception("Trying to read from mark " + toString(current_mark) + ", but total marks count " + toString(total_marks_count), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to read from mark {}, but total marks count {}", + toString(current_mark), toString(total_marks_count)); offset_after_current_mark = 0; } @@ -305,12 +306,12 @@ void MergeTreeRangeReader::ReadResult::adjustLastGranule() size_t num_rows_to_subtract = total_rows_per_granule - num_read_rows; if (rows_per_granule.empty()) - throw Exception("Can't adjust last granule because no granules were added", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't adjust last granule because no granules were added"); if (num_rows_to_subtract > rows_per_granule.back()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't adjust last granule because it has {} rows, but try to subtract {} rows.", - toString(rows_per_granule.back()), toString(num_rows_to_subtract)); + rows_per_granule.back(), num_rows_to_subtract); rows_per_granule.back() -= num_rows_to_subtract; total_rows_per_granule -= num_rows_to_subtract; @@ -922,7 +923,7 @@ bool MergeTreeRangeReader::isCurrentRangeFinished() const MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, MarkRanges & ranges) { if (max_rows == 0) - throw Exception("Expected at least 1 row to read, got 0.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected at least 1 row to read, got 0."); ReadResult read_result(log); @@ -1197,8 +1198,8 @@ Columns MergeTreeRangeReader::continueReadingChain(const ReadResult & result, si /// added_rows may be zero if all columns were read in prewhere and it's ok. 
if (num_rows && num_rows != result.total_rows_per_granule) - throw Exception("RangeReader read " + toString(num_rows) + " rows, but " - + toString(result.total_rows_per_granule) + " expected.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "RangeReader read {} rows, but {} expected.", + num_rows, result.total_rows_per_granule); return columns; } diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index 0f85fd2ad9c..37b24422af0 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -201,6 +201,10 @@ std::vector MergeTreeReadPool::fillPerPartInfo(const RangesInDataParts & for (const auto i : collections::range(0, parts.size())) { const auto & part = parts[i]; +#ifndef NDEBUG + assertSortedAndNonIntersecting(part.ranges); +#endif + bool part_on_remote_disk = part.data_part->isStoredOnRemoteDisk(); is_part_on_remote_disk[i] = part_on_remote_disk; do_not_steal_tasks |= part_on_remote_disk; @@ -315,7 +319,7 @@ void MergeTreeReadPool::fillPerThreadInfo( while (need_marks > 0) { if (part.ranges.empty()) - throw Exception("Unexpected end of ranges while spreading marks among threads", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected end of ranges while spreading marks among threads"); MarkRange & range = part.ranges.front(); diff --git a/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp b/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp index 3b3a6b95cff..c54f54e62e0 100644 --- a/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp @@ -57,16 +57,16 @@ size_t MergeTreeReaderInMemory::readRows( size_t total_marks = data_part_info_for_read->getIndexGranularity().getMarksCount(); if (from_mark >= total_marks) - throw Exception("Mark " + toString(from_mark) + " is out of bound. Max mark: " - + toString(total_marks), ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Mark {} is out of bound. Max mark: {}", + toString(from_mark), toString(total_marks)); size_t num_columns = res_columns.size(); checkNumberOfColumns(num_columns); size_t part_rows = part_in_memory->block.rows(); if (total_rows_read >= part_rows) - throw Exception("Cannot read data in MergeTreeReaderInMemory. Rows already read: " - + toString(total_rows_read) + ". Rows in part: " + toString(part_rows), ErrorCodes::CANNOT_READ_ALL_DATA); + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Cannot read data in MergeTreeReaderInMemory. " + "Rows already read: {}. Rows in part: {}", total_rows_read, part_rows); size_t rows_to_read = std::min(max_rows_to_read, part_rows - total_rows_read); for (size_t i = 0; i < num_columns; ++i) diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index ae1bace79e3..6eafd8824b8 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -25,7 +26,7 @@ struct Settings; M(UInt64, min_compress_block_size, 0, "When granule is written, compress the data in buffer if the size of pending uncompressed data is larger or equal than the specified threshold. If this setting is not set, the corresponding global setting is used.", 0) \ M(UInt64, max_compress_block_size, 0, "Compress the pending uncompressed data in buffer if its size is larger or equal than the specified threshold. 
Block of data will be compressed even if the current granule is not finished. If this setting is not set, the corresponding global setting is used.", 0) \ M(UInt64, index_granularity, 8192, "How many rows correspond to one primary key value.", 0) \ - M(UInt64, max_digestion_size_per_segment, 1024 * 1024 * 256, "Max number of bytes to digest per segment to build GIN index.", 0) \ + M(UInt64, max_digestion_size_per_segment, 256_MiB, "Max number of bytes to digest per segment to build GIN index.", 0) \ \ /** Data storing format settings. */ \ M(UInt64, min_bytes_for_wide_part, 10485760, "Minimal uncompressed size in bytes to create part in wide format instead of compact", 0) \ diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp index 5b916096e06..fabf2acdad3 100644 --- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp +++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp @@ -12,7 +12,7 @@ #include #include #include -#include "Storages/MergeTree/DataPartStorageOnDisk.h" +#include "Storages/MergeTree/DataPartStorageOnDiskFull.h" #include namespace DB @@ -176,16 +176,13 @@ MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore( else if (action_type == ActionType::ADD_PART) { auto single_disk_volume = std::make_shared("volume_" + part_name, disk, 0); - auto data_part_storage = std::make_shared(single_disk_volume, storage.getRelativeDataPath(), part_name); - part = storage.createPart( - part_name, - MergeTreeDataPartType::InMemory, - MergeTreePartInfo::fromPartName(part_name, storage.format_version), - data_part_storage); + part = storage.getDataPartBuilder(part_name, single_disk_volume, part_name) + .withPartType(MergeTreeDataPartType::InMemory) + .withPartStorageType(MergeTreeDataPartStorageType::Full) + .build(); part->uuid = metadata.part_uuid; - block = block_in.read(); if (storage.getActiveContainingPart(part->info, MergeTreeDataPartState::Active, parts_lock)) @@ -193,7 +190,7 @@ MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore( } else { - throw Exception("Unknown action type: " + toString(static_cast(action_type)), ErrorCodes::CORRUPTED_DATA); + throw Exception(ErrorCodes::CORRUPTED_DATA, "Unknown action type: {}", toString(static_cast(action_type))); } } catch (const Exception & e) @@ -356,8 +353,9 @@ void MergeTreeWriteAheadLog::ActionMetadata::read(ReadBuffer & meta_in) { readIntBinary(min_compatible_version, meta_in); if (min_compatible_version > WAL_VERSION) - throw Exception("WAL metadata version " + toString(min_compatible_version) - + " is not compatible with this ClickHouse version", ErrorCodes::UNKNOWN_FORMAT_VERSION); + throw Exception(ErrorCodes::UNKNOWN_FORMAT_VERSION, + "WAL metadata version {} is not compatible with this ClickHouse version", + toString(min_compatible_version)); size_t metadata_size; readVarUInt(metadata_size, meta_in); diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 3a7484a4141..ced43ae25b0 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -242,8 +242,8 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis written_files.emplace_back(std::move(file)); } else if (rows_count) - throw Exception("MinMax index was not initialized for new non-empty part " + new_part->name - + ". 
It is a bug.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "MinMax index was not initialized for new non-empty part {}. It is a bug.", + new_part->name); } { @@ -298,8 +298,8 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis } else { - throw Exception("Compression codec have to be specified for part on disk, empty for" + new_part->name - + ". It is a bug.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Compression codec have to be specified for part on disk, empty for{}. " + "It is a bug.", new_part->name); } { diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp index e4a5a0bc3ba..03829f1daf9 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp @@ -42,7 +42,7 @@ MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( auto * writer_on_disk = dynamic_cast(writer.get()); if (!writer_on_disk) - throw Exception("MergedColumnOnlyOutputStream supports only parts stored on disk", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergedColumnOnlyOutputStream supports only parts stored on disk"); writer_on_disk->setWrittenOffsetColumns(offset_columns_); } diff --git a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp index b432841d5b0..d9d7c496d9f 100644 --- a/src/Storages/MergeTree/MutateFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MutateFromLogEntryTask.cpp @@ -22,7 +22,9 @@ ReplicatedMergeMutateTaskBase::PrepareResult MutateFromLogEntryTask::prepare() MergeTreeData::DataPartPtr source_part = storage.getActiveContainingPart(source_part_name); if (!source_part) { - LOG_DEBUG(log, "Source part {} for {} is not ready; will try to fetch it instead", source_part_name, entry.new_part_name); + LOG_DEBUG(log, "Source part {} for {} is missing; will try to fetch it instead. 
" + "Either pool for fetches is starving, see background_fetches_pool_size, or none of active replicas has it", + source_part_name, entry.new_part_name); return PrepareResult{ .prepared_successfully = false, .need_to_check_missing_part_in_fetch = true, @@ -106,7 +108,7 @@ ReplicatedMergeMutateTaskBase::PrepareResult MutateFromLogEntryTask::prepare() future_mutated_part->parts.push_back(source_part); future_mutated_part->part_info = new_part_info; future_mutated_part->updatePath(storage, reserved_space.get()); - future_mutated_part->type = source_part->getType(); + future_mutated_part->part_format = source_part->getFormat(); if (storage_settings_ptr->allow_remote_fs_zero_copy_replication) { @@ -192,6 +194,10 @@ ReplicatedMergeMutateTaskBase::PrepareResult MutateFromLogEntryTask::prepare() bool MutateFromLogEntryTask::finalize(ReplicatedMergeMutateTaskBase::PartLogWriter write_part_log) { new_part = mutate_task->getFuture().get(); + auto & data_part_storage = new_part->getDataPartStorage(); + if (data_part_storage.hasActiveTransaction()) + data_part_storage.precommitTransaction(); + storage.renameTempPartAndReplace(new_part, *transaction_ptr); try diff --git a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp index 6ebf3e1fc22..5211e0f9c33 100644 --- a/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp +++ b/src/Storages/MergeTree/MutatePlainMergeTreeTask.cpp @@ -60,7 +60,6 @@ void MutatePlainMergeTreeTask::prepare() bool MutatePlainMergeTreeTask::executeStep() { - /// Make out memory tracker a parent of current thread memory tracker MemoryTrackerThreadSwitcherPtr switcher; if (merge_list_entry) @@ -82,6 +81,9 @@ bool MutatePlainMergeTreeTask::executeStep() return true; new_part = mutate_task->getFuture().get(); + auto & data_part_storage = new_part->getDataPartStorage(); + if (data_part_storage.hasActiveTransaction()) + data_part_storage.precommitTransaction(); MergeTreeData::Transaction transaction(storage, merge_mutate_entry->txn.get()); /// FIXME Transactions: it's too optimistic, better to lock parts before starting transaction @@ -100,13 +102,13 @@ bool MutatePlainMergeTreeTask::executeStep() merge_mutate_entry->txn->onException(); PreformattedMessage exception_message = getCurrentExceptionMessageAndPattern(/* with_stacktrace */ false); LOG_ERROR(&Poco::Logger::get("MutatePlainMergeTreeTask"), exception_message); - storage.updateMutationEntriesErrors(future_part, false, exception_message.message); + storage.updateMutationEntriesErrors(future_part, false, exception_message.text); write_part_log(ExecutionStatus::fromCurrentException()); tryLogCurrentException(__PRETTY_FUNCTION__); return false; } } - case State::NEED_FINISH : + case State::NEED_FINISH: { // Nothing to do state = State::SUCCESS; diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 6e222a562a0..47df0cfe42e 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include @@ -60,7 +60,7 @@ static void splitMutationCommands( { auto part_columns = part->getColumnsDescription(); - if (!isWidePart(part)) + if (!isWidePart(part) || !isFullPartStorage(part->getDataPartStorage())) { NameSet mutated_columns; for (const auto & command : commands) @@ -217,7 +217,7 @@ getColumnsForNewDataPart( /// In compact parts we read all columns, because they all stored in a /// single file - if (!isWidePart(source_part)) + if 
(!isWidePart(source_part) || !isFullPartStorage(source_part->getDataPartStorage())) return {updated_header.getNamesAndTypesList(), new_serialization_infos}; const auto & source_columns = source_part->getColumns(); @@ -268,8 +268,10 @@ getColumnsForNewDataPart( /// should it's previous version should be dropped or removed if (renamed_columns_to_from.contains(it->name) && !was_renamed && !was_removed) throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Incorrect mutation commands, trying to rename column {} to {}, but part {} already has column {}", renamed_columns_to_from[it->name], it->name, source_part->name, it->name); + ErrorCodes::LOGICAL_ERROR, + "Incorrect mutation commands, trying to rename column {} to {}, " + "but part {} already has column {}", + renamed_columns_to_from[it->name], it->name, source_part->name, it->name); /// Column was renamed and no other column renamed to it's name /// or column is dropped. @@ -628,6 +630,8 @@ void finalizeMutatedPart( ContextPtr context, bool sync) { + std::vector> written_files; + if (new_data_part->uuid != UUIDHelpers::Nil) { auto out = new_data_part->getDataPartStorage().writeFile(IMergeTreeDataPart::UUID_FILE_NAME, 4096, context->getWriteSettings()); @@ -635,8 +639,7 @@ void finalizeMutatedPart( writeUUIDText(new_data_part->uuid, out_hashing); new_data_part->checksums.files[IMergeTreeDataPart::UUID_FILE_NAME].file_size = out_hashing.count(); new_data_part->checksums.files[IMergeTreeDataPart::UUID_FILE_NAME].file_hash = out_hashing.getHash(); - if (sync) - out_hashing.sync(); + written_files.push_back(std::move(out)); } if (execute_ttl_type != ExecuteTTLType::NONE) @@ -647,50 +650,60 @@ void finalizeMutatedPart( new_data_part->ttl_infos.write(out_hashing); new_data_part->checksums.files["ttl.txt"].file_size = out_hashing.count(); new_data_part->checksums.files["ttl.txt"].file_hash = out_hashing.getHash(); - if (sync) - out_hashing.sync(); + written_files.push_back(std::move(out_ttl)); } if (!new_data_part->getSerializationInfos().empty()) { - auto out = new_data_part->getDataPartStorage().writeFile(IMergeTreeDataPart::SERIALIZATION_FILE_NAME, 4096, context->getWriteSettings()); - HashingWriteBuffer out_hashing(*out); + auto out_serialization = new_data_part->getDataPartStorage().writeFile(IMergeTreeDataPart::SERIALIZATION_FILE_NAME, 4096, context->getWriteSettings()); + HashingWriteBuffer out_hashing(*out_serialization); new_data_part->getSerializationInfos().writeJSON(out_hashing); new_data_part->checksums.files[IMergeTreeDataPart::SERIALIZATION_FILE_NAME].file_size = out_hashing.count(); new_data_part->checksums.files[IMergeTreeDataPart::SERIALIZATION_FILE_NAME].file_hash = out_hashing.getHash(); - if (sync) - out_hashing.sync(); + written_files.push_back(std::move(out_serialization)); } { /// Write file with checksums. 
auto out_checksums = new_data_part->getDataPartStorage().writeFile("checksums.txt", 4096, context->getWriteSettings()); new_data_part->checksums.write(*out_checksums); - if (sync) - out_checksums->sync(); - } /// close fd + written_files.push_back(std::move(out_checksums)); + } { - auto out = new_data_part->getDataPartStorage().writeFile(IMergeTreeDataPart::DEFAULT_COMPRESSION_CODEC_FILE_NAME, 4096, context->getWriteSettings()); - DB::writeText(queryToString(codec->getFullCodecDesc()), *out); - if (sync) - out->sync(); - } /// close fd + auto out_comp = new_data_part->getDataPartStorage().writeFile(IMergeTreeDataPart::DEFAULT_COMPRESSION_CODEC_FILE_NAME, 4096, context->getWriteSettings()); + DB::writeText(queryToString(codec->getFullCodecDesc()), *out_comp); + written_files.push_back(std::move(out_comp)); + } { /// Write a file with a description of columns. auto out_columns = new_data_part->getDataPartStorage().writeFile("columns.txt", 4096, context->getWriteSettings()); new_data_part->getColumns().writeText(*out_columns); + written_files.push_back(std::move(out_columns)); + } + + for (auto & file : written_files) + { + file->finalize(); if (sync) - out_columns->sync(); - } /// close fd + file->sync(); + } + /// Close files + written_files.clear(); new_data_part->rows_count = source_part->rows_count; new_data_part->index_granularity = source_part->index_granularity; new_data_part->index = source_part->index; new_data_part->minmax_idx = source_part->minmax_idx; new_data_part->modification_time = time(nullptr); + + /// This line should not be here because at that moment + /// of executing of mutation all projections should be loaded. + /// But unfortunately without it some tests fail. + /// TODO: fix. new_data_part->loadProjections(false, false); + /// All information about sizes is stored in checksums. /// It doesn't make sense to touch filesystem for sizes. new_data_part->setBytesOnDisk(new_data_part->checksums.getTotalSizeOnDisk()); @@ -1173,7 +1186,7 @@ private: ctx->projections_to_build = MutationHelpers::getProjectionsForNewDataPart(ctx->metadata_snapshot->getProjections(), ctx->for_file_renames); if (!ctx->mutating_pipeline_builder.initialized()) - throw Exception("Cannot mutate part columns with uninitialized mutations stream. It's a bug", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot mutate part columns with uninitialized mutations stream. It's a bug"); QueryPipelineBuilder builder(std::move(ctx->mutating_pipeline_builder)); @@ -1256,7 +1269,6 @@ public: case State::NEED_PREPARE: { prepare(); - state = State::NEED_EXECUTE; return true; } @@ -1485,7 +1497,6 @@ MutateTask::MutateTask( bool MutateTask::execute() { - switch (state) { case State::NEED_PREPARE: @@ -1526,7 +1537,7 @@ bool MutateTask::prepare() if (ctx->future_part->parts.size() != 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to mutate {} parts, not one. 
" - "This is a bug.", toString(ctx->future_part->parts.size())); + "This is a bug.", ctx->future_part->parts.size()); ctx->num_mutations = std::make_unique(CurrentMetrics::PartMutation); @@ -1575,7 +1586,6 @@ bool MutateTask::prepare() prefix = "tmp_clone_"; auto [part, lock] = ctx->data->cloneAndLoadDataPartOnSameDisk(ctx->source_part, prefix, ctx->future_part->part_info, ctx->metadata_snapshot, ctx->txn, &ctx->hardlinked_files, false, files_to_copy_instead_of_hardlinks); - part->getDataPartStorage().beginTransaction(); ctx->temporary_directory_lock = std::move(lock); @@ -1616,15 +1626,12 @@ bool MutateTask::prepare() String tmp_part_dir_name = prefix + ctx->future_part->name; ctx->temporary_directory_lock = ctx->data->getTemporaryPartDirectoryHolder(tmp_part_dir_name); - auto data_part_storage = std::make_shared( - single_disk_volume, - ctx->data->getRelativeDataPath(), - tmp_part_dir_name); + auto builder = ctx->data->getDataPartBuilder(ctx->future_part->name, single_disk_volume, tmp_part_dir_name); + builder.withPartFormat(ctx->future_part->part_format); + builder.withPartInfo(ctx->future_part->part_info); - data_part_storage->beginTransaction(); - - ctx->new_data_part = ctx->data->createPart( - ctx->future_part->name, ctx->future_part->type, ctx->future_part->part_info, data_part_storage); + ctx->new_data_part = std::move(builder).build(); + ctx->new_data_part->getDataPartStorage().beginTransaction(); ctx->new_data_part->uuid = ctx->future_part->uuid; ctx->new_data_part->is_temp = true; @@ -1652,7 +1659,7 @@ bool MutateTask::prepare() /// All columns from part are changed and may be some more that were missing before in part /// TODO We can materialize compact part without copying data - if (!isWidePart(ctx->source_part) + if (!isWidePart(ctx->source_part) || !isFullPartStorage(ctx->source_part->getDataPartStorage()) || (ctx->mutation_kind == MutationsInterpreter::MutationKind::MUTATE_OTHER && ctx->interpreter && ctx->interpreter->isAffectingAllColumns())) { task = std::make_unique(ctx); @@ -1708,8 +1715,8 @@ bool MutateTask::prepare() files_to_copy_instead_of_hardlinks.insert(IMergeTreeDataPart::FILE_FOR_REFERENCES_CHECK); auto [part, lock] = ctx->data->cloneAndLoadDataPartOnSameDisk(ctx->source_part, prefix, ctx->future_part->part_info, ctx->metadata_snapshot, ctx->txn, &ctx->hardlinked_files, false, files_to_copy_instead_of_hardlinks); - part->getDataPartStorage().beginTransaction(); + ctx->temporary_directory_lock = std::move(lock); promise.set_value(std::move(part)); return false; diff --git a/src/Storages/MergeTree/PartMetadataManagerWithCache.cpp b/src/Storages/MergeTree/PartMetadataManagerWithCache.cpp index 90fd25bc4e7..b6260d5edb6 100644 --- a/src/Storages/MergeTree/PartMetadataManagerWithCache.cpp +++ b/src/Storages/MergeTree/PartMetadataManagerWithCache.cpp @@ -177,7 +177,7 @@ void PartMetadataManagerWithCache::assertAllDeleted(bool include_projection) con { throw Exception( ErrorCodes::LOGICAL_ERROR, - "Data part {} with type {} with meta file {} with projection name still in cache", + "Data part {} with type {} with meta file {} with projection name {} still in cache", part->name, part->getType().toString(), file_path, diff --git a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp index 560d9f17a07..65a82988a5d 100644 --- a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp +++ b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp @@ -702,7 +702,7 @@ 
PartMovesBetweenShardsOrchestrator::Entry PartMovesBetweenShardsOrchestrator::ge return entry; } - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Task with id {} not found", toString(task_uuid)); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Task with id {} not found", task_uuid); } String PartMovesBetweenShardsOrchestrator::Entry::toString() const diff --git a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.h b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.h index 2474b5807e2..24454b897af 100644 --- a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.h +++ b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.h @@ -84,7 +84,7 @@ public: case CANCELLED: return "CANCELLED"; } - throw Exception("Unknown EntryState: " + DB::toString(value), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown EntryState: {}", DB::toString(value)); } static EntryState::Value fromString(String in) @@ -100,7 +100,7 @@ public: else if (in == "REMOVE_UUID_PIN") return REMOVE_UUID_PIN; else if (in == "DONE") return DONE; else if (in == "CANCELLED") return CANCELLED; - else throw Exception("Unknown state: " + in, ErrorCodes::LOGICAL_ERROR); + else throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown state: {}", in); } }; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index 1c667b1c867..0409cadc1e9 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -90,7 +90,7 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs() Coordination::Stat stat; if (!zookeeper->exists(storage.zookeeper_path + "/log", &stat)) - throw Exception(storage.zookeeper_path + "/log doesn't exist", ErrorCodes::NOT_FOUND_NODE); + throw Exception(ErrorCodes::NOT_FOUND_NODE, "{}/log doesn't exist", storage.zookeeper_path); int children_count = stat.numChildren; @@ -293,7 +293,7 @@ void ReplicatedMergeTreeCleanupThread::markLostReplicas(const std::unordered_map } if (candidate_lost_replicas.size() == replicas_count) - throw Exception("All replicas are stale: we won't mark any replica as lost", ErrorCodes::ALL_REPLICAS_LOST); + throw Exception(ErrorCodes::ALL_REPLICAS_LOST, "All replicas are stale: we won't mark any replica as lost"); std::vector futures; for (size_t i = 0; i < candidate_lost_replicas.size(); ++i) @@ -303,7 +303,7 @@ void ReplicatedMergeTreeCleanupThread::markLostReplicas(const std::unordered_map { auto multi_responses = futures[i].get(); if (multi_responses.responses[0]->error == Coordination::Error::ZBADVERSION) - throw Exception(candidate_lost_replicas[i] + " became active when we marked lost replicas.", DB::ErrorCodes::REPLICA_STATUS_CHANGED); + throw Exception(DB::ErrorCodes::REPLICA_STATUS_CHANGED, "{} became active when we marked lost replicas.", candidate_lost_replicas[i]); zkutil::KeeperMultiException::check(multi_responses.error, requests[i], multi_responses.responses); } } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp index d7e3c3b1955..556751d4dbb 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.cpp @@ -165,8 +165,16 @@ void ReplicatedMergeTreeLogEntryData::writeText(WriteBuffer & out) const out << '\n'; - if (new_part_type != MergeTreeDataPartType::Wide && new_part_type != MergeTreeDataPartType::Unknown) - out << "part_type: " << 
new_part_type.toString() << "\n"; + using PartType = MergeTreeDataPartType; + using StorageType = MergeTreeDataPartStorageType; + + auto part_type = new_part_format.part_type; + if (part_type != PartType::Wide && part_type != PartType::Unknown) + out << "part_type: " << part_type.toString() << "\n"; + + auto storage_type = new_part_format.storage_type; + if (storage_type != StorageType::Full && storage_type != StorageType::Unknown) + out << "storage_type: " << storage_type.toString() << "\n"; if (quorum) out << "quorum: " << quorum << '\n'; @@ -342,13 +350,21 @@ void ReplicatedMergeTreeLogEntryData::readText(ReadBuffer & in) if (checkString("part_type: ", in)) { - String part_type_str; in >> type_str; - new_part_type.fromString(type_str); + new_part_format.part_type.fromString(type_str); in >> "\n"; } else - new_part_type = MergeTreeDataPartType::Wide; + new_part_format.part_type = MergeTreeDataPartType::Wide; + + if (checkString("storage_type: ", in)) + { + in >> type_str; + new_part_format.storage_type.fromString(type_str); + in >> "\n"; + } + else + new_part_format.storage_type = MergeTreeDataPartStorageType::Full; /// Optional field. if (!in.eof()) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h index 84115cb2b8a..7f504baaab3 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h @@ -63,7 +63,7 @@ struct ReplicatedMergeTreeLogEntryData case ReplicatedMergeTreeLogEntryData::SYNC_PINNED_PART_UUIDS: return "SYNC_PINNED_PART_UUIDS"; case ReplicatedMergeTreeLogEntryData::CLONE_PART_FROM_SHARD: return "CLONE_PART_FROM_SHARD"; default: - throw Exception("Unknown log entry type: " + DB::toString(type), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown log entry type: {}", DB::toString(type)); } } @@ -88,7 +88,7 @@ struct ReplicatedMergeTreeLogEntryData /// The name of resulting part for GET_PART and MERGE_PARTS /// Part range for DROP_RANGE and CLEAR_COLUMN String new_part_name; - MergeTreeDataPartType new_part_type; + MergeTreeDataPartFormat new_part_format; String block_id; /// For parts of level zero, the block identifier for deduplication (node name in /blocks/). mutable String actual_new_part_name; /// GET_PART could actually fetch a part covering 'new_part_name'. UUID new_part_uuid = UUIDHelpers::Nil; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index 6f1d8dd93e7..88c9aa2d943 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -67,11 +67,14 @@ void ReplicatedMergeTreePartCheckThread::enqueuePart(const String & name, time_t task->schedule(); } -void ReplicatedMergeTreePartCheckThread::cancelRemovedPartsCheck(const MergeTreePartInfo & drop_range_info) +std::unique_lock ReplicatedMergeTreePartCheckThread::pausePartsCheck() { /// Wait for running tasks to finish and temporarily stop checking - auto pause_checking_parts = task->getExecLock(); + return task->getExecLock(); +} +void ReplicatedMergeTreePartCheckThread::cancelRemovedPartsCheck(const MergeTreePartInfo & drop_range_info) +{ std::lock_guard lock(parts_mutex); for (auto it = parts_queue.begin(); it != parts_queue.end();) { @@ -126,7 +129,7 @@ ReplicatedMergeTreePartCheckThread::MissingPartSearchResult ReplicatedMergeTreeP * and don't delete the queue entry when in doubt. 
*/ - LOG_WARNING(log, "Checking if anyone has a part {} or covering part.", part_name); + LOG_INFO(log, "Checking if anyone has a part {} or covering part.", part_name); bool found_part_with_the_same_min_block = false; bool found_part_with_the_same_max_block = false; @@ -300,7 +303,7 @@ std::pair ReplicatedMergeTreePartCheckThread::findLo CheckResult ReplicatedMergeTreePartCheckThread::checkPart(const String & part_name) { - LOG_WARNING(log, "Checking part {}", part_name); + LOG_INFO(log, "Checking part {}", part_name); ProfileEvents::increment(ProfileEvents::ReplicatedPartChecks); auto [exists_in_zookeeper, part] = findLocalPart(part_name); @@ -347,7 +350,7 @@ CheckResult ReplicatedMergeTreePartCheckThread::checkPart(const String & part_na } if (local_part_header.getColumnsHash() != zk_part_header.getColumnsHash()) - throw Exception("Columns of local part " + part_name + " are different from ZooKeeper", ErrorCodes::TABLE_DIFFERS_TOO_MUCH); + throw Exception(ErrorCodes::TABLE_DIFFERS_TOO_MUCH, "Columns of local part {} are different from ZooKeeper", part_name); zk_part_header.getChecksums().checkEqual(local_part_header.getChecksums(), true); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h index 145c0e70ad7..170b2ca1f60 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.h @@ -71,6 +71,9 @@ public: /// Check part by name CheckResult checkPart(const String & part_name); + std::unique_lock pausePartsCheck(); + + /// Can be called only while holding a lock returned from pausePartsCheck() void cancelRemovedPartsCheck(const MergeTreePartInfo & drop_range_info); private: diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index bd75d76109a..e47dddb9795 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -432,7 +432,7 @@ void ReplicatedMergeTreeQueue::removeCoveredPartsFromMutations(const String & pa else if (remove_part) status.parts_to_do.remove(part_name); else - throw Exception("Called remove part from mutations, but nothing removed", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Called remove part from mutations, but nothing removed"); if (status.parts_to_do.size() == 0) some_mutations_are_probably_done = true; @@ -581,7 +581,7 @@ int32_t ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper } if (pull_log_blocker.isCancelled()) - throw Exception("Log pulling is cancelled", ErrorCodes::ABORTED); + throw Exception(ErrorCodes::ABORTED, "Log pulling is cancelled"); String index_str = zookeeper->get(fs::path(replica_path) / "log_pointer"); UInt64 index; @@ -637,8 +637,8 @@ int32_t ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper String last_entry = *last; if (!startsWith(last_entry, "log-")) - throw Exception("Error in zookeeper data: unexpected node " + last_entry + " in " + zookeeper_path + "/log", - ErrorCodes::UNEXPECTED_NODE_IN_ZOOKEEPER); + throw Exception(ErrorCodes::UNEXPECTED_NODE_IN_ZOOKEEPER, "Error in zookeeper data: unexpected node {} in {}/log", + last_entry, zookeeper_path); UInt64 last_entry_index = parse(last_entry.substr(strlen("log-"))); @@ -1576,7 +1576,7 @@ void ReplicatedMergeTreeQueue::CurrentlyExecuting::setActualPartName( std::vector & covered_entries_to_wait) { if 
(!entry.actual_new_part_name.empty()) - throw Exception("Entry actual part isn't empty yet. This is a bug.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Entry actual part isn't empty yet. This is a bug."); entry.actual_new_part_name = actual_part_name; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 164ef4f3239..76b96899dac 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -274,10 +274,10 @@ ReplicatedMergeTreeSinkImpl::~ReplicatedMergeTreeSinkImpl() = defa static void assertSessionIsNotExpired(const zkutil::ZooKeeperPtr & zookeeper) { if (!zookeeper) - throw Exception("No ZooKeeper session.", ErrorCodes::NO_ZOOKEEPER); + throw Exception(ErrorCodes::NO_ZOOKEEPER, "No ZooKeeper session."); if (zookeeper->expired()) - throw Exception("ZooKeeper session has been expired.", ErrorCodes::NO_ZOOKEEPER); + throw Exception(ErrorCodes::NO_ZOOKEEPER, "ZooKeeper session has been expired."); } template @@ -341,7 +341,7 @@ size_t ReplicatedMergeTreeSinkImpl::checkQuorumPrecondition(const auto host = get_results[1]; if (is_active.error == Coordination::Error::ZNONODE || host.error == Coordination::Error::ZNONODE) - throw Exception("Replica is not active right now", ErrorCodes::READONLY); + throw Exception(ErrorCodes::READONLY, "Replica is not active right now"); quorum_info.is_active_node_version = is_active.stat.version; quorum_info.host_node_version = host.stat.version; @@ -395,7 +395,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) if (const auto * chunk_offsets_ptr = typeid_cast(chunk_info.get())) chunk_offsets = std::make_shared(chunk_offsets_ptr->offsets); else - throw Exception("No chunk info for async inserts", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "No chunk info for async inserts"); } auto part_blocks = storage.writer.splitBlockIntoParts(block, max_parts_per_block, metadata_snapshot, context, chunk_offsets); @@ -748,9 +748,10 @@ std::vector ReplicatedMergeTreeSinkImpl::commitPart( log_entry.new_part_name = part->name; /// TODO maybe add UUID here as well? log_entry.quorum = getQuorumSize(replicas_num); + log_entry.new_part_format = part->getFormat(); + if constexpr (!async_insert) log_entry.block_id = block_id; - log_entry.new_part_type = part->getType(); ops.emplace_back(zkutil::makeCreateRequest( storage.zookeeper_path + "/log/log-", @@ -861,7 +862,9 @@ std::vector ReplicatedMergeTreeSinkImpl::commitPart( block_id_path.clear(); } else - throw Exception("Conflict block ids and block number lock should not be empty at the same time for async inserts", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Conflict block ids and block number lock should not " + "be empty at the same time for async inserts"); /// Information about the part. 
storage.getCommitPartOps(ops, part, block_id_path); @@ -1000,7 +1003,7 @@ std::vector ReplicatedMergeTreeSinkImpl::commitPart( if (loop_counter == max_iterations) { part->is_duplicate = true; /// Part is duplicate, just remove it from local FS - throw Exception("Too many transaction retries - it may indicate an error", ErrorCodes::DUPLICATE_DATA_PART); + throw Exception(ErrorCodes::DUPLICATE_DATA_PART, "Too many transaction retries - it may indicate an error"); } retries_ctl.requestUnconditionalRetry(); /// we want one more iteration w/o counting it as a try and timeout return; @@ -1021,7 +1024,7 @@ std::vector ReplicatedMergeTreeSinkImpl::commitPart( /// So make it temporary to avoid its resurrection on restart rename_part_to_temporary(); - throw Exception("Another quorum insert has been already started", ErrorCodes::UNSATISFIED_QUORUM_FOR_PREVIOUS_WRITE); + throw Exception(ErrorCodes::UNSATISFIED_QUORUM_FOR_PREVIOUS_WRITE, "Another quorum insert has been already started"); } else { @@ -1125,7 +1128,7 @@ void ReplicatedMergeTreeSinkImpl::waitForQuorum( break; if (!event->tryWait(quorum_timeout_ms)) - throw Exception("Timeout while waiting for quorum", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Timeout while waiting for quorum"); LOG_TRACE(log, "Quorum {} for part {} updated, will check quorum node still exists", quorum_path, part_name); } @@ -1136,14 +1139,14 @@ void ReplicatedMergeTreeSinkImpl::waitForQuorum( String value; if (!zookeeper->tryGet(storage.replica_path + "/is_active", value, &stat) || stat.version != is_active_node_version) - throw Exception("Replica become inactive while waiting for quorum", ErrorCodes::NO_ACTIVE_REPLICAS); + throw Exception(ErrorCodes::NO_ACTIVE_REPLICAS, "Replica become inactive while waiting for quorum"); } catch (...) { /// We do not know whether or not data has been inserted /// - whether other replicas have time to download the part and mark the quorum as done. - throw Exception("Unknown status, client must retry. Reason: " + getCurrentExceptionMessage(false), - ErrorCodes::UNKNOWN_STATUS_OF_INSERT); + throw Exception(ErrorCodes::UNKNOWN_STATUS_OF_INSERT, "Unknown status, client must retry. Reason: {}", + getCurrentExceptionMessage(false)); } LOG_TRACE(log, "Quorum '{}' for part {} satisfied", quorum_path, part_name); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index cb916e393cf..f06ce725daa 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -186,58 +186,50 @@ void ReplicatedMergeTreeTableMetadata::checkImmutableFieldsEquals(const Replicat if (data_format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { if (date_column != from_zk.date_column) - throw Exception("Existing table metadata in ZooKeeper differs in date index column." - " Stored in ZooKeeper: " + from_zk.date_column + ", local: " + date_column, - ErrorCodes::METADATA_MISMATCH); + throw Exception(ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in date index column. " + "Stored in ZooKeeper: {}, local: {}", from_zk.date_column, date_column); } else if (!from_zk.date_column.empty()) { - throw Exception( - "Existing table metadata in ZooKeeper differs in date index column." 
- " Stored in ZooKeeper: " + from_zk.date_column + ", local is custom-partitioned.", - ErrorCodes::METADATA_MISMATCH); + throw Exception(ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in date index column. " + "Stored in ZooKeeper: {}, local is custom-partitioned.", from_zk.date_column); } if (index_granularity != from_zk.index_granularity) - throw Exception("Existing table metadata in ZooKeeper differs in index granularity." - " Stored in ZooKeeper: " + DB::toString(from_zk.index_granularity) + ", local: " + DB::toString(index_granularity), - ErrorCodes::METADATA_MISMATCH); + throw Exception(ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs " + "in index granularity. Stored in ZooKeeper: {}, local: {}", + DB::toString(from_zk.index_granularity), DB::toString(index_granularity)); if (merging_params_mode != from_zk.merging_params_mode) - throw Exception("Existing table metadata in ZooKeeper differs in mode of merge operation." - " Stored in ZooKeeper: " + DB::toString(from_zk.merging_params_mode) + ", local: " - + DB::toString(merging_params_mode), - ErrorCodes::METADATA_MISMATCH); + throw Exception(ErrorCodes::METADATA_MISMATCH, + "Existing table metadata in ZooKeeper differs in mode of merge operation. " + "Stored in ZooKeeper: {}, local: {}", DB::toString(from_zk.merging_params_mode), + DB::toString(merging_params_mode)); if (sign_column != from_zk.sign_column) - throw Exception("Existing table metadata in ZooKeeper differs in sign column." - " Stored in ZooKeeper: " + from_zk.sign_column + ", local: " + sign_column, - ErrorCodes::METADATA_MISMATCH); + throw Exception(ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in sign column. " + "Stored in ZooKeeper: {}, local: {}", from_zk.sign_column, sign_column); /// NOTE: You can make a less strict check of match expressions so that tables do not break from small changes /// in formatAST code. String parsed_zk_primary_key = formattedAST(KeyDescription::parse(from_zk.primary_key, columns, context).expression_list_ast); if (primary_key != parsed_zk_primary_key) - throw Exception("Existing table metadata in ZooKeeper differs in primary key." - " Stored in ZooKeeper: " + from_zk.primary_key + - ", parsed from ZooKeeper: " + parsed_zk_primary_key + - ", local: " + primary_key, - ErrorCodes::METADATA_MISMATCH); + throw Exception(ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in primary key. " + "Stored in ZooKeeper: {}, parsed from ZooKeeper: {}, local: {}", + from_zk.primary_key, parsed_zk_primary_key, primary_key); if (data_format_version != from_zk.data_format_version) - throw Exception("Existing table metadata in ZooKeeper differs in data format version." - " Stored in ZooKeeper: " + DB::toString(from_zk.data_format_version.toUnderType()) + - ", local: " + DB::toString(data_format_version.toUnderType()), - ErrorCodes::METADATA_MISMATCH); + throw Exception(ErrorCodes::METADATA_MISMATCH, + "Existing table metadata in ZooKeeper differs in data format version. " + "Stored in ZooKeeper: {}, local: {}", DB::toString(from_zk.data_format_version.toUnderType()), + DB::toString(data_format_version.toUnderType())); String parsed_zk_partition_key = formattedAST(KeyDescription::parse(from_zk.partition_key, columns, context).expression_list_ast); if (partition_key != parsed_zk_partition_key) - throw Exception( - "Existing table metadata in ZooKeeper differs in partition key expression." 
- " Stored in ZooKeeper: " + from_zk.partition_key + - ", parsed from ZooKeeper: " + parsed_zk_partition_key + - ", local: " + partition_key, - ErrorCodes::METADATA_MISMATCH); + throw Exception(ErrorCodes::METADATA_MISMATCH, + "Existing table metadata in ZooKeeper differs in partition key expression. " + "Stored in ZooKeeper: {}, parsed from ZooKeeper: {}, local: {}", + from_zk.partition_key, parsed_zk_partition_key, partition_key); } void ReplicatedMergeTreeTableMetadata::checkEquals(const ReplicatedMergeTreeTableMetadata & from_zk, const ColumnsDescription & columns, ContextPtr context) const @@ -248,75 +240,57 @@ void ReplicatedMergeTreeTableMetadata::checkEquals(const ReplicatedMergeTreeTabl String parsed_zk_sampling_expression = formattedAST(KeyDescription::parse(from_zk.sampling_expression, columns, context).definition_ast); if (sampling_expression != parsed_zk_sampling_expression) { - throw Exception( - "Existing table metadata in ZooKeeper differs in sample expression." - " Stored in ZooKeeper: " + from_zk.sampling_expression + - ", parsed from ZooKeeper: " + parsed_zk_sampling_expression + - ", local: " + sampling_expression, - ErrorCodes::METADATA_MISMATCH); + throw Exception(ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in sample expression. " + "Stored in ZooKeeper: {}, parsed from ZooKeeper: {}, local: {}", + from_zk.sampling_expression, parsed_zk_sampling_expression, sampling_expression); } String parsed_zk_sorting_key = formattedAST(extractKeyExpressionList(KeyDescription::parse(from_zk.sorting_key, columns, context).definition_ast)); if (sorting_key != parsed_zk_sorting_key) { - throw Exception( - "Existing table metadata in ZooKeeper differs in sorting key expression." - " Stored in ZooKeeper: " + from_zk.sorting_key + - ", parsed from ZooKeeper: " + parsed_zk_sorting_key + - ", local: " + sorting_key, - ErrorCodes::METADATA_MISMATCH); + throw Exception(ErrorCodes::METADATA_MISMATCH, + "Existing table metadata in ZooKeeper differs in sorting key expression. " + "Stored in ZooKeeper: {}, parsed from ZooKeeper: {}, local: {}", + from_zk.sorting_key, parsed_zk_sorting_key, sorting_key); } auto parsed_primary_key = KeyDescription::parse(primary_key, columns, context); String parsed_zk_ttl_table = formattedAST(TTLTableDescription::parse(from_zk.ttl_table, columns, context, parsed_primary_key).definition_ast); if (ttl_table != parsed_zk_ttl_table) { - throw Exception( - "Existing table metadata in ZooKeeper differs in TTL." - " Stored in ZooKeeper: " + from_zk.ttl_table + - ", parsed from ZooKeeper: " + parsed_zk_ttl_table + - ", local: " + ttl_table, - ErrorCodes::METADATA_MISMATCH); + throw Exception(ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in TTL. " + "Stored in ZooKeeper: {}, parsed from ZooKeeper: {}, local: {}", + from_zk.ttl_table, parsed_zk_ttl_table, ttl_table); } String parsed_zk_skip_indices = IndicesDescription::parse(from_zk.skip_indices, columns, context).toString(); if (skip_indices != parsed_zk_skip_indices) { - throw Exception( - "Existing table metadata in ZooKeeper differs in skip indexes." - " Stored in ZooKeeper: " + from_zk.skip_indices + - ", parsed from ZooKeeper: " + parsed_zk_skip_indices + - ", local: " + skip_indices, - ErrorCodes::METADATA_MISMATCH); + throw Exception(ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in skip indexes. 
" + "Stored in ZooKeeper: {}, parsed from ZooKeeper: {}, local: {}", + from_zk.skip_indices, parsed_zk_skip_indices, skip_indices); } String parsed_zk_projections = ProjectionsDescription::parse(from_zk.projections, columns, context).toString(); if (projections != parsed_zk_projections) { - throw Exception( - "Existing table metadata in ZooKeeper differs in projections." - " Stored in ZooKeeper: " + from_zk.projections + - ", parsed from ZooKeeper: " + parsed_zk_projections + - ", local: " + projections, - ErrorCodes::METADATA_MISMATCH); + throw Exception(ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in projections. " + "Stored in ZooKeeper: {}, parsed from ZooKeeper: {}, local: {}", + from_zk.projections, parsed_zk_projections, projections); } String parsed_zk_constraints = ConstraintsDescription::parse(from_zk.constraints).toString(); if (constraints != parsed_zk_constraints) { - throw Exception( - "Existing table metadata in ZooKeeper differs in constraints." - " Stored in ZooKeeper: " + from_zk.constraints + - ", parsed from ZooKeeper: " + parsed_zk_constraints + - ", local: " + constraints, - ErrorCodes::METADATA_MISMATCH); + throw Exception(ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in constraints. " + "Stored in ZooKeeper: {}, parsed from ZooKeeper: {}, local: {}", + from_zk.constraints, parsed_zk_constraints, constraints); } if (from_zk.index_granularity_bytes_found_in_zk && index_granularity_bytes != from_zk.index_granularity_bytes) - throw Exception("Existing table metadata in ZooKeeper differs in index granularity bytes." - " Stored in ZooKeeper: " + DB::toString(from_zk.index_granularity_bytes) + - ", local: " + DB::toString(index_granularity_bytes), - ErrorCodes::METADATA_MISMATCH); + throw Exception(ErrorCodes::METADATA_MISMATCH, + "Existing table metadata in ZooKeeper differs in index granularity bytes. " + "Stored in ZooKeeper: {}, local: {}", from_zk.index_granularity_bytes, index_granularity_bytes); } ReplicatedMergeTreeTableMetadata::Diff diff --git a/src/Storages/MergeTree/ZooKeeperRetries.h b/src/Storages/MergeTree/ZooKeeperRetries.h index 22ace074245..cddde4673f1 100644 --- a/src/Storages/MergeTree/ZooKeeperRetries.h +++ b/src/Storages/MergeTree/ZooKeeperRetries.h @@ -212,7 +212,7 @@ private: void throwIfError() const { if (user_error.code != ErrorCodes::OK) - throw Exception(user_error.code, user_error.message); + throw Exception::createDeprecated(user_error.message, user_error.code); if (keeper_error.code != KeeperError::Code::ZOK) throw zkutil::KeeperException(keeper_error.code, keeper_error.message); diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index 6f9f16b6155..2ec83d99eeb 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -71,9 +71,8 @@ IMergeTreeDataPart::Checksums checkDataPart( } if (columns_txt != columns_list) - throw Exception("Columns doesn't match in part " + data_part_storage.getFullPath() - + ". Expected: " + columns_list.toString() - + ". Found: " + columns_txt.toString(), ErrorCodes::CORRUPTED_DATA); + throw Exception(ErrorCodes::CORRUPTED_DATA, "Columns doesn't match in part {}. Expected: {}. Found: {}", + data_part_storage.getFullPath(), columns_list.toString(), columns_txt.toString()); /// Real checksums based on contents of data. Must correspond to checksums.txt. If not - it means the data is broken. 
IMergeTreeDataPart::Checksums checksums_data; @@ -144,7 +143,7 @@ IMergeTreeDataPart::Checksums checkDataPart( } else { - throw Exception("Unknown type in part " + data_part_storage.getFullPath(), ErrorCodes::UNKNOWN_PART_TYPE); + throw Exception(ErrorCodes::UNKNOWN_PART_TYPE, "Unknown type in part {}", data_part_storage.getFullPath()); } /// Checksums from the rest files listed in checksums.txt. May be absent. If present, they are subsequently compared with the actual data checksums. diff --git a/src/Storages/MergeTree/localBackup.cpp b/src/Storages/MergeTree/localBackup.cpp index 3559eff1f6b..3b05e3df8d3 100644 --- a/src/Storages/MergeTree/localBackup.cpp +++ b/src/Storages/MergeTree/localBackup.cpp @@ -25,7 +25,7 @@ void localBackupImpl( return; if (level >= 1000) - throw DB::Exception("Too deep recursion", DB::ErrorCodes::TOO_DEEP_RECURSION); + throw DB::Exception(DB::ErrorCodes::TOO_DEEP_RECURSION, "Too deep recursion"); disk->createDirectories(destination_path); @@ -93,7 +93,8 @@ void localBackup( { if (disk->exists(destination_path) && !disk->isDirectoryEmpty(destination_path)) { - throw DB::Exception("Directory " + fullPath(disk, destination_path) + " already exists and is not empty.", DB::ErrorCodes::DIRECTORY_ALREADY_EXISTS); + throw DB::Exception(ErrorCodes::DIRECTORY_ALREADY_EXISTS, "Directory {} already exists and is not empty.", + DB::fullPath(disk, destination_path)); } size_t try_no = 0; diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 620591abbf3..2a94991ab8b 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -61,11 +61,7 @@ static Names extractColumnNames(const ASTPtr & node) } } -static String getMergeTreeVerboseHelp(bool) -{ - using namespace std::string_literals; - - String help = R"( +constexpr auto verbose_help_message = R"( Syntax for the MergeTree table engine: @@ -89,9 +85,6 @@ See details in documentation: https://clickhouse.com/docs/en/engines/table-engin If you use the Replicated version of engines, see https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/replication/. )"; - return help; -} - static ColumnsDescription getColumnsDescriptionFromZookeeper(const String & raw_zookeeper_path, ContextMutablePtr context) { String zookeeper_name = zkutil::extractZooKeeperName(raw_zookeeper_path); @@ -180,8 +173,8 @@ static StoragePtr create(const StorageFactory::Arguments & args) else if (name_part == "VersionedCollapsing") merging_params.mode = MergeTreeData::MergingParams::VersionedCollapsing; else if (!name_part.empty()) - throw Exception( - "Unknown storage " + args.engine_name + getMergeTreeVerboseHelp(is_extended_storage_def), ErrorCodes::UNKNOWN_STORAGE); + throw Exception(ErrorCodes::UNKNOWN_STORAGE, "Unknown storage {}", + args.engine_name + verbose_help_message); /// NOTE Quite complicated. @@ -256,13 +249,6 @@ static StoragePtr create(const StorageFactory::Arguments & args) if (arg_cnt < min_num_params || arg_cnt > max_num_params) { String msg; - if (is_extended_storage_def) - msg += fmt::format("With extended storage definition syntax storage {} requires ", args.engine_name); - else - msg += fmt::format("ORDER BY or PRIMARY KEY clause is missing. " - "Consider using extended storage definition syntax with ORDER BY or PRIMARY KEY clause. 
" - "With deprecated old syntax (highly not recommended) storage {} requires ", args.engine_name); - if (max_num_params == 0) msg += "no parameters"; else if (min_num_params == max_num_params) @@ -270,10 +256,14 @@ static StoragePtr create(const StorageFactory::Arguments & args) else msg += fmt::format("{} to {} parameters: {}", min_num_params, max_num_params, needed_params); - - msg += getMergeTreeVerboseHelp(is_extended_storage_def); - - throw Exception(msg, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + if (is_extended_storage_def) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "With extended storage definition syntax storage {} requires {}{}", + args.engine_name, msg, verbose_help_message); + else + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "ORDER BY or PRIMARY KEY clause is missing. " + "Consider using extended storage definition syntax with ORDER BY or PRIMARY KEY clause. " + "With deprecated old syntax (highly not recommended) storage {} requires {}{}", + args.engine_name, msg, verbose_help_message); } if (is_extended_storage_def) @@ -301,7 +291,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) catch (Exception & e) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot evaluate engine argument {}: {} {}", - arg_idx, e.message(), getMergeTreeVerboseHelp(is_extended_storage_def)); + arg_idx, e.message(), verbose_help_message); } } else if (!args.attach && !args.getLocalContext()->getSettingsRef().allow_deprecated_syntax_for_merge_tree) @@ -388,21 +378,17 @@ static StoragePtr create(const StorageFactory::Arguments & args) if (ast_zk_path && ast_zk_path->value.getType() == Field::Types::String) zookeeper_path = ast_zk_path->value.safeGet(); else - throw Exception( - "Path in ZooKeeper must be a string literal" + getMergeTreeVerboseHelp(is_extended_storage_def), - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path in ZooKeeper must be a string literal{}", verbose_help_message); ++arg_num; ast_replica_name = engine_args[arg_num]->as(); if (ast_replica_name && ast_replica_name->value.getType() == Field::Types::String) replica_name = ast_replica_name->value.safeGet(); else - throw Exception( - "Replica name must be a string literal" + getMergeTreeVerboseHelp(is_extended_storage_def), ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Replica name must be a string literal{}", verbose_help_message); if (replica_name.empty()) - throw Exception( - "No replica name in config" + getMergeTreeVerboseHelp(is_extended_storage_def), ErrorCodes::NO_REPLICA_NAME_GIVEN); + throw Exception(ErrorCodes::NO_REPLICA_NAME_GIVEN, "No replica name in config{}", verbose_help_message); ++arg_num; expand_macro(ast_zk_path, ast_replica_name); @@ -437,7 +423,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) arg_cnt += 2; } else - throw Exception("Expected two string literal arguments: zookeeper_path and replica_name", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected two string literal arguments: zookeeper_path and replica_name"); } /// This merging param maybe used as part of sorting key @@ -446,9 +432,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) if (merging_params.mode == MergeTreeData::MergingParams::Collapsing) { if (!tryGetIdentifierNameInto(engine_args[arg_cnt - 1], merging_params.sign_column)) - throw Exception( - "Sign column name must be an unquoted string" + getMergeTreeVerboseHelp(is_extended_storage_def), - 
ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Sign column name must be an unquoted string{}", verbose_help_message); --arg_cnt; } else if (merging_params.mode == MergeTreeData::MergingParams::Replacing) @@ -457,9 +441,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) if (arg_cnt && !engine_args[arg_cnt - 1]->as()) { if (!tryGetIdentifierNameInto(engine_args[arg_cnt - 1], merging_params.version_column)) - throw Exception( - "Version column name must be an unquoted string" + getMergeTreeVerboseHelp(is_extended_storage_def), - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Version column name must be an unquoted string{}", verbose_help_message); --arg_cnt; } } @@ -475,19 +457,19 @@ static StoragePtr create(const StorageFactory::Arguments & args) else if (merging_params.mode == MergeTreeData::MergingParams::Graphite) { String graphite_config_name; - String error_msg - = "Last parameter of GraphiteMergeTree must be the name (in single quotes) of the element in configuration file with the Graphite options"; - error_msg += getMergeTreeVerboseHelp(is_extended_storage_def); + constexpr auto format_str = "Last parameter of GraphiteMergeTree must be the name (in single quotes) of the element " + "in configuration file with the Graphite options{}"; + String error_msg = verbose_help_message; if (const auto * ast = engine_args[arg_cnt - 1]->as()) { if (ast->value.getType() != Field::Types::String) - throw Exception(error_msg, ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, format_str, error_msg); graphite_config_name = ast->value.get(); } else - throw Exception(error_msg, ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, format_str, error_msg); --arg_cnt; setGraphitePatternsFromConfig(args.getContext(), graphite_config_name, merging_params.graphite_params); @@ -495,16 +477,12 @@ static StoragePtr create(const StorageFactory::Arguments & args) else if (merging_params.mode == MergeTreeData::MergingParams::VersionedCollapsing) { if (!tryGetIdentifierNameInto(engine_args[arg_cnt - 1], merging_params.version_column)) - throw Exception( - "Version column name must be an unquoted string" + getMergeTreeVerboseHelp(is_extended_storage_def), - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Version column name must be an unquoted string{}", verbose_help_message); --arg_cnt; if (!tryGetIdentifierNameInto(engine_args[arg_cnt - 1], merging_params.sign_column)) - throw Exception( - "Sign column name must be an unquoted string" + getMergeTreeVerboseHelp(is_extended_storage_def), - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Sign column name must be an unquoted string{}", verbose_help_message); --arg_cnt; /// Version collapsing is the only engine which add additional column to @@ -544,10 +522,9 @@ static StoragePtr create(const StorageFactory::Arguments & args) args.storage_def->set(args.storage_def->order_by, args.storage_def->primary_key->clone()); if (!args.storage_def->order_by) - throw Exception( - "You must provide an ORDER BY or PRIMARY KEY expression in the table definition. " - "If you don't want this table to be sorted, use ORDER BY/PRIMARY KEY tuple()", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "You must provide an ORDER BY or PRIMARY KEY expression in the table definition. " + "If you don't want this table to be sorted, use ORDER BY/PRIMARY KEY tuple()"); /// Get sorting key from engine arguments. 
/// @@ -625,9 +602,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) /// Syntax: *MergeTree(..., date, [sample_key], primary_key, index_granularity, ...) /// Get date: if (!tryGetIdentifierNameInto(engine_args[arg_num], date_column_name)) - throw Exception( - "Date column name must be an unquoted string" + getMergeTreeVerboseHelp(is_extended_storage_def), - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Date column name must be an unquoted string{}", verbose_help_message); auto partition_by_ast = makeASTFunction("toYYYYMM", std::make_shared(date_column_name)); @@ -677,13 +652,11 @@ static StoragePtr create(const StorageFactory::Arguments & args) } } else - throw Exception( - "Index granularity must be a positive integer" + getMergeTreeVerboseHelp(is_extended_storage_def), - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Index granularity must be a positive integer{}", verbose_help_message); ++arg_num; if (args.storage_def->ttl_table && !args.attach) - throw Exception("Table TTL is not allowed for MergeTree in old syntax", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table TTL is not allowed for MergeTree in old syntax"); } DataTypes data_types = metadata.partition_key.data_types; @@ -696,7 +669,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) } if (arg_num != arg_cnt) - throw Exception("Wrong number of engine arguments.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong number of engine arguments."); if (replicated) { diff --git a/src/Storages/MutationCommands.cpp b/src/Storages/MutationCommands.cpp index ffc2cfc3086..0c9e9223929 100644 --- a/src/Storages/MutationCommands.cpp +++ b/src/Storages/MutationCommands.cpp @@ -46,8 +46,9 @@ std::optional MutationCommand::parse(ASTAlterCommand * command, const auto & assignment = assignment_ast->as(); auto insertion = res.column_to_update_expression.emplace(assignment.column_name, assignment.expression()); if (!insertion.second) - throw Exception("Multiple assignments in the single statement to column " + backQuote(assignment.column_name), - ErrorCodes::MULTIPLE_ASSIGNMENTS_TO_COLUMN); + throw Exception(ErrorCodes::MULTIPLE_ASSIGNMENTS_TO_COLUMN, + "Multiple assignments in the single statement to column {}", + backQuote(assignment.column_name)); } return res; } @@ -188,7 +189,7 @@ void MutationCommands::readText(ReadBuffer & in) auto * command_ast = child->as(); auto command = MutationCommand::parse(command_ast, true); if (!command) - throw Exception("Unknown mutation command type: " + DB::toString(command_ast->type), ErrorCodes::UNKNOWN_MUTATION_COMMAND); + throw Exception(ErrorCodes::UNKNOWN_MUTATION_COMMAND, "Unknown mutation command type: {}", DB::toString(command_ast->type)); push_back(std::move(*command)); } } diff --git a/src/Storages/NATS/StorageNATS.cpp b/src/Storages/NATS/StorageNATS.cpp index cb5dff7d082..f1724b8c14c 100644 --- a/src/Storages/NATS/StorageNATS.cpp +++ b/src/Storages/NATS/StorageNATS.cpp @@ -291,7 +291,7 @@ void StorageNATS::read( size_t /* num_streams */) { if (!consumers_ready) - throw Exception("NATS consumers setup not finished. Connection might be lost", ErrorCodes::CANNOT_CONNECT_NATS); + throw Exception(ErrorCodes::CANNOT_CONNECT_NATS, "NATS consumers setup not finished. 
Connection might be lost"); if (num_created_consumers == 0) return; @@ -364,8 +364,9 @@ SinkToStoragePtr StorageNATS::write(const ASTPtr &, const StorageMetadataPtr & m if (subjects.size() > 1) { throw Exception( - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "This NATS engine reads from multiple subjects. You must specify `stream_like_engine_insert_queue` to choose the subject to write to"); + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "This NATS engine reads from multiple subjects. " + "You must specify `stream_like_engine_insert_queue` to choose the subject to write to"); } else { @@ -616,7 +617,7 @@ bool StorageNATS::streamToViews() auto table_id = getStorageID(); auto table = DatabaseCatalog::instance().getTable(table_id, getContext()); if (!table) - throw Exception("Engine table " + table_id.getNameForLogs() + " doesn't exist.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Engine table {} doesn't exist.", table_id.getNameForLogs()); // Create an INSERT query for streaming data auto insert = std::make_shared(); @@ -717,14 +718,13 @@ void registerStorageNATS(StorageFactory & factory) nats_settings->loadFromQuery(*args.storage_def); if (!nats_settings->nats_url.changed && !nats_settings->nats_server_list.changed) - throw Exception( - "You must specify either `nats_url` or `nats_server_list` settings", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "You must specify either `nats_url` or `nats_server_list` settings"); if (!nats_settings->nats_format.changed) - throw Exception("You must specify `nats_format` setting", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "You must specify `nats_format` setting"); if (!nats_settings->nats_subjects.changed) - throw Exception("You must specify `nats_subjects` setting", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "You must specify `nats_subjects` setting"); return std::make_shared(args.table_id, args.getContext(), args.columns, std::move(nats_settings), args.attach); }; diff --git a/src/Storages/PartitionCommands.cpp b/src/Storages/PartitionCommands.cpp index 9a54f6cc04b..92aea597ab3 100644 --- a/src/Storages/PartitionCommands.cpp +++ b/src/Storages/PartitionCommands.cpp @@ -67,7 +67,7 @@ std::optional PartitionCommand::parse(const ASTAlterCommand * res.move_destination_type = PartitionCommand::MoveDestinationType::SHARD; break; case DataDestinationType::DELETE: - throw Exception("ALTER with this destination type is not handled. This is a bug.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "ALTER with this destination type is not handled. 
This is a bug."); } if (res.move_destination_type != PartitionCommand::MoveDestinationType::TABLE) res.move_destination_name = command_ast->move_destination_name; @@ -163,7 +163,7 @@ std::string PartitionCommand::typeToString() const case PartitionCommand::Type::REPLACE_PARTITION: return "REPLACE PARTITION"; default: - throw Exception("Uninitialized partition command", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Uninitialized partition command"); } } diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp index f0c5807f89c..574b5d76bbe 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp @@ -106,11 +106,13 @@ void MaterializedPostgreSQLConsumer::assertCorrectInsertion(StorageData::Buffer || column_idx >= buffer.description.types.size() || column_idx >= buffer.columns.size()) throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Attempt to insert into buffer at position: {}, but block columns size is {}, types size: {}, columns size: {}, buffer structure: {}", - column_idx, - buffer.description.sample_block.columns(), buffer.description.types.size(), buffer.columns.size(), - buffer.description.sample_block.dumpStructure()); + ErrorCodes::LOGICAL_ERROR, + "Attempt to insert into buffer at position: " + "{}, but block columns size is {}, types size: {}, columns size: {}, buffer structure: {}", + column_idx, + buffer.description.sample_block.columns(), + buffer.description.types.size(), buffer.columns.size(), + buffer.description.sample_block.dumpStructure()); } diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp index b81e029acff..f450604fded 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp @@ -229,7 +229,8 @@ void PostgreSQLReplicationHandler::startSynchronization(bool throw_on_error) { if (user_provided_snapshot.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Using a user-defined replication slot must be provided with a snapshot from EXPORT SNAPSHOT when the slot is created." + "Using a user-defined replication slot must " + "be provided with a snapshot from EXPORT SNAPSHOT when the slot is created." "Pass it to `materialized_postgresql_snapshot` setting"); snapshot_name = user_provided_snapshot; } @@ -618,7 +619,8 @@ void PostgreSQLReplicationHandler::removeTableFromPublication(pqxx::nontransacti catch (const pqxx::undefined_table &) { /// Removing table from replication must succeed even if table does not exist in PostgreSQL. - LOG_WARNING(log, "Did not remove table {} from publication, because table does not exist in PostgreSQL", doubleQuoteWithSchema(table_name), publication_name); + LOG_WARNING(log, "Did not remove table {} from publication, because table does not exist in PostgreSQL (publication: {})", + doubleQuoteWithSchema(table_name), publication_name); } } @@ -755,7 +757,7 @@ std::set PostgreSQLReplicationHandler::fetchRequiredTables() } LOG_ERROR(log, - "Publication {} already exists, but specified tables list differs from publication tables list in tables: {}. ", + "Publication {} already exists, but specified tables list differs from publication tables list in tables: {}. " "Will use tables list from setting. " "To avoid redundant work, you can try ALTER PUBLICATION query to remove redundant tables. 
" "Or you can you ALTER SETTING. " diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp index 6d12960824a..a30dd6deb77 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp @@ -552,7 +552,7 @@ void registerStorageMaterializedPostgreSQL(StorageFactory & factory) args.storage_def->set(args.storage_def->order_by, args.storage_def->primary_key->clone()); if (!args.storage_def->order_by) - throw Exception("Storage MaterializedPostgreSQL needs order by key or primary key", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Storage MaterializedPostgreSQL needs order by key or primary key"); if (args.storage_def->primary_key) metadata.primary_key = KeyDescription::getKeyFromAST(args.storage_def->primary_key->ptr(), metadata.columns, args.getContext()); diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index 969577fdf3f..e568fba0495 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -91,13 +91,13 @@ ProjectionDescription::getProjectionFromAST(const ASTPtr & definition_ast, const const auto * projection_definition = definition_ast->as(); if (!projection_definition) - throw Exception("Cannot create projection from non ASTProjectionDeclaration AST", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot create projection from non ASTProjectionDeclaration AST"); if (projection_definition->name.empty()) - throw Exception("Projection must have name in definition.", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Projection must have name in definition."); if (!projection_definition->query) - throw Exception("QUERY is required for projection", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "QUERY is required for projection"); ProjectionDescription result; result.definition_ast = projection_definition->clone(); @@ -123,8 +123,7 @@ ProjectionDescription::getProjectionFromAST(const ASTPtr & definition_ast, const if (select.hasAggregation()) { if (query.orderBy()) - throw Exception( - "When aggregation is used in projection, ORDER BY cannot be specified", ErrorCodes::ILLEGAL_PROJECTION); + throw Exception(ErrorCodes::ILLEGAL_PROJECTION, "When aggregation is used in projection, ORDER BY cannot be specified"); result.type = ProjectionDescription::Type::Aggregate; if (const auto & group_expression_list = query_select.groupBy()) @@ -242,7 +241,7 @@ ProjectionDescription ProjectionDescription::getMinMaxCountProjection( result.sample_block_for_keys.insert({nullptr, key.type, key.name}); auto it = partition_column_name_to_value_index.find(key.name); if (it == partition_column_name_to_value_index.end()) - throw Exception("minmax_count projection can only have keys about partition columns. It's a bug", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "minmax_count projection can only have keys about partition columns. It's a bug"); result.partition_value_indices.push_back(it->second); } } @@ -295,7 +294,7 @@ Block ProjectionDescription::calculate(const Block & block, ContextPtr context) Block ret; executor.pull(ret); if (executor.pull(ret)) - throw Exception("Projection cannot increase the number of rows in a block. 
It's a bug", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Projection cannot increase the number of rows in a block. It's a bug"); return ret; } @@ -342,7 +341,7 @@ const ProjectionDescription & ProjectionsDescription::get(const String & project { String exception_message = fmt::format("There is no projection {} in table", projection_name); appendHintsMessage(exception_message, projection_name); - throw Exception(exception_message, ErrorCodes::NO_SUCH_PROJECTION_IN_TABLE); + throw Exception::createDeprecated(exception_message, ErrorCodes::NO_SUCH_PROJECTION_IN_TABLE); } return *(it->second); @@ -354,8 +353,8 @@ void ProjectionsDescription::add(ProjectionDescription && projection, const Stri { if (if_not_exists) return; - throw Exception( - "Cannot add projection " + projection.name + ": projection with this name already exists", ErrorCodes::ILLEGAL_PROJECTION); + throw Exception(ErrorCodes::ILLEGAL_PROJECTION, "Cannot add projection {}: projection with this name already exists", + projection.name); } auto insert_it = projections.cend(); @@ -387,7 +386,7 @@ void ProjectionsDescription::remove(const String & projection_name, bool if_exis String exception_message = fmt::format("There is no projection {} in table", projection_name); appendHintsMessage(exception_message, projection_name); - throw Exception(exception_message, ErrorCodes::NO_SUCH_PROJECTION_IN_TABLE); + throw Exception::createDeprecated(exception_message, ErrorCodes::NO_SUCH_PROJECTION_IN_TABLE); } projections.erase(it->second); diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 04decb91f7d..c23ef063145 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -216,7 +216,7 @@ AMQP::ExchangeType StorageRabbitMQ::defineExchangeType(String exchange_type_) else if (exchange_type_ == ExchangeType::TOPIC) type = AMQP::ExchangeType::topic; else if (exchange_type_ == ExchangeType::HASH) type = AMQP::ExchangeType::consistent_hash; else if (exchange_type_ == ExchangeType::HEADERS) type = AMQP::ExchangeType::headers; - else throw Exception("Invalid exchange type", ErrorCodes::BAD_ARGUMENTS); + else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid exchange type"); } else { @@ -404,8 +404,9 @@ void StorageRabbitMQ::initExchange(AMQP::TcpChannel & rabbit_channel) /// This error can be a result of attempt to declare exchange if it was already declared but /// 1) with different exchange type. /// 2) with different exchange settings. - throw Exception("Unable to declare exchange. Make sure specified exchange is not already declared. Error: " - + std::string(message), ErrorCodes::CANNOT_DECLARE_RABBITMQ_EXCHANGE); + throw Exception(ErrorCodes::CANNOT_DECLARE_RABBITMQ_EXCHANGE, + "Unable to declare exchange. Make sure specified exchange is not already declared. Error: {}", + std::string(message)); }); rabbit_channel.declareExchange(bridge_exchange, AMQP::fanout, AMQP::durable | AMQP::autodelete) @@ -413,7 +414,8 @@ void StorageRabbitMQ::initExchange(AMQP::TcpChannel & rabbit_channel) { /// This error is not supposed to happen as this exchange name is always unique to type and its settings. throw Exception( - ErrorCodes::CANNOT_DECLARE_RABBITMQ_EXCHANGE, "Unable to declare bridge exchange ({}). Reason: {}", bridge_exchange, std::string(message)); + ErrorCodes::CANNOT_DECLARE_RABBITMQ_EXCHANGE, + "Unable to declare bridge exchange ({}). 
Reason: {}", bridge_exchange, std::string(message)); }); if (!hash_exchange) @@ -548,10 +550,10 @@ void StorageRabbitMQ::bindQueue(size_t queue_id, AMQP::TcpChannel & rabbit_chann * max_block_size parameter. Solution: client should specify a different queue_base parameter or manually delete previously * declared queues via any of the various cli tools. */ - throw Exception("Failed to declare queue. Probably queue settings are conflicting: max_block_size, deadletter_exchange. Attempt \ - specifying differently those settings or use a different queue_base or manually delete previously declared queues, \ - which were declared with the same names. ERROR reason: " - + std::string(message), ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Failed to declare queue. Probably queue settings are conflicting: " + "max_block_size, deadletter_exchange. Attempt specifying differently those settings " + "or use a different queue_base or manually delete previously declared queues, " + "which were declared with the same names. ERROR reason: {}", std::string(message)); }); AMQP::Table queue_settings; @@ -650,7 +652,7 @@ void StorageRabbitMQ::unbindExchange() }) .onError([&](const char * message) { - throw Exception("Unable to remove exchange. Reason: " + std::string(message), ErrorCodes::CANNOT_REMOVE_RABBITMQ_EXCHANGE); + throw Exception(ErrorCodes::CANNOT_REMOVE_RABBITMQ_EXCHANGE, "Unable to remove exchange. Reason: {}", std::string(message)); }); connection->getHandler().startBlockingLoop(); @@ -676,7 +678,7 @@ void StorageRabbitMQ::read( size_t /* num_streams */) { if (!rabbit_is_ready) - throw Exception("RabbitMQ setup not finished. Connection might be lost", ErrorCodes::CANNOT_CONNECT_RABBITMQ); + throw Exception(ErrorCodes::CANNOT_CONNECT_RABBITMQ, "RabbitMQ setup not finished. Connection might be lost"); if (num_created_consumers == 0) { @@ -686,7 +688,8 @@ void StorageRabbitMQ::read( } if (!local_context->getSettingsRef().stream_like_engine_allow_direct_select) - throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "Direct select is not allowed. To enable use setting `stream_like_engine_allow_direct_select`"); + throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, + "Direct select is not allowed. 
To enable use setting `stream_like_engine_allow_direct_select`"); if (mv_attached) throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "Cannot read from StorageRabbitMQ with attached materialized views"); @@ -1056,7 +1059,7 @@ bool StorageRabbitMQ::streamToViews() auto table_id = getStorageID(); auto table = DatabaseCatalog::instance().getTable(table_id, getContext()); if (!table) - throw Exception("Engine table " + table_id.getNameForLogs() + " doesn't exist.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Engine table {} doesn't exist.", table_id.getNameForLogs()); // Create an INSERT query for streaming data auto insert = std::make_shared(); @@ -1194,11 +1197,11 @@ void registerStorageRabbitMQ(StorageFactory & factory) if (!rabbitmq_settings->rabbitmq_host_port.changed && !rabbitmq_settings->rabbitmq_address.changed) - throw Exception("You must specify either `rabbitmq_host_port` or `rabbitmq_address` settings", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "You must specify either `rabbitmq_host_port` or `rabbitmq_address` settings"); if (!rabbitmq_settings->rabbitmq_format.changed) - throw Exception("You must specify `rabbitmq_format` setting", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "You must specify `rabbitmq_format` setting"); return std::make_shared(args.table_id, args.getContext(), args.columns, std::move(rabbitmq_settings), args.attach); }; diff --git a/src/Storages/ReadInOrderOptimizer.cpp b/src/Storages/ReadInOrderOptimizer.cpp index 1d0c8e62d20..e0ef967d491 100644 --- a/src/Storages/ReadInOrderOptimizer.cpp +++ b/src/Storages/ReadInOrderOptimizer.cpp @@ -183,7 +183,7 @@ ReadInOrderOptimizer::ReadInOrderOptimizer( , query(query_) { if (elements_actions.size() != required_sort_description.size()) - throw Exception("Sizes of sort description and actions are mismatched", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Sizes of sort description and actions are mismatched"); /// Do not analyze joined columns. /// They may have aliases and come to description as is. 
diff --git a/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp b/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp index b1b158a2aa5..47d036c943d 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp +++ b/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp @@ -51,12 +51,12 @@ void EmbeddedRocksDBSink::consume(Chunk chunk) } status = batch.Put(wb_key.str(), wb_value.str()); if (!status.ok()) - throw Exception("RocksDB write error: " + status.ToString(), ErrorCodes::ROCKSDB_ERROR); + throw Exception(ErrorCodes::ROCKSDB_ERROR, "RocksDB write error: {}", status.ToString()); } status = storage.rocksdb_ptr->Write(rocksdb::WriteOptions(), &batch); if (!status.ok()) - throw Exception("RocksDB write error: " + status.ToString(), ErrorCodes::ROCKSDB_ERROR); + throw Exception(ErrorCodes::ROCKSDB_ERROR, "RocksDB write error: {}", status.ToString()); } } diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index 4e871300861..90034c81f10 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -33,7 +33,6 @@ #include #include -#include #include @@ -141,8 +140,8 @@ public: if (!iterator->status().ok()) { - throw Exception("Engine " + getName() + " got error while seeking key value data: " + iterator->status().ToString(), - ErrorCodes::ROCKSDB_ERROR); + throw Exception(ErrorCodes::ROCKSDB_ERROR, "Engine {} got error while seeking key value data: {}", + getName(), iterator->status().ToString()); } Block block = sample_block.cloneWithColumns(std::move(columns)); return Chunk(block.getColumns(), block.rows()); @@ -218,7 +217,7 @@ void StorageEmbeddedRocksDB::checkMutationIsPossible(const MutationCommands & co throw Exception(ErrorCodes::BAD_ARGUMENTS, "Only DELETE and UPDATE mutation supported for EmbeddedRocksDB"); } -void StorageEmbeddedRocksDB::mutate(const MutationCommands & commands, ContextPtr context_, bool /*force_wait*/) +void StorageEmbeddedRocksDB::mutate(const MutationCommands & commands, ContextPtr context_) { if (commands.empty()) return; @@ -262,12 +261,12 @@ void StorageEmbeddedRocksDB::mutate(const MutationCommands & commands, ContextPt column_it->type->getDefaultSerialization()->serializeBinary(*column, i, wb_key, {}); auto status = batch.Delete(wb_key.str()); if (!status.ok()) - throw Exception("RocksDB write error: " + status.ToString(), ErrorCodes::ROCKSDB_ERROR); + throw Exception(ErrorCodes::ROCKSDB_ERROR, "RocksDB write error: {}", status.ToString()); } auto status = rocksdb_ptr->Write(rocksdb::WriteOptions(), &batch); if (!status.ok()) - throw Exception("RocksDB write error: " + status.ToString(), ErrorCodes::ROCKSDB_ERROR); + throw Exception(ErrorCodes::ROCKSDB_ERROR, "RocksDB write error: {}", status.ToString()); } return; @@ -459,8 +458,10 @@ static StoragePtr create(const StorageFactory::Arguments & args) auto engine_args = args.engine_args; if (engine_args.size() > 3) { - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Engine {} requires at most 3 parameters. ({} given). Correct usage: EmbeddedRocksDB([ttl, rocksdb_dir, read_only])", - args.engine_name, engine_args.size()); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Engine {} requires at most 3 parameters. " + "({} given). 
Correct usage: EmbeddedRocksDB([ttl, rocksdb_dir, read_only])", + args.engine_name, engine_args.size()); } Int32 ttl{0}; @@ -478,20 +479,20 @@ static StoragePtr create(const StorageFactory::Arguments & args) metadata.setConstraints(args.constraints); if (!args.storage_def->primary_key) - throw Exception("StorageEmbeddedRocksDB must require one column in primary key", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "StorageEmbeddedRocksDB must require one column in primary key"); metadata.primary_key = KeyDescription::getKeyFromAST(args.storage_def->primary_key->ptr(), metadata.columns, args.getContext()); auto primary_key_names = metadata.getColumnsRequiredForPrimaryKey(); if (primary_key_names.size() != 1) { - throw Exception("StorageEmbeddedRocksDB must require one column in primary key", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "StorageEmbeddedRocksDB must require one column in primary key"); } return std::make_shared(args.table_id, args.relative_data_path, metadata, args.attach, args.getContext(), primary_key_names[0], ttl, std::move(rocksdb_dir), read_only); } std::shared_ptr StorageEmbeddedRocksDB::getRocksDBStatistics() const { - std::shared_lock lock(rocksdb_ptr_mx); + std::shared_lock lock(rocksdb_ptr_mx); if (!rocksdb_ptr) return nullptr; return rocksdb_ptr->GetOptions().statistics; @@ -499,7 +500,7 @@ std::shared_ptr StorageEmbeddedRocksDB::getRocksDBStatistic std::vector StorageEmbeddedRocksDB::multiGet(const std::vector & slices_keys, std::vector & values) const { - std::shared_lock lock(rocksdb_ptr_mx); + std::shared_lock lock(rocksdb_ptr_mx); if (!rocksdb_ptr) return {}; return rocksdb_ptr->MultiGet(rocksdb::ReadOptions(), slices_keys, &values); diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h index 02938fb5f69..32d7740009e 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include #include #include @@ -52,7 +52,7 @@ public: void truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, ContextPtr, TableExclusiveLockHolder &) override; void checkMutationIsPossible(const MutationCommands & commands, const Settings & settings) const override; - void mutate(const MutationCommands &, ContextPtr, bool) override; + void mutate(const MutationCommands &, ContextPtr) override; bool supportsParallelInsert() const override { return true; } bool supportsIndexForIn() const override { return true; } @@ -86,7 +86,7 @@ private: const String primary_key; using RocksDBPtr = std::unique_ptr; RocksDBPtr rocksdb_ptr; - mutable std::shared_mutex rocksdb_ptr_mx; + mutable SharedMutex rocksdb_ptr_mx; String rocksdb_dir; Int32 ttl; bool read_only; diff --git a/src/Storages/SelectQueryDescription.cpp b/src/Storages/SelectQueryDescription.cpp index 2cc8f769cf1..37615a4187a 100644 --- a/src/Storages/SelectQueryDescription.cpp +++ b/src/Storages/SelectQueryDescription.cpp @@ -64,8 +64,7 @@ StorageID extractDependentTableFromSelectQuery(ASTSelectQuery & query, ContextPt "StorageMaterializedView cannot be created from table functions ({})", serializeAST(*subquery)); if (ast_select->list_of_selects->children.size() != 1) - throw Exception("UNION is not supported for MATERIALIZED VIEW", - ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW); + throw Exception(ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW, "UNION is not supported for MATERIALIZED 
VIEW"); auto & inner_query = ast_select->list_of_selects->children.at(0); @@ -79,7 +78,7 @@ StorageID extractDependentTableFromSelectQuery(ASTSelectQuery & query, ContextPt void checkAllowedQueries(const ASTSelectQuery & query) { if (query.prewhere() || query.final() || query.sampleSize()) - throw Exception("MATERIALIZED VIEW cannot have PREWHERE, SAMPLE or FINAL.", DB::ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW); + throw Exception(DB::ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW, "MATERIALIZED VIEW cannot have PREWHERE, SAMPLE or FINAL."); ASTPtr subquery = extractTableExpression(query, 0); if (!subquery) @@ -88,7 +87,7 @@ void checkAllowedQueries(const ASTSelectQuery & query) if (const auto * ast_select = subquery->as()) { if (ast_select->list_of_selects->children.size() != 1) - throw Exception("UNION is not supported for MATERIALIZED VIEW", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW); + throw Exception(ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW, "UNION is not supported for MATERIALIZED VIEW"); const auto & inner_query = ast_select->list_of_selects->children.at(0); @@ -119,7 +118,7 @@ SelectQueryDescription SelectQueryDescription::getSelectQueryFromASTForMatView(c ASTPtr new_inner_query; if (!isSingleSelect(select, new_inner_query)) - throw Exception("UNION is not supported for MATERIALIZED VIEW", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW); + throw Exception(ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW, "UNION is not supported for MATERIALIZED VIEW"); auto & select_query = new_inner_query->as(); checkAllowedQueries(select_query); diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index cfdf2a630f9..4d3e46a36f5 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -111,7 +111,7 @@ StoragePtr StorageBuffer::getDestinationTable() const auto destination = DatabaseCatalog::instance().tryGetTable(destination_id, getContext()); if (destination.get() == this) - throw Exception("Destination table is myself. Will lead to infinite loop.", ErrorCodes::INFINITE_LOOP); + throw Exception(ErrorCodes::INFINITE_LOOP, "Destination table is myself. Will lead to infinite loop."); return destination; } @@ -518,7 +518,7 @@ static void appendBlock(Poco::Logger * log, const Block & from, Block & to) } /// But if there is still nothing, abort if (!col_to) - throw Exception("No column to rollback", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "No column to rollback"); if (col_to->size() != old_rows) col_to = col_to->cut(0, old_rows); } @@ -563,7 +563,7 @@ public: { destination = DatabaseCatalog::instance().tryGetTable(storage.destination_id, storage.getContext()); if (destination.get() == &storage) - throw Exception("Destination table is myself. Write will cause infinite loop.", ErrorCodes::INFINITE_LOOP); + throw Exception(ErrorCodes::INFINITE_LOOP, "Destination table is myself. 
Write will cause infinite loop."); } size_t bytes = block.bytes(); @@ -720,13 +720,13 @@ bool StorageBuffer::optimize( ContextPtr /*context*/) { if (partition) - throw Exception("Partition cannot be specified when optimizing table of type Buffer", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Partition cannot be specified when optimizing table of type Buffer"); if (final) - throw Exception("FINAL cannot be specified when optimizing table of type Buffer", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "FINAL cannot be specified when optimizing table of type Buffer"); if (deduplicate) - throw Exception("DEDUPLICATE cannot be specified when optimizing table of type Buffer", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "DEDUPLICATE cannot be specified when optimizing table of type Buffer"); flushAllBuffers(false); return true; @@ -1081,9 +1081,10 @@ void registerStorageBuffer(StorageFactory & factory) ASTs & engine_args = args.engine_args; if (engine_args.size() < 9 || engine_args.size() > 12) - throw Exception("Storage Buffer requires from 9 to 12 parameters: " - " destination_database, destination_table, num_buckets, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes[, flush_time, flush_rows, flush_bytes].", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Storage Buffer requires from 9 to 12 parameters: " + " destination_database, destination_table, num_buckets, min_time, max_time, min_rows, " + "max_rows, min_bytes, max_bytes[, flush_time, flush_rows, flush_bytes]."); // Table and database name arguments accept expressions, evaluate them. engine_args[0] = evaluateConstantExpressionForDatabaseName(engine_args[0], args.getLocalContext()); diff --git a/src/Storages/StorageDictionary.cpp b/src/Storages/StorageDictionary.cpp index a76c4dffb5b..5d1f08771be 100644 --- a/src/Storages/StorageDictionary.cpp +++ b/src/Storages/StorageDictionary.cpp @@ -153,7 +153,7 @@ void StorageDictionary::checkTableCanBeDropped() const dictionary_name); if (location == Location::DictionaryDatabase) throw Exception(ErrorCodes::CANNOT_DETACH_DICTIONARY_AS_TABLE, - "Cannot drop/detach table from a database with DICTIONARY engine, use DROP DICTIONARY or DETACH DICTIONARY query instead", + "Cannot drop/detach table '{}' from a database with DICTIONARY engine, use DROP DICTIONARY or DETACH DICTIONARY query instead", dictionary_name); } @@ -342,8 +342,7 @@ void registerStorageDictionary(StorageFactory & factory) /// Create dictionary storage that is view of underlying dictionary if (args.engine_args.size() != 1) - throw Exception("Storage Dictionary requires single parameter: name of dictionary", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Storage Dictionary requires single parameter: name of dictionary"); args.engine_args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(args.engine_args[0], local_context); String dictionary_name = checkAndGetLiteralArgument(args.engine_args[0], "dictionary_name"); diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 0b48bb70286..740ad67cc95 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -394,8 +394,10 @@ StorageDistributed::StorageDistributed( size_t num_local_shards = getCluster()->getLocalShardCount(); if (num_local_shards && (remote_database.empty() || 
remote_database == id_.database_name) && remote_table == id_.table_name) - throw Exception("Distributed table " + id_.table_name + " looks at itself", ErrorCodes::INFINITE_LOOP); + throw Exception(ErrorCodes::INFINITE_LOOP, "Distributed table {} looks at itself", id_.table_name); } + + initializeFromDisk(); } @@ -480,7 +482,7 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage( /// NOTE: distributed_group_by_no_merge=1 does not respect distributed_push_down_limit /// (since in this case queries processed separately and the initiator is just a proxy in this case). if (to_stage != QueryProcessingStage::Complete) - throw Exception("Queries with distributed_group_by_no_merge=1 should be processed to Complete stage", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Queries with distributed_group_by_no_merge=1 should be processed to Complete stage"); return QueryProcessingStage::Complete; } } @@ -766,7 +768,7 @@ void StorageDistributed::read( /// This is a bug, it is possible only when there is no shards to query, and this is handled earlier. if (!query_plan.isInitialized()) - throw Exception("Pipeline is not initialized", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Pipeline is not initialized"); if (local_context->getSettingsRef().allow_experimental_analyzer) { @@ -794,8 +796,8 @@ SinkToStoragePtr StorageDistributed::write(const ASTPtr &, const StorageMetadata /// Ban an attempt to make async insert into the table belonging to DatabaseMemory if (!storage_policy && !owned_cluster && !settings.insert_distributed_sync && !settings.insert_shard_id) { - throw Exception("Storage " + getName() + " must have own data directory to enable asynchronous inserts", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Storage {} must have own data directory to enable asynchronous inserts", + getName()); } auto shard_num = cluster->getLocalShardCount() + cluster->getRemoteShardCount(); @@ -803,14 +805,13 @@ SinkToStoragePtr StorageDistributed::write(const ASTPtr &, const StorageMetadata /// If sharding key is not specified, then you can only write to a shard containing only one shard if (!settings.insert_shard_id && !settings.insert_distributed_one_random_shard && !has_sharding_key && shard_num >= 2) { - throw Exception( - "Method write is not supported by storage " + getName() + " with more than one shard and no sharding key provided", - ErrorCodes::STORAGE_REQUIRES_PARAMETER); + throw Exception(ErrorCodes::STORAGE_REQUIRES_PARAMETER, + "Method write is not supported by storage {} with more than one shard and no sharding key provided", getName()); } if (settings.insert_shard_id && settings.insert_shard_id > shard_num) { - throw Exception("Shard id should be range from 1 to shard number", ErrorCodes::INVALID_SHARD_ID); + throw Exception(ErrorCodes::INVALID_SHARD_ID, "Shard id should be range from 1 to shard number"); } /// Force sync insertion if it is remote() table function @@ -917,8 +918,8 @@ std::optional StorageDistributed::distributedWriteBetweenDistribu auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(settings); auto connections = shard_info.pool->getMany(timeouts, &settings, PoolMode::GET_ONE); if (connections.empty() || connections.front().isNull()) - throw Exception( - "Expected exactly one connection for shard " + toString(shard_info.shard_num), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected exactly one connection for shard {}", + 
shard_info.shard_num); /// INSERT SELECT query returns empty block auto remote_query_executor @@ -1006,7 +1007,7 @@ std::optional StorageDistributed::distributedWrite(const ASTInser { const Settings & settings = local_context->getSettingsRef(); if (settings.max_distributed_depth && local_context->getClientInfo().distributed_depth >= settings.max_distributed_depth) - throw Exception("Maximum distributed depth exceeded", ErrorCodes::TOO_LARGE_DISTRIBUTED_DEPTH); + throw Exception(ErrorCodes::TOO_LARGE_DISTRIBUTED_DEPTH, "Maximum distributed depth exceeded"); auto & select = query.select->as(); @@ -1041,7 +1042,8 @@ std::optional StorageDistributed::distributedWrite(const ASTInser if (local_context->getClientInfo().distributed_depth == 0) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parallel distributed INSERT SELECT is not possible. "\ - "Reason: distributed reading is supported only from Distributed engine or *Cluster table functions, but got {} storage", src_storage->getName()); + "Reason: distributed reading is supported only from Distributed engine " + "or *Cluster table functions, but got {} storage", src_storage->getName()); } return {}; @@ -1084,8 +1086,7 @@ void StorageDistributed::alter(const AlterCommands & params, ContextPtr local_co setInMemoryMetadata(new_metadata); } - -void StorageDistributed::startup() +void StorageDistributed::initializeFromDisk() { if (!storage_policy) return; @@ -1134,6 +1135,7 @@ void StorageDistributed::shutdown() cluster_nodes_data.clear(); LOG_DEBUG(log, "Background threads for async INSERT joined"); } + void StorageDistributed::drop() { // Some INSERT in-between shutdown() and drop() can call @@ -1154,7 +1156,15 @@ void StorageDistributed::drop() auto disks = data_volume->getDisks(); for (const auto & disk : disks) + { + if (!disk->exists(relative_data_path)) + { + LOG_INFO(log, "Path {} is already removed from disk {}", relative_data_path, disk->getName()); + continue; + } + disk->removeRecursive(relative_data_path); + } LOG_DEBUG(log, "Removed"); } @@ -1288,20 +1298,14 @@ ClusterPtr StorageDistributed::getOptimizedCluster( } UInt64 force = settings.force_optimize_skip_unused_shards; - if (force) + if (force == FORCE_OPTIMIZE_SKIP_UNUSED_SHARDS_ALWAYS || (force == FORCE_OPTIMIZE_SKIP_UNUSED_SHARDS_HAS_SHARDING_KEY && has_sharding_key)) { - WriteBufferFromOwnString exception_message; if (!has_sharding_key) - exception_message << "No sharding key"; + throw Exception(ErrorCodes::UNABLE_TO_SKIP_UNUSED_SHARDS, "No sharding key"); else if (!sharding_key_is_usable) - exception_message << "Sharding key is not deterministic"; + throw Exception(ErrorCodes::UNABLE_TO_SKIP_UNUSED_SHARDS, "Sharding key is not deterministic"); else - exception_message << "Sharding key " << sharding_key_column_name << " is not used"; - - if (force == FORCE_OPTIMIZE_SKIP_UNUSED_SHARDS_ALWAYS) - throw Exception(exception_message.str(), ErrorCodes::UNABLE_TO_SKIP_UNUSED_SHARDS); - if (force == FORCE_OPTIMIZE_SKIP_UNUSED_SHARDS_HAS_SHARDING_KEY && has_sharding_key) - throw Exception(exception_message.str(), ErrorCodes::UNABLE_TO_SKIP_UNUSED_SHARDS); + throw Exception(ErrorCodes::UNABLE_TO_SKIP_UNUSED_SHARDS, "Sharding key {} is not used", sharding_key_column_name); } return {}; @@ -1330,7 +1334,7 @@ IColumn::Selector StorageDistributed::createSelector(const ClusterPtr cluster, c #undef CREATE_FOR_TYPE - throw Exception{"Sharding key expression does not evaluate to an integer type", ErrorCodes::TYPE_MISMATCH}; + throw Exception(ErrorCodes::TYPE_MISMATCH, "Sharding key expression does 
not evaluate to an integer type"); } /// Returns a new cluster with fewer shards if constant folding for `sharding_key_expr` is possible @@ -1389,7 +1393,7 @@ ClusterPtr StorageDistributed::skipUnusedShards( for (const auto & block : *blocks) { if (!block.has(sharding_key_column_name)) - throw Exception("sharding_key_expr should evaluate as a single row", ErrorCodes::TOO_MANY_ROWS); + throw Exception(ErrorCodes::TOO_MANY_ROWS, "sharding_key_expr should evaluate as a single row"); const ColumnWithTypeAndName & result = block.getByName(sharding_key_column_name); const auto selector = createSelector(cluster, result); @@ -1563,14 +1567,11 @@ void registerStorageDistributed(StorageFactory & factory) ASTs & engine_args = args.engine_args; if (engine_args.size() < 3 || engine_args.size() > 5) - throw Exception( - "Storage Distributed requires from 3 to 5 parameters - " - "name of configuration section with list of remote servers, " - "name of remote database, " - "name of remote table, " - "sharding key expression (optional), " - "policy to store data in (optional).", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Storage Distributed requires from 3 " + "to 5 parameters - name of configuration section with list " + "of remote servers, name of remote database, name " + "of remote table, sharding key expression (optional), policy to store data in (optional)."); String cluster_name = getClusterNameAndMakeLiteral(engine_args[0]); @@ -1598,13 +1599,13 @@ void registerStorageDistributed(StorageFactory & factory) const Block & block = sharding_expr->getSampleBlock(); if (block.columns() != 1) - throw Exception("Sharding expression must return exactly one column", ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS); + throw Exception(ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS, "Sharding expression must return exactly one column"); auto type = block.getByPosition(0).type; if (!type->isValueRepresentedByInteger()) - throw Exception("Sharding expression has type " + type->getName() + - ", but should be one of integer type", ErrorCodes::TYPE_MISMATCH); + throw Exception(ErrorCodes::TYPE_MISMATCH, "Sharding expression has type {}, but should be one of integer type", + type->getName()); } /// TODO: move some arguments from the arguments to the SETTINGS. 
diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h
index 55b0535d5e6..66fd7b77889 100644
--- a/src/Storages/StorageDistributed.h
+++ b/src/Storages/StorageDistributed.h
@@ -133,7 +133,7 @@ public:
     /// the structure of the sub-table is not checked
     void alter(const AlterCommands & params, ContextPtr context, AlterLockHolder & table_lock_holder) override;
-    void startup() override;
+    void initializeFromDisk();
     void shutdown() override;
     void flush() override;
     void drop() override;
diff --git a/src/Storages/StorageExternalDistributed.cpp b/src/Storages/StorageExternalDistributed.cpp
index a40ae12b2d1..9eeb619b899 100644
--- a/src/Storages/StorageExternalDistributed.cpp
+++ b/src/Storages/StorageExternalDistributed.cpp
@@ -233,7 +233,9 @@ void registerStorageExternalDistributed(StorageFactory & factory)
     {
         ASTs & engine_args = args.engine_args;
         if (engine_args.size() < 2)
-            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Engine ExternalDistributed must have at least 2 arguments: engine_name, named_collection and/or description");
+            throw Exception(ErrorCodes::BAD_ARGUMENTS,
+                "Engine ExternalDistributed must have at least 2 arguments: "
+                "engine_name, named_collection and/or description");
         auto engine_name = checkAndGetLiteralArgument(engine_args[0], "engine_name");
         StorageExternalDistributed::ExternalStorageEngine table_engine;
@@ -245,8 +247,9 @@ void registerStorageExternalDistributed(StorageFactory & factory)
             table_engine = StorageExternalDistributed::ExternalStorageEngine::PostgreSQL;
         else
             throw Exception(ErrorCodes::BAD_ARGUMENTS,
-                "External storage engine {} is not supported for StorageExternalDistributed. Supported engines are: MySQL, PostgreSQL, URL",
-                engine_name);
+                "External storage engine {} is not supported for StorageExternalDistributed. "
+                "Supported engines are: MySQL, PostgreSQL, URL",
+                engine_name);
         ASTs inner_engine_args(engine_args.begin() + 1, engine_args.end());
         String cluster_description;
@@ -308,10 +311,9 @@ void registerStorageExternalDistributed(StorageFactory & factory)
         else
         {
             if (engine_args.size() != 6)
-                throw Exception(
-                    "Storage ExternalDistributed requires 5 parameters: "
-                    "ExternalDistributed('engine_name', 'cluster_description', 'database', 'table', 'user', 'password').",
-                    ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+                throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH,
+                    "Storage ExternalDistributed requires 5 parameters: "
+                    "ExternalDistributed('engine_name', 'cluster_description', 'database', 'table', 'user', 'password').");
             cluster_description = checkAndGetLiteralArgument(engine_args[1], "cluster_description");
             configuration.database = checkAndGetLiteralArgument(engine_args[2], "database");
diff --git a/src/Storages/StorageFactory.cpp b/src/Storages/StorageFactory.cpp
index c2f6fb1608d..ff141bf108f 100644
--- a/src/Storages/StorageFactory.cpp
+++ b/src/Storages/StorageFactory.cpp
@@ -27,7 +27,7 @@ static void checkAllTypesAreAllowedInTable(const NamesAndTypesList & names_and_t
 {
     for (const auto & elem : names_and_types)
         if (elem.type->cannotBeStoredInTables())
-            throw Exception("Data type " + elem.type->getName() + " cannot be used in tables", ErrorCodes::DATA_TYPE_CANNOT_BE_USED_IN_TABLES);
+            throw Exception(ErrorCodes::DATA_TYPE_CANNOT_BE_USED_IN_TABLES, "Data type {} cannot be used in tables", elem.type->getName());
 }
@@ -35,7 +35,7 @@ ContextMutablePtr StorageFactory::Arguments::getContext() const
 {
     auto ptr = context.lock();
     if (!ptr)
-        throw Exception("Context has expired", ErrorCodes::LOGICAL_ERROR);
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Context has expired");
     return ptr;
 }
@@ -43,7 +43,7 @@ ContextMutablePtr StorageFactory::Arguments::getLocalContext() const
 {
     auto ptr = local_context.lock();
     if (!ptr)
-        throw Exception("Context has expired", ErrorCodes::LOGICAL_ERROR);
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Context has expired");
     return ptr;
 }
@@ -51,8 +51,7 @@ ContextMutablePtr StorageFactory::Arguments::getLocalContext() const
 void StorageFactory::registerStorage(const std::string & name, CreatorFn creator_fn, StorageFeatures features)
 {
     if (!storages.emplace(name, Creator{std::move(creator_fn), features}).second)
-        throw Exception("TableFunctionFactory: the table function name '" + name + "' is not unique",
-            ErrorCodes::LOGICAL_ERROR);
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "TableFunctionFactory: the table function name '{}' is not unique", name);
 }
@@ -74,21 +73,21 @@ StoragePtr StorageFactory::get(
     if (query.is_ordinary_view)
     {
         if (query.storage)
-            throw Exception("Specifying ENGINE is not allowed for a View", ErrorCodes::INCORRECT_QUERY);
+            throw Exception(ErrorCodes::INCORRECT_QUERY, "Specifying ENGINE is not allowed for a View");
         name = "View";
     }
     else if (query.is_live_view)
     {
         if (query.storage)
-            throw Exception("Specifying ENGINE is not allowed for a LiveView", ErrorCodes::INCORRECT_QUERY);
+            throw Exception(ErrorCodes::INCORRECT_QUERY, "Specifying ENGINE is not allowed for a LiveView");
         name = "LiveView";
     }
     else if (query.is_dictionary)
     {
         if (query.storage)
-            throw Exception("Specifying ENGINE is not allowed for a Dictionary", ErrorCodes::INCORRECT_QUERY);
+            throw Exception(ErrorCodes::INCORRECT_QUERY, "Specifying ENGINE is not allowed for a Dictionary");
         name = "Dictionary";
     }
@@ -109,16 +108,15 @@ StoragePtr StorageFactory::get(
     else
     {
if (!query.storage) - throw Exception("Incorrect CREATE query: storage required", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Incorrect CREATE query: storage required"); if (!storage_def->engine) - throw Exception("Incorrect CREATE query: ENGINE required", ErrorCodes::ENGINE_REQUIRED); + throw Exception(ErrorCodes::ENGINE_REQUIRED, "Incorrect CREATE query: ENGINE required"); const ASTFunction & engine_def = *storage_def->engine; if (engine_def.parameters) - throw Exception( - "Engine definition cannot take the form of a parametric function", ErrorCodes::FUNCTION_CANNOT_HAVE_PARAMETERS); + throw Exception(ErrorCodes::FUNCTION_CANNOT_HAVE_PARAMETERS, "Engine definition cannot take the form of a parametric function"); if (engine_def.arguments) has_engine_args = true; @@ -127,27 +125,25 @@ StoragePtr StorageFactory::get( if (name == "View") { - throw Exception( - "Direct creation of tables with ENGINE View is not supported, use CREATE VIEW statement", - ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Direct creation of tables with ENGINE View is not supported, use CREATE VIEW statement"); } else if (name == "MaterializedView") { - throw Exception( - "Direct creation of tables with ENGINE MaterializedView is not supported, use CREATE MATERIALIZED VIEW statement", - ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, + "Direct creation of tables with ENGINE MaterializedView " + "is not supported, use CREATE MATERIALIZED VIEW statement"); } else if (name == "LiveView") { - throw Exception( - "Direct creation of tables with ENGINE LiveView is not supported, use CREATE LIVE VIEW statement", - ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, + "Direct creation of tables with ENGINE LiveView " + "is not supported, use CREATE LIVE VIEW statement"); } else if (name == "WindowView") { - throw Exception( - "Direct creation of tables with ENGINE WindowView is not supported, use CREATE WINDOW VIEW statement", - ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, + "Direct creation of tables with ENGINE WindowView " + "is not supported, use CREATE WINDOW VIEW statement"); } auto it = storages.find(name); @@ -155,17 +151,16 @@ StoragePtr StorageFactory::get( { auto hints = getHints(name); if (!hints.empty()) - throw Exception("Unknown table engine " + name + ". Maybe you meant: " + toString(hints), ErrorCodes::UNKNOWN_STORAGE); + throw Exception(ErrorCodes::UNKNOWN_STORAGE, "Unknown table engine {}. Maybe you meant: {}", name, toString(hints)); else - throw Exception("Unknown table engine " + name, ErrorCodes::UNKNOWN_STORAGE); + throw Exception(ErrorCodes::UNKNOWN_STORAGE, "Unknown table engine {}", name); } auto check_feature = [&](String feature_description, FeatureMatcherFn feature_matcher_fn) { if (!feature_matcher_fn(it->second.features)) { - String msg = "Engine " + name + " doesn't support " + feature_description + ". " - "Currently only the following engines have support for the feature: ["; + String msg; auto supporting_engines = getAllRegisteredNamesByFeatureMatcherFn(feature_matcher_fn); for (size_t index = 0; index < supporting_engines.size(); ++index) { @@ -173,8 +168,9 @@ StoragePtr StorageFactory::get( msg += ", "; msg += supporting_engines[index]; } - msg += "]"; - throw Exception(msg, ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Engine {} doesn't support {}. 
" + "Currently only the following engines have support for the feature: [{}]", + name, feature_description, msg); } }; diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 419929dbef3..e2a2f84bc72 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -49,6 +49,7 @@ #include #include #include +#include namespace ProfileEvents @@ -186,7 +187,7 @@ void checkCreationIsAllowed( { auto table_path_stat = fs::status(table_path); if (fs::exists(table_path_stat) && fs::is_directory(table_path_stat)) - throw Exception("File must not be a directory", ErrorCodes::INCORRECT_FILE_NAME); + throw Exception(ErrorCodes::INCORRECT_FILE_NAME, "File must not be a directory"); } } @@ -365,8 +366,7 @@ ColumnsDescription StorageFile::getTableStructureFromFile( if (format == "Distributed") { if (paths.empty()) - throw Exception( - "Cannot get table structure from file, because no files match specified name", ErrorCodes::INCORRECT_FILE_NAME); + throw Exception(ErrorCodes::INCORRECT_FILE_NAME, "Cannot get table structure from file, because no files match specified name"); auto source = StorageDistributedDirectoryMonitor::createSourceFromFile(paths[0]); return ColumnsDescription(source->getOutputs().front().getHeader().getNamesAndTypesList()); @@ -375,9 +375,8 @@ ColumnsDescription StorageFile::getTableStructureFromFile( if (paths.empty() && !FormatFactory::instance().checkIfFormatHasExternalSchemaReader(format)) throw Exception( ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, because there are no files with provided path. You must specify " - "table structure manually", - format); + "Cannot extract table structure from {} format file, because there are no files with provided path. 
" + "You must specify table structure manually", format); std::optional columns_from_cache; if (context->getSettingsRef().schema_inference_use_cache_for_file) @@ -418,9 +417,9 @@ StorageFile::StorageFile(int table_fd_, CommonArguments args) total_bytes_to_read = buf.st_size; if (args.getContext()->getApplicationType() == Context::ApplicationType::SERVER) - throw Exception("Using file descriptor as source of storage isn't allowed for server daemons", ErrorCodes::DATABASE_ACCESS_DENIED); + throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "Using file descriptor as source of storage isn't allowed for server daemons"); if (args.format_name == "Distributed") - throw Exception("Distributed format is allowed only with explicit file path", ErrorCodes::INCORRECT_FILE_NAME); + throw Exception(ErrorCodes::INCORRECT_FILE_NAME, "Distributed format is allowed only with explicit file path"); is_db_table = false; use_table_fd = true; @@ -446,9 +445,9 @@ StorageFile::StorageFile(const std::string & relative_table_dir_path, CommonArgu : StorageFile(args) { if (relative_table_dir_path.empty()) - throw Exception("Storage " + getName() + " requires data path", ErrorCodes::INCORRECT_FILE_NAME); + throw Exception(ErrorCodes::INCORRECT_FILE_NAME, "Storage {} requires data path", getName()); if (args.format_name == "Distributed") - throw Exception("Distributed format is allowed only with explicit file path", ErrorCodes::INCORRECT_FILE_NAME); + throw Exception(ErrorCodes::INCORRECT_FILE_NAME, "Distributed format is allowed only with explicit file path"); String table_dir_path = fs::path(base_path) / relative_table_dir_path / ""; fs::create_directories(table_dir_path); @@ -486,7 +485,7 @@ void StorageFile::setStorageMetadata(CommonArguments args) { columns = getTableStructureFromFile(format_name, paths, compression_method, format_settings, args.getContext()); if (!args.columns.empty() && args.columns != columns) - throw Exception("Table structure and file structure are different", ErrorCodes::INCOMPATIBLE_COLUMNS); + throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS, "Table structure and file structure are different"); } storage_metadata.setColumns(columns); } @@ -571,7 +570,7 @@ public: { shared_lock = std::shared_lock(storage->rwlock, getLockTimeout(context)); if (!shared_lock) - throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Lock timeout exceeded"); } } @@ -715,7 +714,7 @@ Pipe StorageFile::read( if (context->getSettingsRef().engine_file_empty_if_not_exists) return Pipe(std::make_shared(storage_snapshot->getSampleBlockForColumns(column_names))); else - throw Exception("File " + paths[0] + " doesn't exist", ErrorCodes::FILE_DOESNT_EXIST); + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "File {} doesn't exist", paths[0]); } } @@ -854,7 +853,7 @@ public: , lock(std::move(lock_)) { if (!lock) - throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Lock timeout exceeded"); initialize(); } @@ -1024,7 +1023,7 @@ SinkToStoragePtr StorageFile::write( ContextPtr context) { if (format_name == "Distributed") - throw Exception("Method write is not implemented for Distributed format", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method write is not implemented for Distributed format"); int flags = 0; @@ -1061,7 +1060,9 @@ SinkToStoragePtr StorageFile::write( if (!paths.empty()) { if (is_path_with_globs) - throw Exception("Table '" + 
getStorageID().getNameForLogs() + "' is in readonly mode because of globs in filepath", ErrorCodes::DATABASE_ACCESS_DENIED); + throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, + "Table '{}' is in readonly mode because of globs in filepath", + getStorageID().getNameForLogs()); path = paths.back(); fs::create_directories(fs::path(path).parent_path()); @@ -1119,17 +1120,18 @@ bool StorageFile::storesDataOnDisk() const Strings StorageFile::getDataPaths() const { if (paths.empty()) - throw Exception("Table '" + getStorageID().getNameForLogs() + "' is in readonly mode", ErrorCodes::DATABASE_ACCESS_DENIED); + throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "Table '{}' is in readonly mode", getStorageID().getNameForLogs()); return paths; } void StorageFile::rename(const String & new_path_to_table_data, const StorageID & new_table_id) { if (!is_db_table) - throw Exception("Can't rename table " + getStorageID().getNameForLogs() + " bounded to user-defined file (or FD)", ErrorCodes::DATABASE_ACCESS_DENIED); + throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, + "Can't rename table {} bounded to user-defined file (or FD)", getStorageID().getNameForLogs()); if (paths.size() != 1) - throw Exception("Can't rename table " + getStorageID().getNameForLogs() + " in readonly mode", ErrorCodes::DATABASE_ACCESS_DENIED); + throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "Can't rename table {} in readonly mode", getStorageID().getNameForLogs()); std::string path_new = getTablePath(base_path + new_path_to_table_data, format_name); if (path_new == paths[0]) @@ -1149,7 +1151,7 @@ void StorageFile::truncate( TableExclusiveLockHolder &) { if (is_path_with_globs) - throw Exception("Can't truncate table '" + getStorageID().getNameForLogs() + "' in readonly mode", ErrorCodes::DATABASE_ACCESS_DENIED); + throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "Can't truncate table '{}' in readonly mode", getStorageID().getNameForLogs()); if (use_table_fd) { @@ -1197,9 +1199,9 @@ void registerStorageFile(StorageFactory & factory) ASTs & engine_args_ast = factory_args.engine_args; if (!(engine_args_ast.size() >= 1 && engine_args_ast.size() <= 3)) // NOLINT - throw Exception( - "Storage File requires from 1 to 3 arguments: name of used format, source and compression_method.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Storage File requires from 1 to 3 arguments: " + "name of used format, source and compression_method."); engine_args_ast[0] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args_ast[0], factory_args.getLocalContext()); storage_args.format_name = checkAndGetLiteralArgument(engine_args_ast[0], "format_name"); @@ -1251,8 +1253,8 @@ void registerStorageFile(StorageFactory & factory) else if (*opt_name == "stderr") source_fd = STDERR_FILENO; else - throw Exception( - "Unknown identifier '" + *opt_name + "' in second arg of File storage constructor", ErrorCodes::UNKNOWN_IDENTIFIER); + throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown identifier '{}' in second arg of File storage constructor", + *opt_name); } else if (const auto * literal = engine_args_ast[1]->as()) { @@ -1264,7 +1266,7 @@ void registerStorageFile(StorageFactory & factory) else if (type == Field::Types::String) source_path = literal->value.get(); else - throw Exception("Second argument must be path or file descriptor", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument must be path or file descriptor"); } if 
(engine_args_ast.size() == 3) diff --git a/src/Storages/StorageGenerateRandom.cpp b/src/Storages/StorageGenerateRandom.cpp index c00e82598b2..601306bd1bf 100644 --- a/src/Storages/StorageGenerateRandom.cpp +++ b/src/Storages/StorageGenerateRandom.cpp @@ -7,20 +7,24 @@ #include #include -#include -#include -#include -#include -#include -#include -#include -#include #include #include -#include -#include +#include +#include #include +#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include @@ -157,7 +161,7 @@ ColumnPtr fillColumnWithRandomData( case TypeIndex::Array: { - auto nested_type = typeid_cast(type.get())->getNestedType(); + auto nested_type = typeid_cast(*type).getNestedType(); auto offsets_column = ColumnVector::create(); auto & offsets = offsets_column->getData(); @@ -175,6 +179,13 @@ ColumnPtr fillColumnWithRandomData( return ColumnArray::create(data_column, std::move(offsets_column)); } + case TypeIndex::Map: + { + const DataTypePtr & nested_type = typeid_cast(*type).getNestedType(); + auto nested_column = fillColumnWithRandomData(nested_type, limit, max_array_length, max_string_length, rng, context); + return ColumnMap::create(nested_column); + } + case TypeIndex::Tuple: { auto elements = typeid_cast(type.get())->getElements(); @@ -189,7 +200,7 @@ ColumnPtr fillColumnWithRandomData( case TypeIndex::Nullable: { - auto nested_type = typeid_cast(type.get())->getNestedType(); + auto nested_type = typeid_cast(*type).getNestedType(); auto nested_column = fillColumnWithRandomData(nested_type, limit, max_array_length, max_string_length, rng, context); auto null_map_column = ColumnUInt8::create(); @@ -372,9 +383,37 @@ ColumnPtr fillColumnWithRandomData( return column; } + case TypeIndex::LowCardinality: + { + /// We are generating the values using the same random distribution as for full columns + /// so it's not in fact "low cardinality", + /// but it's ok for testing purposes, because the LowCardinality data type supports high cardinality data as well. 
+ + auto nested_type = typeid_cast(*type).getDictionaryType(); + auto nested_column = fillColumnWithRandomData(nested_type, limit, max_array_length, max_string_length, rng, context); + + auto column = type->createColumn(); + typeid_cast(*column).insertRangeFromFullColumn(*nested_column, 0, limit); + + return column; + } + case TypeIndex::IPv4: + { + auto column = ColumnIPv4::create(); + column->getData().resize(limit); + fillBufferWithRandomData(reinterpret_cast(column->getData().data()), limit * sizeof(IPv4), rng); + return column; + } + case TypeIndex::IPv6: + { + auto column = ColumnIPv6::create(); + column->getData().resize(limit); + fillBufferWithRandomData(reinterpret_cast(column->getData().data()), limit * sizeof(IPv6), rng); + return column; + } default: - throw Exception("The 'GenerateRandom' is not implemented for type " + type->getName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "The 'GenerateRandom' is not implemented for type {}", type->getName()); } } @@ -462,9 +501,9 @@ void registerStorageGenerateRandom(StorageFactory & factory) ASTs & engine_args = args.engine_args; if (engine_args.size() > 3) - throw Exception("Storage GenerateRandom requires at most three arguments: " - "random_seed, max_string_length, max_array_length.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Storage GenerateRandom requires at most three arguments: " + "random_seed, max_string_length, max_array_length."); std::optional random_seed; UInt64 max_string_length = 10; diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index a80f21834db..f6550c6cd5d 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -80,7 +80,7 @@ void StorageInMemoryMetadata::setComment(const String & comment_) void StorageInMemoryMetadata::setColumns(ColumnsDescription columns_) { if (columns_.getAllPhysical().empty()) - throw Exception("Empty list of columns passed", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED); + throw Exception(ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED, "Empty list of columns passed"); columns = std::move(columns_); } @@ -552,9 +552,8 @@ void StorageInMemoryMetadata::check(const NamesAndTypesList & provided_columns, const auto & provided_columns_map = getColumnsMap(provided_columns); if (column_names.empty()) - throw Exception( - "Empty list of columns queried. There are columns: " + listOfColumns(available_columns), - ErrorCodes::EMPTY_LIST_OF_COLUMNS_QUERIED); + throw Exception(ErrorCodes::EMPTY_LIST_OF_COLUMNS_QUERIED, "Empty list of columns queried. 
There are columns: {}", + listOfColumns(available_columns)); UniqueStrings unique_names; @@ -606,7 +605,7 @@ void StorageInMemoryMetadata::check(const Block & block, bool need_all) const for (const auto & column : block) { if (names_in_block.contains(column.name)) - throw Exception("Duplicate column " + column.name + " in block", ErrorCodes::DUPLICATE_COLUMN); + throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Duplicate column {} in block", column.name); names_in_block.insert(column.name); @@ -635,7 +634,7 @@ void StorageInMemoryMetadata::check(const Block & block, bool need_all) const for (const auto & available_column : available_columns) { if (!names_in_block.contains(available_column.name)) - throw Exception("Expected column " + available_column.name, ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); + throw Exception(ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK, "Expected column {}", available_column.name); } } } diff --git a/src/Storages/StorageInput.cpp b/src/Storages/StorageInput.cpp index 18e8442c1b5..0ad3afb2e8a 100644 --- a/src/Storages/StorageInput.cpp +++ b/src/Storages/StorageInput.cpp @@ -70,7 +70,7 @@ Pipe StorageInput::read( } if (pipe.empty()) - throw Exception("Input stream is not initialized, input() must be used only in INSERT SELECT query", ErrorCodes::INVALID_USAGE_OF_INPUT); + throw Exception(ErrorCodes::INVALID_USAGE_OF_INPUT, "Input stream is not initialized, input() must be used only in INSERT SELECT query"); return std::move(pipe); } diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index 320f05e038f..b57e717c272 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -63,7 +63,7 @@ StorageJoin::StorageJoin( auto metadata_snapshot = getInMemoryMetadataPtr(); for (const auto & key : key_names) if (!metadata_snapshot->getColumns().hasPhysical(key)) - throw Exception{"Key column (" + key + ") does not exist in table declaration.", ErrorCodes::NO_SUCH_COLUMN_IN_TABLE}; + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "Key column ({}) does not exist in table declaration.", key); table_join = std::make_shared(limits, use_nulls, kind, strictness, key_names); join = std::make_shared(table_join, getRightSampleBlock(), overwrite); @@ -89,7 +89,11 @@ void StorageJoin::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPt std::lock_guard mutate_lock(mutate_mutex); TableLockHolder holder = tryLockTimedWithContext(rwlock, RWLockImpl::Write, context); - disk->removeRecursive(path); + if (disk->exists(path)) + disk->removeRecursive(path); + else + LOG_INFO(&Poco::Logger::get("StorageJoin"), "Path {} is already removed from disk {}", path, disk->getName()); + disk->createDirectories(path); disk->createDirectories(path + "tmp/"); @@ -101,10 +105,10 @@ void StorageJoin::checkMutationIsPossible(const MutationCommands & commands, con { for (const auto & command : commands) if (command.type != MutationCommand::DELETE) - throw Exception("Table engine Join supports only DELETE mutations", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Table engine Join supports only DELETE mutations"); } -void StorageJoin::mutate(const MutationCommands & commands, ContextPtr context, bool /*force_wait*/) +void StorageJoin::mutate(const MutationCommands & commands, ContextPtr context) { /// Firstly acquire lock for mutation, that locks changes of data. 
/// We cannot acquire rwlock here, because read lock is needed @@ -175,7 +179,8 @@ HashJoinPtr StorageJoin::getJoinLocked(std::shared_ptr analyzed_join, getStorageID().getNameForLogs()); if (analyzed_join->getClauses().size() != 1) - throw Exception(ErrorCodes::INCOMPATIBLE_TYPE_OF_JOIN, "JOIN keys should match to the Join engine keys [{}]", fmt::join(getKeyNames(), ", ")); + throw Exception(ErrorCodes::INCOMPATIBLE_TYPE_OF_JOIN, "JOIN keys should match to the Join engine keys [{}]", + fmt::join(getKeyNames(), ", ")); const auto & join_on = analyzed_join->getOnlyClause(); if (join_on.on_filter_condition_left || join_on.on_filter_condition_right) @@ -319,16 +324,16 @@ void registerStorageJoin(StorageFactory & factory) persistent = setting.value.get(); } else - throw Exception("Unknown setting " + setting.name + " for storage " + args.engine_name, ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown setting {} for storage {}", setting.name, args.engine_name); } } DiskPtr disk = args.getContext()->getDisk(disk_name); if (engine_args.size() < 3) - throw Exception( - "Storage Join requires at least 3 parameters: Join(ANY|ALL|SEMI|ANTI, LEFT|INNER|RIGHT, keys...).", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Storage Join requires at least 3 parameters: " + "Join(ANY|ALL|SEMI|ANTI, LEFT|INNER|RIGHT, keys...)."); JoinStrictness strictness = JoinStrictness::Unspecified; JoinKind kind = JoinKind::Comma; @@ -353,8 +358,7 @@ void registerStorageJoin(StorageFactory & factory) } if (strictness == JoinStrictness::Unspecified) - throw Exception("First parameter of storage Join must be ANY or ALL or SEMI or ANTI (without quotes).", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "First parameter of storage Join must be ANY or ALL or SEMI or ANTI (without quotes)."); if (auto opt_kind_id = tryGetIdentifierName(engine_args[1])) { @@ -375,8 +379,7 @@ void registerStorageJoin(StorageFactory & factory) } if (kind == JoinKind::Comma) - throw Exception("Second parameter of storage Join must be LEFT or INNER or RIGHT or FULL (without quotes).", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second parameter of storage Join must be LEFT or INNER or RIGHT or FULL (without quotes)."); Names key_names; key_names.reserve(engine_args.size() - 2); @@ -384,7 +387,7 @@ void registerStorageJoin(StorageFactory & factory) { auto opt_key = tryGetIdentifierName(engine_args[i]); if (!opt_key) - throw Exception("Parameter №" + toString(i + 1) + " of storage Join don't look like column name.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter №{} of storage Join don't look like column name.", i + 1); key_names.push_back(*opt_key); } @@ -477,7 +480,7 @@ protected: Chunk chunk; if (!joinDispatch(join->kind, join->strictness, join->data->maps.front(), [&](auto kind, auto strictness, auto & map) { chunk = createChunk(map); })) - throw Exception("Logical error: unknown JOIN strictness", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: unknown JOIN strictness"); return chunk; } @@ -512,8 +515,8 @@ private: #undef M default: - throw Exception("Unsupported JOIN keys in StorageJoin. Type: " + toString(static_cast(join->data->type)), - ErrorCodes::UNSUPPORTED_JOIN_KEYS); + throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys in StorageJoin. 
Type: {}", + static_cast(join->data->type)); } if (!rows_added) @@ -587,7 +590,7 @@ private: fillAll(columns, column_indices, it, key_pos, rows_added); } else - throw Exception("This JOIN is not implemented yet", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "This JOIN is not implemented yet"); if (rows_added >= max_block_size) { diff --git a/src/Storages/StorageJoin.h b/src/Storages/StorageJoin.h index 96afd442c72..61ea743c841 100644 --- a/src/Storages/StorageJoin.h +++ b/src/Storages/StorageJoin.h @@ -45,7 +45,7 @@ public: /// Only delete is supported. void checkMutationIsPossible(const MutationCommands & commands, const Settings & settings) const override; - void mutate(const MutationCommands & commands, ContextPtr context, bool force_wait) override; + void mutate(const MutationCommands & commands, ContextPtr context) override; /// Return instance of HashJoin holding lock that protects from insertions to StorageJoin. /// HashJoin relies on structure of hash table that's why we need to return it with locked mutex. diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index bd255a952dc..923e807c5cd 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -264,9 +264,9 @@ StorageKeeperMap::StorageKeeperMap( metadata_string = out.str(); if (root_path.empty()) - throw Exception("root_path should not be empty", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "root_path should not be empty"); if (!root_path.starts_with('/')) - throw Exception("root_path should start with '/'", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "root_path should start with '/'"); auto config_keys_limit = context_->getConfigRef().getUInt64("keeper_map_keys_limit", 0); if (config_keys_limit != 0 && (keys_limit == 0 || keys_limit > config_keys_limit)) @@ -397,7 +397,9 @@ StorageKeeperMap::StorageKeeperMap( return; } - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot create metadata for table, because it is removed concurrently or because of wrong root_path ({})", root_path); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Cannot create metadata for table, because it is removed concurrently or because " + "of wrong root_path ({})", root_path); } @@ -763,12 +765,12 @@ StoragePtr create(const StorageFactory::Arguments & args) metadata.setConstraints(args.constraints); if (!args.storage_def->primary_key) - throw Exception("StorageKeeperMap requires one column in primary key", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "StorageKeeperMap requires one column in primary key"); metadata.primary_key = KeyDescription::getKeyFromAST(args.storage_def->primary_key->ptr(), metadata.columns, args.getContext()); auto primary_key_names = metadata.getColumnsRequiredForPrimaryKey(); if (primary_key_names.size() != 1) - throw Exception("StorageKeeperMap requires one column in primary key", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "StorageKeeperMap requires one column in primary key"); return std::make_shared( args.getContext(), args.table_id, metadata, args.query.attach, primary_key_names[0], root_path, keys_limit); diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index b8920647244..7d445c0d7ec 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -213,7 +213,7 @@ void LogSource::readData(const NameAndTypePair & name_and_type, ColumnPtr & colu const auto & data_file_it = 
storage.data_files_by_names.find(data_file_name); if (data_file_it == storage.data_files_by_names.end()) - throw Exception("Logical error: no information about file " + data_file_name + " in StorageLog", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: no information about file {} in StorageLog", data_file_name); const auto & data_file = *data_file_it->second; size_t offset = stream_for_prefix ? 0 : offsets[data_file.index]; @@ -274,7 +274,7 @@ public: , lock(std::move(lock_)) { if (!lock) - throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Lock timeout exceeded"); /// Ensure that marks are loaded because we're going to update them. storage.loadMarks(lock); @@ -417,8 +417,7 @@ ISerialization::OutputStreamGetter LogSink::createStreamGetter(const NameAndType String data_file_name = ISerialization::getFileNameForStream(name_and_type, path); auto it = streams.find(data_file_name); if (it == streams.end()) - throw Exception("Logical error: stream was not created when writing data in LogSink", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: stream was not created when writing data in LogSink"); Stream & stream = it->second; if (stream.written) @@ -443,7 +442,7 @@ void LogSink::writeData(const NameAndTypePair & name_and_type, const IColumn & c { const auto & data_file_it = storage.data_files_by_names.find(data_file_name); if (data_file_it == storage.data_files_by_names.end()) - throw Exception("Logical error: no information about file " + data_file_name + " in StorageLog", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: no information about file {} in StorageLog", data_file_name); const auto & data_file = *data_file_it->second; const auto & columns = metadata_snapshot->getColumns(); @@ -553,7 +552,7 @@ StorageLog::StorageLog( setInMemoryMetadata(storage_metadata); if (relative_path_.empty()) - throw Exception("Storage " + getName() + " requires data path", ErrorCodes::INCORRECT_FILE_NAME); + throw Exception(ErrorCodes::INCORRECT_FILE_NAME, "Storage {} requires data path", getName()); /// Enumerate data files. for (const auto & column : storage_metadata.getColumns().getAllPhysical()) @@ -592,8 +591,8 @@ StorageLog::StorageLog( void StorageLog::addDataFiles(const NameAndTypePair & column) { if (data_files_by_names.contains(column.name)) - throw Exception("Duplicate column with name " + column.name + " in constructor of StorageLog.", - ErrorCodes::DUPLICATE_COLUMN); + throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Duplicate column with name {} in constructor of StorageLog.", + column.name); ISerialization::StreamCallback stream_callback = [&] (const ISerialization::SubstreamPath & substream_path) { @@ -624,7 +623,7 @@ void StorageLog::loadMarks(std::chrono::seconds lock_timeout) /// a data race between two threads trying to load marks simultaneously. 
WriteLock lock{rwlock, lock_timeout}; if (!lock) - throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Lock timeout exceeded"); loadMarks(lock); } @@ -639,7 +638,7 @@ void StorageLog::loadMarks(const WriteLock & lock /* already locked exclusively { size_t file_size = disk->getFileSize(marks_file_path); if (file_size % (num_data_files * sizeof(Mark)) != 0) - throw Exception("Size of marks file is inconsistent", ErrorCodes::SIZES_OF_MARKS_FILES_ARE_INCONSISTENT); + throw Exception(ErrorCodes::SIZES_OF_MARKS_FILES_ARE_INCONSISTENT, "Size of marks file is inconsistent"); num_marks = file_size / (num_data_files * sizeof(Mark)); @@ -677,7 +676,7 @@ void StorageLog::saveMarks(const WriteLock & /* already locked for writing */) for (const auto & data_file : data_files) { if (data_file.marks.size() != num_marks) - throw Exception("Wrong number of marks generated from block. Makes no sense.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong number of marks generated from block. Makes no sense."); } size_t start = num_marks_saved; @@ -756,7 +755,7 @@ void StorageLog::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr { WriteLock lock{rwlock, getLockTimeout(local_context)}; if (!lock) - throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Lock timeout exceeded"); disk->clearDirectory(table_path); @@ -791,7 +790,7 @@ Pipe StorageLog::read( ReadLock lock{rwlock, lock_timeout}; if (!lock) - throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Lock timeout exceeded"); if (!num_data_files || !file_checker.getFileSize(data_files[INDEX_WITH_REAL_ROW_COUNT].path)) return Pipe(std::make_shared(storage_snapshot->getSampleBlockForColumns(column_names))); @@ -855,7 +854,7 @@ SinkToStoragePtr StorageLog::write(const ASTPtr & /*query*/, const StorageMetada { WriteLock lock{rwlock, getLockTimeout(local_context)}; if (!lock) - throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Lock timeout exceeded"); return std::make_shared(*this, metadata_snapshot, std::move(lock)); } @@ -864,7 +863,7 @@ CheckResults StorageLog::checkData(const ASTPtr & /* query */, ContextPtr local_ { ReadLock lock{rwlock, getLockTimeout(local_context)}; if (!lock) - throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Lock timeout exceeded"); return file_checker.check(); } @@ -874,7 +873,7 @@ IStorage::ColumnSizeByName StorageLog::getColumnSizes() const { ReadLock lock{rwlock, std::chrono::seconds(DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC)}; if (!lock) - throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Lock timeout exceeded"); ColumnSizeByName column_sizes; @@ -932,7 +931,7 @@ void StorageLog::backupData(BackupEntriesCollector & backup_entries_collector, c ReadLock lock{rwlock, lock_timeout}; if (!lock) - throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Lock timeout exceeded"); if (!num_data_files || !file_checker.getFileSize(data_files[INDEX_WITH_REAL_ROW_COUNT].path)) return; @@ -1009,7 +1008,7 @@ void StorageLog::restoreDataImpl(const BackupPtr & backup, const String & data_p { WriteLock lock{rwlock, lock_timeout}; if (!lock) 
- throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Lock timeout exceeded"); /// Load the marks if not loaded yet. We have to do that now because we're going to update these marks. loadMarks(lock); @@ -1045,7 +1044,7 @@ void StorageLog::restoreDataImpl(const BackupPtr & backup, const String & data_p size_t file_size = backup->getFileSize(file_path_in_backup); if (file_size % (num_data_files * sizeof(Mark)) != 0) - throw Exception("Size of marks file is inconsistent", ErrorCodes::SIZES_OF_MARKS_FILES_ARE_INCONSISTENT); + throw Exception(ErrorCodes::SIZES_OF_MARKS_FILES_ARE_INCONSISTENT, "Size of marks file is inconsistent"); num_extra_marks = file_size / (num_data_files * sizeof(Mark)); @@ -1102,9 +1101,8 @@ void registerStorageLog(StorageFactory & factory) auto create_fn = [](const StorageFactory::Arguments & args) { if (!args.engine_args.empty()) - throw Exception( - "Engine " + args.engine_name + " doesn't support any arguments (" + toString(args.engine_args.size()) + " given)", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Engine {} doesn't support any arguments ({} given)", + args.engine_name, args.engine_args.size()); String disk_name = getDiskName(*args.storage_def); DiskPtr disk = args.getContext()->getDisk(disk_name); diff --git a/src/Storages/StorageMaterializedMySQL.h b/src/Storages/StorageMaterializedMySQL.h index cbb59e508e8..9896265b576 100644 --- a/src/Storages/StorageMaterializedMySQL.h +++ b/src/Storages/StorageMaterializedMySQL.h @@ -44,7 +44,7 @@ public: private: [[noreturn]] static void throwNotAllowed() { - throw Exception("This method is not allowed for MaterializedMySQL", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "This method is not allowed for MaterializedMySQL"); } StoragePtr nested_storage; diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index fefa83fd6fe..971ecf8dbf2 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -70,17 +70,17 @@ StorageMaterializedView::StorageMaterializedView( storage_metadata.setColumns(columns_); if (!query.select) - throw Exception("SELECT query is not specified for " + getName(), ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "SELECT query is not specified for {}", getName()); /// If the destination table is not set, use inner table has_inner_table = query.to_table_id.empty(); if (has_inner_table && !query.storage) - throw Exception( - "You must specify where to save results of a MaterializedView query: either ENGINE or an existing table in a TO clause", - ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, + "You must specify where to save results of a MaterializedView query: " + "either ENGINE or an existing table in a TO clause"); if (query.select->list_of_selects->children.size() != 1) - throw Exception("UNION is not supported for MATERIALIZED VIEW", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW); + throw Exception(ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW, "UNION is not supported for MATERIALIZED VIEW"); auto select = SelectQueryDescription::getSelectQueryFromASTForMatView(query.select->clone(), local_context); storage_metadata.setSelectQuery(select); @@ -230,9 +230,8 @@ void StorageMaterializedView::truncate(const ASTPtr &, const StorageMetadataPtr void 
StorageMaterializedView::checkStatementCanBeForwarded() const { if (!has_inner_table) - throw Exception( - "MATERIALIZED VIEW targets existing table " + target_table_id.getNameForLogs() + ". " - + "Execute the statement directly on it.", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "MATERIALIZED VIEW targets existing table {}. " + "Execute the statement directly on it.", target_table_id.getNameForLogs()); } bool StorageMaterializedView::optimize( @@ -320,10 +319,10 @@ void StorageMaterializedView::checkAlterPartitionIsPossible( getTargetTable()->checkAlterPartitionIsPossible(commands, metadata_snapshot, settings); } -void StorageMaterializedView::mutate(const MutationCommands & commands, ContextPtr local_context, bool force_wait) +void StorageMaterializedView::mutate(const MutationCommands & commands, ContextPtr local_context) { checkStatementCanBeForwarded(); - getTargetTable()->mutate(commands, local_context, force_wait); + getTargetTable()->mutate(commands, local_context); } void StorageMaterializedView::renameInMemory(const StorageID & new_table_id) diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index c0fee7e870b..af2dedf8164 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -65,7 +65,7 @@ public: void checkAlterPartitionIsPossible(const PartitionCommands & commands, const StorageMetadataPtr & metadata_snapshot, const Settings & settings) const override; - void mutate(const MutationCommands & commands, ContextPtr context, bool force_wait) override; + void mutate(const MutationCommands & commands, ContextPtr context) override; void renameInMemory(const StorageID & new_table_id) override; diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index f1b33977e27..881cbc18b10 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -305,7 +305,7 @@ void StorageMemory::checkMutationIsPossible(const MutationCommands & /*commands* /// Some validation will be added } -void StorageMemory::mutate(const MutationCommands & commands, ContextPtr context, bool /*force_wait*/) +void StorageMemory::mutate(const MutationCommands & commands, ContextPtr context) { std::lock_guard lock(mutex); auto metadata_snapshot = getInMemoryMetadataPtr(); diff --git a/src/Storages/StorageMemory.h b/src/Storages/StorageMemory.h index 2274a27a267..c739088dbe4 100644 --- a/src/Storages/StorageMemory.h +++ b/src/Storages/StorageMemory.h @@ -67,7 +67,7 @@ public: void drop() override; void checkMutationIsPossible(const MutationCommands & commands, const Settings & settings) const override; - void mutate(const MutationCommands & commands, ContextPtr context, bool force_wait) override; + void mutate(const MutationCommands & commands, ContextPtr context) override; void truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr, TableExclusiveLockHolder &) override; diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index dd3a1e75151..f4013b6d2c2 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -449,7 +449,7 @@ void ReadFromMerge::initializePipeline(QueryPipelineBuilder & pipeline, const Bu /// If sampling requested, then check that table supports it. 
if (query_info.query->as()->sampleSize() && !storage->supportsSampling()) - throw Exception("Illegal SAMPLE: table doesn't support sampling", ErrorCodes::SAMPLING_NOT_SUPPORTED); + throw Exception(ErrorCodes::SAMPLING_NOT_SUPPORTED, "Illegal SAMPLE: table doesn't support sampling"); Aliases aliases; auto storage_metadata_snapshot = storage->getInMemoryMetadataPtr(); @@ -739,7 +739,7 @@ StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables( continue; if (query && query->as()->prewhere() && !storage->supportsPrewhere()) - throw Exception("Storage " + storage->getName() + " doesn't support PREWHERE.", ErrorCodes::ILLEGAL_PREWHERE); + throw Exception(ErrorCodes::ILLEGAL_PREWHERE, "Storage {} doesn't support PREWHERE.", storage->getName()); if (storage.get() != this) { @@ -931,11 +931,11 @@ std::tuple StorageMerge::evaluateDatabaseName(cons if (const auto * func = node->as(); func && func->name == "REGEXP") { if (func->arguments->children.size() != 1) - throw Exception("REGEXP in Merge ENGINE takes only one argument", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "REGEXP in Merge ENGINE takes only one argument"); auto * literal = func->arguments->children[0]->as(); if (!literal || literal->value.getType() != Field::Types::Which::String || literal->value.safeGet().empty()) - throw Exception("Argument for REGEXP in Merge ENGINE should be a non empty String Literal", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Argument for REGEXP in Merge ENGINE should be a non empty String Literal"); return {true, func->arguments->children[0]}; } @@ -956,9 +956,9 @@ void registerStorageMerge(StorageFactory & factory) ASTs & engine_args = args.engine_args; if (engine_args.size() != 2) - throw Exception("Storage Merge requires exactly 2 parameters" - " - name of source database and regexp for table names.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Storage Merge requires exactly 2 parameters - name " + "of source database and regexp for table names."); auto [is_regexp, database_ast] = StorageMerge::evaluateDatabaseName(engine_args[0], args.getLocalContext()); diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 177b7cc1c97..4ef34ae91d5 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -107,7 +107,11 @@ StorageMergeTree::StorageMergeTree( loadDataParts(has_force_restore_data_flag); if (!attach && !getDataPartsForInternalUsage().empty()) - throw Exception("Data directory for table already containing data parts - probably it was unclean DROP table or manual intervention. You must either clear directory by hand or use ATTACH TABLE instead of CREATE TABLE if you need to use that parts.", ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, + "Data directory for table already containing data parts - probably " + "it was unclean DROP table or manual intervention. " + "You must either clear directory by hand or use ATTACH TABLE instead " + "of CREATE TABLE if you need to use that parts."); increment.set(getMaxBlockNumber()); @@ -321,7 +325,7 @@ void StorageMergeTree::alter( /// We cannot place this check into settings sanityCheck because it depends on format_version. /// sanityCheck must work event without storage. 
if (new_storage_settings->non_replicated_deduplication_window != 0 && format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) - throw Exception("Deduplication for non-replicated MergeTree in old syntax is not supported", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Deduplication for non-replicated MergeTree in old syntax is not supported"); deduplication_log->setDeduplicationWindowSize(new_storage_settings->non_replicated_deduplication_window); } @@ -375,9 +379,9 @@ CurrentlyMergingPartsTagger::CurrentlyMergingPartsTagger( if (!reserved_space) { if (is_mutation) - throw Exception("Not enough space for mutating part '" + future_part->parts[0]->name + "'", ErrorCodes::NOT_ENOUGH_SPACE); + throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Not enough space for mutating part '{}'", future_part->parts[0]->name); else - throw Exception("Not enough space for merging parts", ErrorCodes::NOT_ENOUGH_SPACE); + throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Not enough space for merging parts"); } future_part->updatePath(storage, reserved_space.get()); @@ -385,7 +389,7 @@ CurrentlyMergingPartsTagger::CurrentlyMergingPartsTagger( for (const auto & part : future_part->parts) { if (storage.currently_merging_mutating_parts.contains(part)) - throw Exception("Tagging already tagged part " + part->name + ". This is a bug.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Tagging already tagged part {}. This is a bug.", part->name); } storage.currently_merging_mutating_parts.insert(future_part->parts.begin(), future_part->parts.end()); } @@ -528,14 +532,14 @@ void StorageMergeTree::setMutationCSN(const String & mutation_id, CSN csn) it->second.writeCSN(csn); } -void StorageMergeTree::mutate(const MutationCommands & commands, ContextPtr query_context, bool force_wait) +void StorageMergeTree::mutate(const MutationCommands & commands, ContextPtr query_context) { /// Validate partition IDs (if any) before starting mutation getPartitionIdsAffectedByCommands(commands, query_context); Int64 version = startMutation(commands, query_context); - if (force_wait || query_context->getSettingsRef().mutations_sync > 0 || query_context->getCurrentTransaction()) + if (query_context->getSettingsRef().mutations_sync > 0 || query_context->getCurrentTransaction()) waitForMutation(version); } @@ -714,7 +718,7 @@ void StorageMergeTree::loadDeduplicationLog() { auto settings = getSettings(); if (settings->non_replicated_deduplication_window != 0 && format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) - throw Exception("Deduplication for non-replicated MergeTree in old syntax is not supported", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Deduplication for non-replicated MergeTree in old syntax is not supported"); auto disk = getDisks()[0]; std::string path = fs::path(relative_data_path) / "deduplication_logs"; @@ -1124,7 +1128,7 @@ MergeMutateSelectedEntryPtr StorageMergeTree::selectPartsToMutate( future_part->parts.push_back(part); future_part->part_info = new_part_info; future_part->name = part->getNewName(new_part_info); - future_part->type = part->getType(); + future_part->part_format = part->getFormat(); tagger = std::make_unique(future_part, MergeTreeDataMergerMutator::estimateNeededDiskSpace({part}), *this, metadata_snapshot, true); return std::make_shared(future_part, std::move(tagger), commands, txn); @@ -1422,7 +1426,7 @@ ActionLock StorageMergeTree::stopMergesAndWait() if (std::cv_status::timeout == 
currently_processing_in_background_condition.wait_for( lock, std::chrono::seconds(DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC))) { - throw Exception("Timeout while waiting for already running merges", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Timeout while waiting for already running merges"); } } @@ -1546,7 +1550,9 @@ void StorageMergeTree::renameAndCommitEmptyParts(MutableDataPartsVector & new_pa DataPartsVector covered_parts_by_one_part = renameTempPartAndReplace(part, transaction); if (covered_parts_by_one_part.size() > 1) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} expected to cover not more then 1 part. {} covered parts have been found. This is a bug.", + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Part {} expected to cover not more then 1 part. " + "{} covered parts have been found. This is a bug.", part->name, covered_parts_by_one_part.size()); std::move(covered_parts_by_one_part.begin(), covered_parts_by_one_part.end(), std::back_inserter(covered_parts)); @@ -1787,9 +1793,9 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con for (const DataPartPtr & src_part : src_parts) { if (!canReplacePartition(src_part)) - throw Exception( - "Cannot replace partition '" + partition_id + "' because part '" + src_part->name + "' has inconsistent granularity with table", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Cannot replace partition '{}' because part '{}' has inconsistent granularity with table", + partition_id, src_part->name); /// This will generate unique name in scope of current server process. Int64 temp_index = insert_increment.get(); @@ -1856,13 +1862,16 @@ void StorageMergeTree::movePartitionToTable(const StoragePtr & dest_table, const auto dest_table_storage = std::dynamic_pointer_cast(dest_table); if (!dest_table_storage) - throw Exception("Table " + getStorageID().getNameForLogs() + " supports movePartitionToTable only for MergeTree family of table engines." - " Got " + dest_table->getName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Table {} supports movePartitionToTable only for MergeTree family of table engines. Got {}", + getStorageID().getNameForLogs(), dest_table->getName()); if (dest_table_storage->getStoragePolicy() != this->getStoragePolicy()) - throw Exception("Destination table " + dest_table_storage->getStorageID().getNameForLogs() + - " should have the same storage policy of source table " + getStorageID().getNameForLogs() + ". " + - getStorageID().getNameForLogs() + ": " + this->getStoragePolicy()->getName() + ", " + - dest_table_storage->getStorageID().getNameForLogs() + ": " + dest_table_storage->getStoragePolicy()->getName(), ErrorCodes::UNKNOWN_POLICY); + throw Exception(ErrorCodes::UNKNOWN_POLICY, + "Destination table {} should have the same storage policy of source table {}. 
{}: {}, {}: {}", + dest_table_storage->getStorageID().getNameForLogs(), + getStorageID().getNameForLogs(), getStorageID().getNameForLogs(), + this->getStoragePolicy()->getName(), dest_table_storage->getStorageID().getNameForLogs(), + dest_table_storage->getStoragePolicy()->getName()); auto dest_metadata_snapshot = dest_table->getInMemoryMetadataPtr(); auto metadata_snapshot = getInMemoryMetadataPtr(); @@ -1880,9 +1889,9 @@ void StorageMergeTree::movePartitionToTable(const StoragePtr & dest_table, const for (const DataPartPtr & src_part : src_parts) { if (!dest_table_storage->canReplacePartition(src_part)) - throw Exception( - "Cannot move partition '" + partition_id + "' because part '" + src_part->name + "' has inconsistent granularity with table", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Cannot move partition '{}' because part '{}' has inconsistent granularity with table", + partition_id, src_part->name); /// This will generate unique name in scope of current server process. Int64 temp_index = insert_increment.get(); @@ -1979,8 +1988,8 @@ CheckResults StorageMergeTree::checkData(const ASTPtr & query, ContextPtr local_ } catch (const Exception & ex) { - results.emplace_back(part->name, false, - "Check of part finished with error: '" + ex.message() + "'"); + tryLogCurrentException(log, __PRETTY_FUNCTION__); + results.emplace_back(part->name, false, "Check of part finished with error: '" + ex.message() + "'"); } } else diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index 706ceda17b3..1dff6323e4c 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -85,7 +85,7 @@ public: const Names & deduplicate_by_columns, ContextPtr context) override; - void mutate(const MutationCommands & commands, ContextPtr context, bool force_wait) override; + void mutate(const MutationCommands & commands, ContextPtr context) override; bool hasLightweightDeletedMask() const override; diff --git a/src/Storages/StorageMongoDB.cpp b/src/Storages/StorageMongoDB.cpp index 17d3a7cf970..25a303620d6 100644 --- a/src/Storages/StorageMongoDB.cpp +++ b/src/Storages/StorageMongoDB.cpp @@ -78,7 +78,7 @@ void StorageMongoDB::connectIfNotConnected() { Poco::MongoDB::Database poco_db(auth_db); if (!poco_db.authenticate(*connection, username, password, Poco::MongoDB::Database::AUTH_SCRAM_SHA1)) - throw Exception("Cannot authenticate in MongoDB, incorrect user or password", ErrorCodes::MONGODB_CANNOT_AUTHENTICATE); + throw Exception(ErrorCodes::MONGODB_CANNOT_AUTHENTICATE, "Cannot authenticate in MongoDB, incorrect user or password"); } authenticated = true; @@ -200,9 +200,9 @@ StorageMongoDB::Configuration StorageMongoDB::getConfiguration(ASTs engine_args, else { if (engine_args.size() < 5 || engine_args.size() > 6) - throw Exception( - "Storage MongoDB requires from 5 to 6 parameters: MongoDB('host:port', database, collection, 'user', 'password' [, 'options']).", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Storage MongoDB requires from 5 to 6 parameters: " + "MongoDB('host:port', database, collection, 'user', 'password' [, 'options'])."); for (auto & engine_arg : engine_args) engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, context); diff --git a/src/Storages/StorageMongoDBSocketFactory.cpp b/src/Storages/StorageMongoDBSocketFactory.cpp index 7308c4b3ce7..6dfcbd7e00e 100644 --- a/src/Storages/StorageMongoDBSocketFactory.cpp +++ 
b/src/Storages/StorageMongoDBSocketFactory.cpp @@ -46,7 +46,7 @@ Poco::Net::StreamSocket StorageMongoDBSocketFactory::createSecureSocket(const st return socket; #else - throw Exception("SSL is not enabled at build time.", ErrorCodes::FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME); + throw Exception(ErrorCodes::FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME, "SSL is not enabled at build time."); #endif } diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index 20eb59c7262..ee647043407 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -264,9 +264,9 @@ StorageMySQLConfiguration StorageMySQL::getConfiguration(ASTs engine_args, Conte else { if (engine_args.size() < 5 || engine_args.size() > 7) - throw Exception( - "Storage MySQL requires 5-7 parameters: MySQL('host:port' (or 'addresses_pattern'), database, table, 'user', 'password'[, replace_query, 'on_duplicate_clause']).", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Storage MySQL requires 5-7 parameters: " + "MySQL('host:port' (or 'addresses_pattern'), database, table, " + "'user', 'password'[, replace_query, 'on_duplicate_clause'])."); for (auto & engine_arg : engine_args) engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, context_); @@ -305,7 +305,7 @@ void registerStorageMySQL(StorageFactory & factory) mysql_settings.loadFromQuery(*args.storage_def); if (!mysql_settings.connection_pool_size) - throw Exception("connection_pool_size cannot be zero.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "connection_pool_size cannot be zero."); mysqlxx::PoolWithFailover pool = createMySQLPoolWithFailover(configuration, mysql_settings); diff --git a/src/Storages/StorageNull.cpp b/src/Storages/StorageNull.cpp index 638ebf5109b..aa462e1a40c 100644 --- a/src/Storages/StorageNull.cpp +++ b/src/Storages/StorageNull.cpp @@ -25,9 +25,8 @@ void registerStorageNull(StorageFactory & factory) factory.registerStorage("Null", [](const StorageFactory::Arguments & args) { if (!args.engine_args.empty()) - throw Exception( - "Engine " + args.engine_name + " doesn't support any arguments (" + toString(args.engine_args.size()) + " given)", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Engine {} doesn't support any arguments ({} given)", + args.engine_name, args.engine_args.size()); return std::make_shared(args.table_id, args.columns, args.constraints, args.comment); }, diff --git a/src/Storages/StorageProxy.h b/src/Storages/StorageProxy.h index 2ce5f85e11f..2afd9e8a63b 100644 --- a/src/Storages/StorageProxy.h +++ b/src/Storages/StorageProxy.h @@ -132,7 +132,7 @@ public: return getNested()->optimize(query, metadata_snapshot, partition, final, deduplicate, deduplicate_by_columns, context); } - void mutate(const MutationCommands & commands, ContextPtr context, bool force_wait) override { getNested()->mutate(commands, context, force_wait); } + void mutate(const MutationCommands & commands, ContextPtr context) override { getNested()->mutate(commands, context); } CancellationCode killMutation(const String & mutation_id) override { return getNested()->killMutation(mutation_id); } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index c87527ee740..7a4b97f6e49 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -11,7 +11,7 @@ #include #include 
#include -#include +#include #include @@ -143,7 +143,6 @@ namespace ErrorCodes extern const int ABORTED; extern const int REPLICA_IS_NOT_IN_QUORUM; extern const int TABLE_IS_READ_ONLY; - extern const int TABLE_IS_DROPPED; extern const int NOT_FOUND_NODE; extern const int NO_ACTIVE_REPLICAS; extern const int NOT_A_LEADER; @@ -217,7 +216,7 @@ zkutil::ZooKeeperPtr StorageReplicatedMergeTree::getZooKeeper() const { auto res = tryGetZooKeeper(); if (!res) - throw Exception("Cannot get ZooKeeper", ErrorCodes::NO_ZOOKEEPER); + throw Exception(ErrorCodes::NO_ZOOKEEPER, "Cannot get ZooKeeper"); return res; } @@ -405,7 +404,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( if (!attach) { dropIfEmpty(); - throw Exception("Can't create replicated table without ZooKeeper", ErrorCodes::NO_ZOOKEEPER); + throw Exception(ErrorCodes::NO_ZOOKEEPER, "Can't create replicated table without ZooKeeper"); } has_metadata_in_zookeeper = std::nullopt; @@ -431,10 +430,10 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( has_metadata_in_zookeeper = true; if (!getDataPartsForInternalUsage().empty()) - throw Exception("Data directory for table already contains data parts" - " - probably it was unclean DROP table or manual intervention." - " You must either clear directory by hand or use ATTACH TABLE" - " instead of CREATE TABLE if you need to use that parts.", ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, + "Data directory for table already contains data parts - probably it was unclean DROP table " + "or manual intervention. You must either clear directory by hand " + "or use ATTACH TABLE instead of CREATE TABLE if you need to use that parts."); try { @@ -458,7 +457,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( catch (Coordination::Exception & e) { if (!is_first_replica && e.code == Coordination::Error::ZNONODE) - throw Exception("Table " + zookeeper_path + " was suddenly removed.", ErrorCodes::ALL_REPLICAS_LOST); + throw Exception(ErrorCodes::ALL_REPLICAS_LOST, "Table {} was suddenly removed.", zookeeper_path); else throw; } @@ -582,8 +581,9 @@ void StorageReplicatedMergeTree::waitMutationToFinishOnReplicas( } if (partial_shutdown_called) - throw Exception("Mutation is not finished because table shutdown was called. It will be done after table restart.", - ErrorCodes::UNFINISHED); + throw Exception(ErrorCodes::UNFINISHED, + "Mutation is not finished because table shutdown was called. " + "It will be done after table restart."); /// Replica inactive, don't check mutation status if (!inactive_replicas.empty() && inactive_replicas.contains(replica)) @@ -799,9 +799,9 @@ bool StorageReplicatedMergeTree::createTableIfNotExists(const StorageMetadataPtr } /// Do not use LOGICAL_ERROR code, because it may happen if user has specified wrong zookeeper_path - throw Exception("Cannot create table, because it is created concurrently every time " - "or because of wrong zookeeper_path " - "or because of logical error", ErrorCodes::REPLICA_ALREADY_EXISTS); + throw Exception(ErrorCodes::REPLICA_ALREADY_EXISTS, + "Cannot create table, because it is created concurrently every time or because " + "of wrong zookeeper_path or because of logical error"); } void StorageReplicatedMergeTree::createReplica(const StorageMetadataPtr & metadata_snapshot) @@ -904,7 +904,7 @@ void StorageReplicatedMergeTree::drop() /// If probably there is metadata in ZooKeeper, we don't allow to drop the table. 
if (!zookeeper) - throw Exception("Can't drop readonly replicated table (need to drop data in ZooKeeper as well)", ErrorCodes::TABLE_IS_READ_ONLY); + throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Can't drop readonly replicated table (need to drop data in ZooKeeper as well)"); dropReplica(zookeeper, zookeeper_path, replica_name, log, getSettings()); } @@ -926,7 +926,7 @@ void StorageReplicatedMergeTree::dropReplica(zkutil::ZooKeeperPtr zookeeper, con Poco::Logger * logger, MergeTreeSettingsPtr table_settings) { if (zookeeper->expired()) - throw Exception("Table was not dropped because ZooKeeper session has expired.", ErrorCodes::TABLE_WAS_NOT_DROPPED); + throw Exception(ErrorCodes::TABLE_WAS_NOT_DROPPED, "Table was not dropped because ZooKeeper session has expired."); auto remote_replica_path = zookeeper_path + "/replicas/" + replica; @@ -1286,9 +1286,11 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks) LOG_DEBUG(log, sanity_report_debug_fmt, fmt::join(uncovered_unexpected_parts, ", "), fmt::join(parts_to_fetch, ", "), fmt::join(covered_unexpected_parts, ", "), fmt::join(expected_parts, ", ")); throw Exception(ErrorCodes::TOO_MANY_UNEXPECTED_DATA_PARTS, sanity_report_fmt, getStorageID().getNameForLogs(), - formatReadableQuantity(uncovered_unexpected_parts_rows), formatReadableQuantity(total_rows_on_filesystem), + formatReadableQuantity(uncovered_unexpected_parts_rows), + formatReadableQuantity(total_rows_on_filesystem), uncovered_unexpected_parts.size(), uncovered_unexpected_parts_rows, unexpected_parts_nonnew, unexpected_parts_nonnew_rows, - parts_to_fetch.size(), parts_to_fetch_blocks, covered_unexpected_parts.size(), unexpected_parts_rows - uncovered_unexpected_parts_rows); + parts_to_fetch.size(), parts_to_fetch_blocks, covered_unexpected_parts.size(), + unexpected_parts_rows - uncovered_unexpected_parts_rows); } if (unexpected_parts_nonnew_rows > 0 || uncovered_unexpected_parts_rows > 0) @@ -1438,11 +1440,10 @@ void StorageReplicatedMergeTree::checkPartChecksumsAndAddCommitOps(const zkutil: } MergeTreeData::DataPartsVector StorageReplicatedMergeTree::checkPartChecksumsAndCommit(Transaction & transaction, - const DataPartPtr & part, std::optional hardlinked_files) + const MutableDataPartPtr & part, std::optional hardlinked_files) { auto zookeeper = getZooKeeper(); - while (true) { Coordination::Requests ops; @@ -1499,7 +1500,7 @@ String StorageReplicatedMergeTree::getChecksumsForZooKeeper(const MergeTreeDataP getSettings()->use_minimalistic_checksums_in_zookeeper); } -MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::attachPartHelperFoundValidPart(const LogEntry& entry) const +MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::attachPartHelperFoundValidPart(const LogEntry & entry) const { if (format_version != MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) return {}; @@ -1516,17 +1517,12 @@ MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::attachPartHelperFo if (!part_info || part_info->partition_id != actual_part_info.partition_id) continue; - const String part_old_name = part_info->getPartNameV1(); + const auto part_old_name = part_info->getPartNameV1(); + const auto volume = std::make_shared("volume_" + part_old_name, disk); - const VolumePtr volume = std::make_shared("volume_" + part_old_name, disk); - - auto data_part_storage = std::make_shared( - volume, - fs::path(relative_data_path) / "detached", - part_old_name); - - /// actual_part_info is more recent than part_info so we use it - 
MergeTreeData::MutableDataPartPtr part = createPart(part_new_name, actual_part_info, data_part_storage); + auto part = getDataPartBuilder(entry.new_part_name, volume, fs::path("detached") / part_old_name) + .withPartFormatFromDisk() + .build(); try { @@ -1541,7 +1537,7 @@ MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::attachPartHelperFo if (entry.part_checksum == part->checksums.getTotalChecksumHex()) { - part->modification_time = data_part_storage->getLastModified().epochTime(); + part->modification_time = part->getDataPartStorage().getLastModified().epochTime(); return part; } } @@ -1665,7 +1661,7 @@ bool StorageReplicatedMergeTree::executeFetch(LogEntry & entry, bool need_to_che if (entry.quorum) { if (entry.type != LogEntry::GET_PART) - throw Exception("Logical error: log entry with quorum but type is not GET_PART", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: log entry with quorum but type is not GET_PART"); LOG_DEBUG(log, "No active replica has part {} which needs to be written with quorum. Will try to mark that quorum as failed.", entry.new_part_name); @@ -1728,8 +1724,7 @@ bool StorageReplicatedMergeTree::executeFetch(LogEntry & entry, bool need_to_che auto part_info = MergeTreePartInfo::fromPartName(entry.new_part_name, format_version); if (part_info.min_block != part_info.max_block) - throw Exception("Logical error: log entry with quorum for part covering more than one block number", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: log entry with quorum for part covering more than one block number"); ops.emplace_back(zkutil::makeCreateRequest( fs::path(zookeeper_path) / "quorum" / "failed_parts" / entry.new_part_name, @@ -1885,8 +1880,11 @@ void StorageReplicatedMergeTree::executeDropRange(const LogEntry & entry) auto drop_range_info = MergeTreePartInfo::fromPartName(entry.new_part_name, format_version); getContext()->getMergeList().cancelInPartition(getStorageID(), drop_range_info.partition_id, drop_range_info.max_block); - queue.removePartProducingOpsInRange(getZooKeeper(), drop_range_info, entry, /* fetch_entry_znode= */ {}); - part_check_thread.cancelRemovedPartsCheck(drop_range_info); + { + auto pause_checking_parts = part_check_thread.pausePartsCheck(); + queue.removePartProducingOpsInRange(getZooKeeper(), drop_range_info, entry, /* fetch_entry_znode= */ {}); + part_check_thread.cancelRemovedPartsCheck(drop_range_info); + } /// Delete the parts contained in the range to be deleted. /// It's important that no old parts remain (after the merge), because otherwise, @@ -1962,6 +1960,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) if (replace) { getContext()->getMergeList().cancelInPartition(getStorageID(), drop_range.partition_id, drop_range.max_block); + auto pause_checking_parts = part_check_thread.pausePartsCheck(); queue.removePartProducingOpsInRange(getZooKeeper(), drop_range, entry, /* fetch_entry_znode= */ {}); part_check_thread.cancelRemovedPartsCheck(drop_range); } @@ -2227,7 +2226,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) { auto part_desc = part_name_to_desc[final_part_name]; if (!part_desc) - throw Exception("There is no final part " + final_part_name + ". This is a bug", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no final part {}. 
This is a bug", final_part_name); final_parts.emplace_back(part_desc); @@ -2238,8 +2237,8 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) if (!prev.found_new_part_info.isDisjoint(curr.found_new_part_info)) { - throw Exception("Intersected final parts detected: " + prev.found_new_part_name - + " and " + curr.found_new_part_name + ". It should be investigated.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Intersected final parts detected: {} and {}. It should be investigated.", + prev.found_new_part_name, curr.found_new_part_name); } } } @@ -2254,7 +2253,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) if (part_desc->src_table_part) { if (part_desc->checksum_hex != part_desc->src_table_part->checksums.getTotalChecksumHex()) - throw Exception("Checksums of " + part_desc->src_table_part->name + " is suddenly changed", ErrorCodes::UNFINISHED); + throw Exception(ErrorCodes::UNFINISHED, "Checksums of {} is suddenly changed", part_desc->src_table_part->name); auto [res_part, temporary_part_lock] = cloneAndLoadDataPartOnSameDisk( part_desc->src_table_part, TMP_PREFIX + "clone_", part_desc->new_part_info, metadata_snapshot, NO_TRANSACTION_PTR, &part_desc->hardlinked_files, false, {}); @@ -2271,7 +2270,9 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) String interserver_scheme = getContext()->getInterserverScheme(); if (interserver_scheme != address.scheme) - throw Exception("Interserver schemas are different '" + interserver_scheme + "' != '" + address.scheme + "', can't fetch part from " + address.host, ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Interserver schemas are different '{}' != '{}', can't fetch part from {}", + interserver_scheme, address.scheme, address.host); part_desc->res_part = fetcher.fetchSelectedPart( metadata_snapshot, getContext(), part_desc->found_new_part_name, source_replica_path, @@ -2283,7 +2284,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) ProfileEvents::increment(ProfileEvents::ReplicatedPartFetches); } else - throw Exception("There is no receipt to produce part " + part_desc->new_part_name + ". This is bug", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no receipt to produce part {}. 
This is a bug", part_desc->new_part_name); }; /// Download or clone parts @@ -2370,7 +2371,8 @@ void StorageReplicatedMergeTree::executeClonePartFromShard(const LogEntry & entr } if (replica.empty()) - throw Exception(ErrorCodes::NO_REPLICA_HAS_PART, "Not found active replica on shard {} to clone part {}", entry.source_shard, entry.new_part_name); + throw Exception(ErrorCodes::NO_REPLICA_HAS_PART, "Not found active replica on shard {} to clone part {}", + entry.source_shard, entry.new_part_name); LOG_INFO(log, "Will clone part from shard {} and replica {}", entry.source_shard, replica); @@ -2387,9 +2389,8 @@ void StorageReplicatedMergeTree::executeClonePartFromShard(const LogEntry & entr auto get_part = [&, address, timeouts, credentials, interserver_scheme]() { if (interserver_scheme != address.scheme) - throw Exception("Interserver schemes are different: '" + interserver_scheme - + "' != '" + address.scheme + "', can't fetch part from " + address.host, - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Interserver schemes are different: '{}' != '{}', can't fetch part from {}", + interserver_scheme, address.scheme, address.host); return fetcher.fetchSelectedPart( metadata_snapshot, getContext(), entry.new_part_name, source_replica_path, @@ -2452,16 +2453,15 @@ void StorageReplicatedMergeTree::cloneReplica(const String & source_replica, Coo } else if (rc == Coordination::Error::ZNODEEXISTS) { - throw Exception( - "Can not clone replica, because the " + source_replica + " updated to new ClickHouse version", - ErrorCodes::REPLICA_STATUS_CHANGED); + throw Exception(ErrorCodes::REPLICA_STATUS_CHANGED, + "Cannot clone replica, because the {} was updated to a new ClickHouse version", source_replica); } else if (responses[1]->error == Coordination::Error::ZBADVERSION) { /// If is_lost node version changed than source replica also lost, /// so we cannot clone from it. 
- throw Exception( - "Can not clone replica, because the " + source_replica + " became lost", ErrorCodes::REPLICA_STATUS_CHANGED); + throw Exception(ErrorCodes::REPLICA_STATUS_CHANGED, "Can not clone replica, because the {} became lost", + source_replica); } else if (responses.back()->error == Coordination::Error::ZBADVERSION) { @@ -3214,7 +3214,7 @@ void StorageReplicatedMergeTree::mergeSelectingTask() future_merged_part->parts, future_merged_part->name, future_merged_part->uuid, - future_merged_part->type, + future_merged_part->part_format, deduplicate, deduplicate_by_columns, nullptr, @@ -3305,7 +3305,7 @@ StorageReplicatedMergeTree::CreateMergeEntryResult StorageReplicatedMergeTree::c const DataPartsVector & parts, const String & merged_name, const UUID & merged_part_uuid, - const MergeTreeDataPartType & merged_part_type, + const MergeTreeDataPartFormat & merged_part_format, bool deduplicate, const Names & deduplicate_by_columns, ReplicatedMergeTreeLogEntryData * out_log_entry, @@ -3343,7 +3343,7 @@ StorageReplicatedMergeTree::CreateMergeEntryResult StorageReplicatedMergeTree::c entry.source_replica = replica_name; entry.new_part_name = merged_name; entry.new_part_uuid = merged_part_uuid; - entry.new_part_type = merged_part_type; + entry.new_part_format = merged_part_format; entry.merge_type = merge_type; entry.deduplicate = deduplicate; entry.deduplicate_by_columns = deduplicate_by_columns; @@ -3759,6 +3759,9 @@ String StorageReplicatedMergeTree::findReplicaHavingCoveringPart( */ void StorageReplicatedMergeTree::updateQuorum(const String & part_name, bool is_parallel) { + if (is_parallel && format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Parallel quorum inserts are not compatible with the deprecated syntax of *MergeTree engines"); + auto zookeeper = getZooKeeper(); /// Information on which replicas a part has been added, if the quorum has not yet been reached. @@ -3867,6 +3870,8 @@ void StorageReplicatedMergeTree::cleanLastPartNode(const String & partition_id) { auto zookeeper = getZooKeeper(); + LOG_DEBUG(log, "Cleaning up last parent node for partition {}", partition_id); + /// The name of the previous part for which the quorum was reached. 
const String quorum_last_part_path = fs::path(zookeeper_path) / "quorum" / "last_part"; @@ -3897,6 +3902,7 @@ void StorageReplicatedMergeTree::cleanLastPartNode(const String & partition_id) if (code == Coordination::Error::ZOK) { + LOG_DEBUG(log, "Last parent node for partition {} is cleaned up", partition_id); break; } else if (code == Coordination::Error::ZNONODE) @@ -3918,7 +3924,7 @@ void StorageReplicatedMergeTree::cleanLastPartNode(const String & partition_id) bool StorageReplicatedMergeTree::partIsInsertingWithParallelQuorum(const MergeTreePartInfo & part_info) const { auto zookeeper = getZooKeeper(); - return zookeeper->exists(fs::path(zookeeper_path) / "quorum" / "parallel" / part_info.getPartNameAndCheckFormat(format_version)); + return zookeeper->exists(fs::path(zookeeper_path) / "quorum" / "parallel" / part_info.getPartNameV1()); } @@ -4084,9 +4090,8 @@ bool StorageReplicatedMergeTree::fetchPart( get_part = [&, address, timeouts, credentials, interserver_scheme]() { if (interserver_scheme != address.scheme) - throw Exception("Interserver schemes are different: '" + interserver_scheme - + "' != '" + address.scheme + "', can't fetch part from " + address.host, - ErrorCodes::INTERSERVER_SCHEME_DOESNT_MATCH); + throw Exception(ErrorCodes::INTERSERVER_SCHEME_DOESNT_MATCH, "Interserver schemes are different: " + "'{}' != '{}', can't fetch part from {}", interserver_scheme, address.scheme, address.host); return fetcher.fetchSelectedPart( metadata_snapshot, @@ -4260,9 +4265,8 @@ MutableDataPartStoragePtr StorageReplicatedMergeTree::fetchExistsPart( get_part = [&, address, timeouts, interserver_scheme, credentials]() { if (interserver_scheme != address.scheme) - throw Exception("Interserver schemes are different: '" + interserver_scheme - + "' != '" + address.scheme + "', can't fetch part from " + address.host, - ErrorCodes::INTERSERVER_SCHEME_DOESNT_MATCH); + throw Exception(ErrorCodes::INTERSERVER_SCHEME_DOESNT_MATCH, "Interserver schemes are different: " + "'{}' != '{}', can't fetch part from {}", interserver_scheme, address.scheme, address.host); return fetcher.fetchSelectedPart( metadata_snapshot, getContext(), part_name, source_replica_path, @@ -4277,7 +4281,7 @@ MutableDataPartStoragePtr StorageReplicatedMergeTree::fetchExistsPart( part = get_part(); if (part->getDataPartStorage().getDiskName() != replaced_disk->getName()) - throw Exception("Part " + part->name + " fetched on wrong disk " + part->getDataPartStorage().getDiskName(), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} fetched on wrong disk {}", part->name, part->getDataPartStorage().getDiskName()); auto replaced_path = fs::path(replaced_part_path); part->getDataPartStorage().rename(replaced_path.parent_path(), replaced_path.filename(), nullptr, true, false); @@ -4735,7 +4739,7 @@ std::optional StorageReplicatedMergeTree::distributedWrite(const const Settings & settings = local_context->getSettingsRef(); if (settings.max_distributed_depth && local_context->getClientInfo().distributed_depth >= settings.max_distributed_depth) - throw Exception("Maximum distributed depth exceeded", ErrorCodes::TOO_LARGE_DISTRIBUTED_DEPTH); + throw Exception(ErrorCodes::TOO_LARGE_DISTRIBUTED_DEPTH, "Maximum distributed depth exceeded"); auto & select = query.select->as(); @@ -4791,10 +4795,10 @@ bool StorageReplicatedMergeTree::optimize( auto handle_noop = [&](FormatStringHelper fmt_string, Args && ...args) { - PreformattedMessage message = fmt_string.format(std::forward(args)...); + PreformattedMessage 
message = fmt_string.format(std::forward(args)...); LOG_DEBUG(log, message); if (query_context->getSettingsRef().optimize_throw_if_noop) - throw Exception(message, ErrorCodes::CANNOT_ASSIGN_OPTIMIZE); + throw Exception(std::move(message), ErrorCodes::CANNOT_ASSIGN_OPTIMIZE); return false; }; @@ -4863,9 +4867,12 @@ bool StorageReplicatedMergeTree::optimize( ReplicatedMergeTreeLogEntryData merge_entry; CreateMergeEntryResult create_result = createLogEntryToMergeParts( zookeeper, future_merged_part->parts, - future_merged_part->name, future_merged_part->uuid, future_merged_part->type, + future_merged_part->name, + future_merged_part->uuid, + future_merged_part->part_format, deduplicate, deduplicate_by_columns, - &merge_entry, can_merge.getVersion(), future_merged_part->merge_type); + &merge_entry, can_merge.getVersion(), + future_merged_part->merge_type); if (create_result == CreateMergeEntryResult::MissingPart) { @@ -5216,14 +5223,17 @@ void StorageReplicatedMergeTree::alter( else if (rc == Coordination::Error::ZBADVERSION) { if (results[0]->error != Coordination::Error::ZOK) - throw Exception("Metadata on replica is not up to date with common metadata in Zookeeper. It means that this replica still not applied some of previous alters." " Probably too many alters executing concurrently (highly not recommended). You can retry this error", - ErrorCodes::CANNOT_ASSIGN_ALTER); + throw Exception(ErrorCodes::CANNOT_ASSIGN_ALTER, + "Metadata on replica is not up to date with common metadata in Zookeeper. " + "It means that this replica has still not applied some of the previous alters." + " Probably too many alters executing concurrently (highly not recommended). " + "You can retry this error"); /// Cannot retry automatically, because some zookeeper ops were lost on the first attempt. Will retry on DDLWorker-level. 
if (query_context->getZooKeeperMetadataTransaction()) - throw Exception("Cannot execute alter, because mutations version was suddenly changed due to concurrent alter", - ErrorCodes::CANNOT_ASSIGN_ALTER); + throw Exception(ErrorCodes::CANNOT_ASSIGN_ALTER, + "Cannot execute alter, because mutations version was suddenly changed due " + "to concurrent alter"); continue; } @@ -5380,7 +5390,7 @@ void StorageReplicatedMergeTree::dropPartNoWaitNoThrow(const String & part_name) { assertNotReadonly(); if (!is_leader) - throw Exception("DROP PART cannot be done on this replica because it is not a leader", ErrorCodes::NOT_A_LEADER); + throw Exception(ErrorCodes::NOT_A_LEADER, "DROP PART cannot be done on this replica because it is not a leader"); zkutil::ZooKeeperPtr zookeeper = getZooKeeperAndAssertNotReadonly(); LogEntry entry; @@ -5392,7 +5402,7 @@ void StorageReplicatedMergeTree::dropPart(const String & part_name, bool detach, { assertNotReadonly(); if (!is_leader) - throw Exception("DROP PART cannot be done on this replica because it is not a leader", ErrorCodes::NOT_A_LEADER); + throw Exception(ErrorCodes::NOT_A_LEADER, "DROP PART cannot be done on this replica because it is not a leader"); zkutil::ZooKeeperPtr zookeeper = getZooKeeperAndAssertNotReadonly(); LogEntry entry; @@ -5421,7 +5431,7 @@ void StorageReplicatedMergeTree::dropPartition(const ASTPtr & partition, bool de { assertNotReadonly(); if (!is_leader) - throw Exception("DROP PARTITION cannot be done on this replica because it is not a leader", ErrorCodes::NOT_A_LEADER); + throw Exception(ErrorCodes::NOT_A_LEADER, "DROP PARTITION cannot be done on this replica because it is not a leader"); zkutil::ZooKeeperPtr zookeeper = getZooKeeperAndAssertNotReadonly(); @@ -5450,7 +5460,7 @@ void StorageReplicatedMergeTree::truncate( assertNotReadonly(); if (!is_leader) - throw Exception("TRUNCATE cannot be done on this replica because it is not a leader", ErrorCodes::NOT_A_LEADER); + throw Exception(ErrorCodes::NOT_A_LEADER, "TRUNCATE cannot be done on this replica because it is not a leader"); waitForOutdatedPartsToBeLoaded(); zkutil::ZooKeeperPtr zookeeper = getZooKeeperAndAssertNotReadonly(); @@ -5524,18 +5534,19 @@ void StorageReplicatedMergeTree::checkTableCanBeRenamed(const StorageID & new_na return; } - throw Exception( - "Cannot rename Replicated table, because zookeeper_path contains implicit 'database' or 'table' macro. " - "We cannot rename path in ZooKeeper, so path may become inconsistent with table name. If you really want to rename table, " - "you should edit metadata file first and restart server or reattach the table.", - ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Cannot rename Replicated table, because zookeeper_path contains implicit 'database' " + "or 'table' macro. We cannot rename path " + "in ZooKeeper, so path may become inconsistent with table name. " + "If you really want to rename table, you should edit metadata file first and restart server or reattach the table."); } assert(renaming_restrictions == RenamingRestrictions::ALLOW_PRESERVING_UUID); if (!new_name.hasUUID() && getStorageID().hasUUID()) - throw Exception("Cannot move Replicated table to Ordinary database, because zookeeper_path contains implicit 'uuid' macro. 
" - "If you really want to rename table, " - "you should edit metadata file first and restart server or reattach the table.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Cannot move Replicated table to Ordinary database, because zookeeper_path contains implicit " + "'uuid' macro. If you really want to rename table, you should edit metadata file first " + "and restart server or reattach the table."); } void StorageReplicatedMergeTree::rename(const String & new_path_to_table_data, const StorageID & new_table_id) @@ -5823,7 +5834,7 @@ bool StorageReplicatedMergeTree::tryWaitForReplicaToProcessLogEntry( } else { - throw Exception("Logical error: unexpected name of log node: " + entry.znode_name, ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: unexpected name of log node: {}", entry.znode_name); } /** Second - find the corresponding entry in the queue of the specified replica. @@ -6080,7 +6091,7 @@ void StorageReplicatedMergeTree::fetchPartition( String auxiliary_zookeeper_name = zkutil::extractZooKeeperName(expand_from); String from = zkutil::extractZooKeeperPath(expand_from, /* check_starts_with_slash */ true); if (from.empty()) - throw Exception("ZooKeeper path should not be empty", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "ZooKeeper path should not be empty"); zkutil::ZooKeeperPtr zookeeper; if (auxiliary_zookeeper_name != default_zookeeper_name) @@ -6102,7 +6113,7 @@ void StorageReplicatedMergeTree::fetchPartition( * Unreliable (there is a race condition) - such a part may appear a little later. */ if (checkIfDetachedPartExists(part_name)) - throw Exception(ErrorCodes::DUPLICATE_DATA_PART, "Detached part " + part_name + " already exists."); + throw Exception(ErrorCodes::DUPLICATE_DATA_PART, "Detached part {} already exists.", part_name); LOG_INFO(log, "Will fetch part {} from shard {} (zookeeper '{}')", part_name, from_, auxiliary_zookeeper_name); try @@ -6129,7 +6140,7 @@ void StorageReplicatedMergeTree::fetchPartition( * Unreliable (there is a race condition) - such a partition may appear a little later. */ if (checkIfDetachedPartitionExists(partition_id)) - throw Exception("Detached partition " + partition_id + " already exists.", ErrorCodes::PARTITION_ALREADY_EXISTS); + throw Exception(ErrorCodes::PARTITION_ALREADY_EXISTS, "Detached partition {} already exists.", partition_id); zkutil::Strings replicas; zkutil::Strings active_replicas; @@ -6147,7 +6158,7 @@ void StorageReplicatedMergeTree::fetchPartition( active_replicas.push_back(replica); if (active_replicas.empty()) - throw Exception("No active replicas for shard " + from, ErrorCodes::NO_ACTIVE_REPLICAS); + throw Exception(ErrorCodes::NO_ACTIVE_REPLICAS, "No active replicas for shard {}", from); /** You must select the best (most relevant) replica. * This is a replica with the maximum `log_pointer`, then with the minimum `queue` size. @@ -6180,7 +6191,7 @@ void StorageReplicatedMergeTree::fetchPartition( } if (best_replica.empty()) - throw Exception("Logical error: cannot choose best replica.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: cannot choose best replica."); LOG_INFO(log, "Found {} replicas, {} of them are active. Selected {} to fetch from.", replicas.size(), active_replicas.size(), best_replica); @@ -6201,7 +6212,7 @@ void StorageReplicatedMergeTree::fetchPartition( LOG_INFO(log, "Some of parts ({}) are missing. 
Will try to fetch covering parts.", missing_parts.size()); if (try_no >= query_context->getSettings().max_fetch_partition_retries_count) - throw Exception("Too many retries to fetch parts from " + best_replica_path, ErrorCodes::TOO_MANY_RETRIES_TO_FETCH_PARTS); + throw Exception(ErrorCodes::TOO_MANY_RETRIES_TO_FETCH_PARTS, "Too many retries to fetch parts from {}", best_replica_path); Strings parts = zookeeper->getChildren(fs::path(best_replica_path) / "parts"); ActiveDataPartSet active_parts_set(format_version, parts); @@ -6222,7 +6233,7 @@ void StorageReplicatedMergeTree::fetchPartition( parts_to_fetch = std::move(parts_to_fetch_partition); if (parts_to_fetch.empty()) - throw Exception("Partition " + partition_id + " on " + best_replica_path + " doesn't exist", ErrorCodes::PARTITION_DOESNT_EXIST); + throw Exception(ErrorCodes::PARTITION_DOESNT_EXIST, "Partition {} on {} doesn't exist", partition_id, best_replica_path); } else { @@ -6265,7 +6276,7 @@ void StorageReplicatedMergeTree::fetchPartition( } -void StorageReplicatedMergeTree::mutate(const MutationCommands & commands, ContextPtr query_context, bool force_wait) +void StorageReplicatedMergeTree::mutate(const MutationCommands & commands, ContextPtr query_context) { /// Overview of the mutation algorithm. /// @@ -6369,8 +6380,9 @@ void StorageReplicatedMergeTree::mutate(const MutationCommands & commands, Conte { /// Cannot retry automatically, because some zookeeper ops were lost on the first attempt. Will retry on DDLWorker-level. if (query_context->getZooKeeperMetadataTransaction()) - throw Exception("Cannot execute alter, because mutations version was suddenly changed due to concurrent alter", - ErrorCodes::CANNOT_ASSIGN_ALTER); + throw Exception(ErrorCodes::CANNOT_ASSIGN_ALTER, + "Cannot execute alter, because mutations version was suddenly changed due " + "to concurrent alter"); LOG_TRACE(log, "Version conflict when trying to create a mutation node, retrying..."); continue; } @@ -6378,8 +6390,7 @@ void StorageReplicatedMergeTree::mutate(const MutationCommands & commands, Conte throw Coordination::Exception("Unable to create a mutation znode", rc); } - const size_t mutations_sync = force_wait ? 2 : query_context->getSettingsRef().mutations_sync; - waitMutation(mutation_entry.znode_name, mutations_sync); + waitMutation(mutation_entry.znode_name, query_context->getSettingsRef().mutations_sync); } void StorageReplicatedMergeTree::waitMutation(const String & znode_name, size_t mutations_sync) const @@ -6906,9 +6917,9 @@ void StorageReplicatedMergeTree::replacePartitionFrom( /// Save deduplication block ids with special prefix replace_partition if (!canReplacePartition(src_part)) - throw Exception( - "Cannot replace partition '" + partition_id + "' because part '" + src_part->name + "' has inconsistent granularity with table", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Cannot replace partition '{}' because part '{}" + "' has inconsistent granularity with table", partition_id, src_part->name); String hash_hex = src_part->checksums.getTotalChecksumHex(); @@ -7011,9 +7022,9 @@ void StorageReplicatedMergeTree::replacePartitionFrom( { /// Cannot retry automatically, because some zookeeper ops were lost on the first attempt. Will retry on DDLWorker-level. 
if (query_context->getZooKeeperMetadataTransaction()) - throw Exception( - "Cannot execute alter, because alter partition version was suddenly changed due to concurrent alter", - ErrorCodes::CANNOT_ASSIGN_ALTER); + throw Exception(ErrorCodes::CANNOT_ASSIGN_ALTER, + "Cannot execute alter, because alter partition version was suddenly changed due " + "to concurrent alter"); continue; } else @@ -7070,13 +7081,16 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta auto dest_table_storage = std::dynamic_pointer_cast(dest_table); if (!dest_table_storage) - throw Exception("Table " + getStorageID().getNameForLogs() + " supports movePartitionToTable only for ReplicatedMergeTree family of table engines." - " Got " + dest_table->getName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Table {} supports movePartitionToTable only for ReplicatedMergeTree family of table engines. " + "Got {}", getStorageID().getNameForLogs(), dest_table->getName()); if (dest_table_storage->getStoragePolicy() != this->getStoragePolicy()) - throw Exception("Destination table " + dest_table_storage->getStorageID().getNameForLogs() + - " should have the same storage policy of source table " + getStorageID().getNameForLogs() + ". " + - getStorageID().getNameForLogs() + ": " + this->getStoragePolicy()->getName() + ", " + - getStorageID().getNameForLogs() + ": " + dest_table_storage->getStoragePolicy()->getName(), ErrorCodes::UNKNOWN_POLICY); + throw Exception(ErrorCodes::UNKNOWN_POLICY, + "Destination table {} should have the same storage policy of source table {}. {}: {}, {}: {}", + dest_table_storage->getStorageID().getNameForLogs(), + getStorageID().getNameForLogs(), getStorageID().getNameForLogs(), + this->getStoragePolicy()->getName(), getStorageID().getNameForLogs(), + dest_table_storage->getStoragePolicy()->getName()); auto dest_metadata_snapshot = dest_table->getInMemoryMetadataPtr(); auto metadata_snapshot = getInMemoryMetadataPtr(); @@ -7142,9 +7156,9 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta for (const auto & src_part : src_all_parts) { if (!dest_table_storage->canReplacePartition(src_part)) - throw Exception( - "Cannot move partition '" + partition_id + "' because part '" + src_part->name + "' has inconsistent granularity with table", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Cannot move partition '{}' because part '{}" + "' has inconsistent granularity with table", partition_id, src_part->name); String hash_hex = src_part->checksums.getTotalChecksumHex(); String block_id_path; @@ -7300,7 +7314,8 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta return; } - throw Exception(ErrorCodes::CANNOT_ASSIGN_ALTER, "Cannot assign ALTER PARTITION, because another ALTER PARTITION query was concurrently executed"); + throw Exception(ErrorCodes::CANNOT_ASSIGN_ALTER, + "Cannot assign ALTER PARTITION, because another ALTER PARTITION query was concurrently executed"); } void StorageReplicatedMergeTree::movePartitionToShard( @@ -7309,10 +7324,10 @@ void StorageReplicatedMergeTree::movePartitionToShard( /// This is a lightweight operation that only optimistically checks if it could succeed and queues tasks. 
if (!move_part) - throw Exception("MOVE PARTITION TO SHARD is not supported, use MOVE PART instead", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MOVE PARTITION TO SHARD is not supported, use MOVE PART instead"); if (zkutil::normalizeZooKeeperPath(zookeeper_path, /* check_starts_with_slash */ true) == zkutil::normalizeZooKeeperPath(to, /* check_starts_with_slash */ true)) - throw Exception("Source and destination are the same", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Source and destination are the same"); auto zookeeper = getZooKeeperAndAssertNotReadonly(); @@ -7334,13 +7349,15 @@ void StorageReplicatedMergeTree::movePartitionToShard( { if (partIsLastQuorumPart(part->info)) { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Part {} is last inserted part with quorum in partition. Would not be able to drop", part_name); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Part {} is last inserted part with quorum in partition. Would not be able to drop", + part_name); } /// canMergeSinglePart is overlapping with dropPart, let's try to use the same code. String out_reason; if (!merge_pred.canMergeSinglePart(part, &out_reason)) - throw Exception(ErrorCodes::PART_IS_TEMPORARILY_LOCKED, "Part is busy, reason: " + out_reason); + throw Exception(ErrorCodes::PART_IS_TEMPORARILY_LOCKED, "Part is busy, reason: {}", out_reason); } { @@ -7362,7 +7379,7 @@ void StorageReplicatedMergeTree::movePartitionToShard( } if (src_pins.part_uuids.contains(part->uuid) || dst_pins.part_uuids.contains(part->uuid)) - throw Exception(ErrorCodes::PART_IS_TEMPORARILY_LOCKED, "Part {} has it's uuid ({}) already pinned.", part_name, toString(part->uuid)); + throw Exception(ErrorCodes::PART_IS_TEMPORARILY_LOCKED, "Part {} has it's uuid ({}) already pinned.", part_name, part->uuid); src_pins.part_uuids.insert(part->uuid); dst_pins.part_uuids.insert(part->uuid); @@ -7530,7 +7547,7 @@ bool StorageReplicatedMergeTree::waitForShrinkingQueueSize(size_t queue_size, UI return false; if (partial_shutdown_called) - throw Exception("Shutdown is called for table", ErrorCodes::ABORTED); + throw Exception(ErrorCodes::ABORTED, "Shutdown is called for table"); } return true; @@ -7570,14 +7587,14 @@ bool StorageReplicatedMergeTree::dropPartImpl( if (!merge_pred.canMergeSinglePart(part, &out_reason)) { if (throw_if_noop) - throw Exception(ErrorCodes::PART_IS_TEMPORARILY_LOCKED, out_reason); + throw Exception::createDeprecated(out_reason, ErrorCodes::PART_IS_TEMPORARILY_LOCKED); return false; } if (merge_pred.partParticipatesInReplaceRange(part, &out_reason)) { if (throw_if_noop) - throw Exception(ErrorCodes::PART_IS_TEMPORARILY_LOCKED, out_reason); + throw Exception::createDeprecated(out_reason, ErrorCodes::PART_IS_TEMPORARILY_LOCKED); return false; } @@ -7729,9 +7746,9 @@ void StorageReplicatedMergeTree::dropAllPartsInPartitions( { /// Cannot retry automatically, because some zookeeper ops were lost on the first attempt. Will retry on DDLWorker-level. 
if (query_context->getZooKeeperMetadataTransaction()) - throw Exception( - "Cannot execute alter, because alter partition version was suddenly changed due to concurrent alter", - ErrorCodes::CANNOT_ASSIGN_ALTER); + throw Exception(ErrorCodes::CANNOT_ASSIGN_ALTER, + "Cannot execute alter, because alter partition version was suddenly changed due " + "to concurrent alter"); continue; } else @@ -7767,6 +7784,9 @@ StorageReplicatedMergeTree::LogEntryPtr StorageReplicatedMergeTree::dropAllParts void StorageReplicatedMergeTree::enqueuePartForCheck(const String & part_name, time_t delay_to_check_seconds) { MergeTreePartInfo covering_drop_range; + /// NOTE This check is just an optimization, it's not reliable for two reasons: + /// (1) drop entry could be removed concurrently and (2) it does not take REPLACE_RANGE into account. + /// See also ReplicatedMergeTreePartCheckThread::cancelRemovedPartsCheck if (queue.hasDropRange(MergeTreePartInfo::fromPartName(part_name, format_version), &covering_drop_range)) { LOG_WARNING(log, "Do not enqueue part {} for check because it's covered by DROP_RANGE {} and going to be removed", @@ -7796,6 +7816,7 @@ CheckResults StorageReplicatedMergeTree::checkData(const ASTPtr & query, Context } catch (const Exception & ex) { + tryLogCurrentException(log, __PRETTY_FUNCTION__); results.emplace_back(part->name, false, "Check of part finished with error: '" + ex.message() + "'"); } } @@ -7894,21 +7915,17 @@ String StorageReplicatedMergeTree::getTableSharedID() const { std::lock_guard lock(table_shared_id_mutex); - /// Can happen if table was partially initialized before drop by DatabaseCatalog - if (table_shared_id == UUIDHelpers::Nil) + /// If we have metadata, or we don't know about metadata -- try to create shared ID + /// Otherwise table is already dropped, doesn't make sense to do anything with shared ID + if (has_metadata_in_zookeeper.value_or(true)) { - if (has_metadata_in_zookeeper.has_value()) - { - if (*has_metadata_in_zookeeper) - createTableSharedID(); - else - throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Table {} is already dropped", getStorageID().getNameForLogs()); - } - else - { - throw Exception(ErrorCodes::NO_ZOOKEEPER, "No connection to ZooKeeper, cannot get shared table ID for table {}. 
" - "It will be resolve automatically when connection will be established", getStorageID().getNameForLogs()); - } + /// Can happen if table was partially initialized before drop by DatabaseCatalog + if (table_shared_id == UUIDHelpers::Nil) + createTableSharedID(); + } + else + { + return toString(UUIDHelpers::Nil); } return toString(table_shared_id); @@ -7955,6 +7972,11 @@ void StorageReplicatedMergeTree::createTableSharedID() const id = zookeeper->get(zookeeper_table_id_path); LOG_DEBUG(log, "Shared ID on path {} concurrently created, will set ID {}", zookeeper_table_id_path, id); } + else if (code == Coordination::Error::ZNONODE) /// table completely dropped, we can choose any id we want + { + id = toString(UUIDHelpers::Nil); + LOG_DEBUG(log, "Table was completely dropped, we can use anything as ID (will use {})", id); + } else if (code != Coordination::Error::ZOK) { throw zkutil::KeeperException(code, zookeeper_table_id_path); @@ -8096,6 +8118,13 @@ StorageReplicatedMergeTree::unlockSharedData(const IMergeTreeDataPart & part, co return std::make_pair(true, NameSet{}); } + auto shared_id = getTableSharedID(); + if (shared_id == toString(UUIDHelpers::Nil)) + { + LOG_TRACE(log, "Part {} blobs can be removed, because table {} was completely dropped", part.name, getStorageID().getNameForLogs()); + return std::make_pair(true, NameSet{}); + } + /// If part is temporary refcount file may be absent if (part.getDataPartStorage().exists(IMergeTreeDataPart::FILE_FOR_REFERENCES_CHECK)) { @@ -8135,7 +8164,7 @@ StorageReplicatedMergeTree::unlockSharedData(const IMergeTreeDataPart & part, co return std::make_pair(true, NameSet{}); return unlockSharedDataByID( - part.getUniqueId(), getTableSharedID(), part.name, replica_name, + part.getUniqueId(), shared_id, part.name, replica_name, part.getDataPartStorage().getDiskType(), zookeeper, *getSettings(), log, zookeeper_path, format_version); } @@ -8555,7 +8584,6 @@ bool StorageReplicatedMergeTree::createEmptyPartInsteadOfLost(zkutil::ZooKeeperP LOG_INFO(log, "Going to replace lost part {} with empty part", lost_part_name); auto new_part_info = MergeTreePartInfo::fromPartName(lost_part_name, format_version); - auto metadata_snapshot = getInMemoryMetadataPtr(); MergeTreePartition partition; @@ -8603,7 +8631,9 @@ bool StorageReplicatedMergeTree::createEmptyPartInsteadOfLost(zkutil::ZooKeeperP /// source replica will replace lost part with empty part and we /// will fetch this empty part instead of our source parts. This /// will make replicas consistent, but some data will be lost. 
- throw Exception(ErrorCodes::INCORRECT_DATA, "Tried to create empty part {}, but it replaces existing parts {}.", lost_part_name, fmt::join(part_names, ", ")); + throw Exception(ErrorCodes::INCORRECT_DATA, + "Tried to create empty part {}, but it replaces existing parts {}.", + lost_part_name, fmt::join(part_names, ", ")); } lockSharedData(*new_data_part, false, {}); @@ -8650,7 +8680,9 @@ bool StorageReplicatedMergeTree::createEmptyPartInsteadOfLost(zkutil::ZooKeeperP } else { - throw Exception(ErrorCodes::DUPLICATE_DATA_PART, "Part {} already exists on replica {} on path {}", lost_part_name, replica, current_part_path); + throw Exception(ErrorCodes::DUPLICATE_DATA_PART, + "Part {} already exists on replica {} on path {}", + lost_part_name, replica, current_part_path); } } @@ -8724,7 +8756,8 @@ void StorageReplicatedMergeTree::createZeroCopyLockNode( } else if (error == Coordination::Error::ZNONODE && mode != zkutil::CreateMode::Persistent) { - throw Exception(ErrorCodes::NOT_FOUND_NODE, "Cannot create ephemeral zero copy lock {} because part was unlocked from zookeeper", zookeeper_node); + throw Exception(ErrorCodes::NOT_FOUND_NODE, + "Cannot create ephemeral zero copy lock {} because part was unlocked from zookeeper", zookeeper_node); } } else @@ -8750,7 +8783,8 @@ void StorageReplicatedMergeTree::createZeroCopyLockNode( else if (error == Coordination::Error::ZNONODE && mode != zkutil::CreateMode::Persistent) { /// Ephemeral locks used during fetches so if parent node was removed we cannot do anything - throw Exception(ErrorCodes::NOT_FOUND_NODE, "Cannot create ephemeral zero copy lock {} because part was unlocked from zookeeper", zookeeper_node); + throw Exception(ErrorCodes::NOT_FOUND_NODE, + "Cannot create ephemeral zero copy lock {} because part was unlocked from zookeeper", zookeeper_node); } } } @@ -8766,7 +8800,9 @@ void StorageReplicatedMergeTree::createZeroCopyLockNode( if (!created) { String mode_str = mode == zkutil::CreateMode::Persistent ? 
"persistent" : "ephemeral"; - throw Exception(ErrorCodes::NOT_FOUND_NODE, "Cannot create {} zero copy lock {} because part was unlocked from zookeeper", mode_str, zookeeper_node); + throw Exception(ErrorCodes::NOT_FOUND_NODE, + "Cannot create {} zero copy lock {} because part was unlocked from zookeeper", + mode_str, zookeeper_node); } } diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 71f143aa8a4..042e6acf4e2 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -153,7 +153,7 @@ public: void alter(const AlterCommands & commands, ContextPtr query_context, AlterLockHolder & table_lock_holder) override; - void mutate(const MutationCommands & commands, ContextPtr context, bool force_wait) override; + void mutate(const MutationCommands & commands, ContextPtr context) override; void waitMutation(const String & znode_name, size_t mutations_sync) const; std::vector getMutationsStatus() const override; CancellationCode killMutation(const String & mutation_id) override; @@ -528,7 +528,7 @@ private: String getChecksumsForZooKeeper(const MergeTreeDataPartChecksums & checksums) const; /// Accepts a PreActive part, atomically checks its checksums with ones on other replicas and commit the part - DataPartsVector checkPartChecksumsAndCommit(Transaction & transaction, const DataPartPtr & part, std::optional hardlinked_files = {}); + DataPartsVector checkPartChecksumsAndCommit(Transaction & transaction, const MutableDataPartPtr & part, std::optional hardlinked_files = {}); bool partIsAssignedToBackgroundOperation(const DataPartPtr & part) const override; @@ -629,7 +629,7 @@ private: const DataPartsVector & parts, const String & merged_name, const UUID & merged_part_uuid, - const MergeTreeDataPartType & merged_part_type, + const MergeTreeDataPartFormat & merged_part_format, bool deduplicate, const Names & deduplicate_by_columns, ReplicatedMergeTreeLogEntryData * out_log_entry, diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 736aa0d8a5c..205b0c7d067 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -156,7 +156,7 @@ public: , list_objects_scheduler(threadPoolCallbackRunner(list_objects_pool, "ListObjects")) { if (globbed_uri.bucket.find_first_of("*?{") != globbed_uri.bucket.npos) - throw Exception("Expression can not have wildcards inside bucket name", ErrorCodes::UNEXPECTED_EXPRESSION); + throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "Expression can not have wildcards inside bucket name"); const String key_prefix = globbed_uri.key.substr(0, globbed_uri.key.find_first_of("*?{")); @@ -416,6 +416,7 @@ public: const std::string & version_id_, const std::vector & keys_, const String & bucket_, + const S3Settings::RequestSettings & request_settings_, ASTPtr query_, const Block & virtual_header_, ContextPtr context_, @@ -469,7 +470,7 @@ public: /// (which means we eventually need this info anyway, so it should be ok to do it now) if (object_infos_) { - info = S3::getObjectInfo(client_, bucket, key, version_id_); + info = S3::getObjectInfo(client_, bucket, key, version_id_, request_settings_); total_size += info->size; String path = fs::path(bucket) / key; @@ -510,14 +511,15 @@ StorageS3Source::KeysIterator::KeysIterator( const std::string & version_id_, const std::vector & keys_, const String & bucket_, + const S3Settings::RequestSettings & request_settings_, ASTPtr query, const Block & virtual_header, ContextPtr context, ObjectInfos * object_infos, 
Strings * read_keys) : pimpl(std::make_shared( - client_, version_id_, keys_, bucket_, query, - virtual_header, context, object_infos, read_keys)) + client_, version_id_, keys_, bucket_, request_settings_, + query, virtual_header, context, object_infos, read_keys)) { } @@ -579,22 +581,13 @@ StorageS3Source::StorageS3Source( reader_future = createReaderAsync(); } - -void StorageS3Source::onCancel() -{ - std::lock_guard lock(reader_mutex); - if (reader) - reader->cancel(); -} - - StorageS3Source::ReaderHolder StorageS3Source::createReader() { auto [current_key, info] = (*file_iterator)(); if (current_key.empty()) return {}; - size_t object_size = info ? info->size : S3::getObjectSize(*client, bucket, current_key, version_id); + size_t object_size = info ? info->size : S3::getObjectSize(*client, bucket, current_key, version_id, request_settings); int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); auto read_buf = wrapReadBufferWithCompressionMethod( @@ -708,8 +701,12 @@ Chunk StorageS3Source::generate() { while (true) { - if (!reader || isCancelled()) + if (isCancelled() || !reader) + { + if (reader) + reader->cancel(); break; + } Chunk chunk; if (reader->pull(chunk)) @@ -741,21 +738,19 @@ Chunk StorageS3Source::generate() return chunk; } - { - std::lock_guard lock(reader_mutex); - assert(reader_future.valid()); - reader = reader_future.get(); + assert(reader_future.valid()); + reader = reader_future.get(); - if (!reader) - break; + if (!reader) + break; - /// Even if task is finished the thread may be not freed in pool. - /// So wait until it will be freed before scheduling a new task. - create_reader_pool.wait(); - reader_future = createReaderAsync(); - } + /// Even if task is finished the thread may be not freed in pool. + /// So wait until it will be freed before scheduling a new task. + create_reader_pool.wait(); + reader_future = createReaderAsync(); } + return {}; } @@ -1016,7 +1011,7 @@ std::shared_ptr StorageS3::createFileIterator( { return std::make_shared( *s3_configuration.client, s3_configuration.uri.version_id, keys, - s3_configuration.uri.bucket, query, virtual_block, local_context, + s3_configuration.uri.bucket, s3_configuration.request_settings, query, virtual_block, local_context, object_infos, read_keys); } } @@ -1146,11 +1141,12 @@ SinkToStoragePtr StorageS3::write(const ASTPtr & query, const StorageMetadataPtr else { if (is_key_with_globs) - throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "S3 key '{}' contains globs, so the table is in readonly mode", s3_configuration.uri.key); + throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, + "S3 key '{}' contains globs, so the table is in readonly mode", s3_configuration.uri.key); bool truncate_in_insert = local_context->getSettingsRef().s3_truncate_on_insert; - if (!truncate_in_insert && S3::objectExists(*s3_configuration.client, s3_configuration.uri.bucket, keys.back(), s3_configuration.uri.version_id)) + if (!truncate_in_insert && S3::objectExists(*s3_configuration.client, s3_configuration.uri.bucket, keys.back(), s3_configuration.uri.version_id, s3_configuration.request_settings)) { if (local_context->getSettingsRef().s3_create_new_file_on_insert) { @@ -1162,16 +1158,17 @@ SinkToStoragePtr StorageS3::write(const ASTPtr & query, const StorageMetadataPtr new_key = keys[0].substr(0, pos) + "." + std::to_string(index) + (pos == std::string::npos ? 
"" : keys[0].substr(pos)); ++index; } - while (S3::objectExists(*s3_configuration.client, s3_configuration.uri.bucket, new_key, s3_configuration.uri.version_id)); + while (S3::objectExists(*s3_configuration.client, s3_configuration.uri.bucket, new_key, s3_configuration.uri.version_id, s3_configuration.request_settings)); keys.push_back(new_key); } else throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Object in bucket {} with key {} already exists. If you want to overwrite it, enable setting s3_truncate_on_insert, if you " - "want to create a new file on each insert, enable setting s3_create_new_file_on_insert", - s3_configuration.uri.bucket, - keys.back()); + ErrorCodes::BAD_ARGUMENTS, + "Object in bucket {} with key {} already exists. " + "If you want to overwrite it, enable setting s3_truncate_on_insert, if you " + "want to create a new file on each insert, enable setting s3_create_new_file_on_insert", + s3_configuration.uri.bucket, + keys.back()); } return std::make_shared( @@ -1191,7 +1188,8 @@ void StorageS3::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, updateS3Configuration(local_context, s3_configuration); if (is_key_with_globs) - throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "S3 key '{}' contains globs, so the table is in readonly mode", s3_configuration.uri.key); + throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, + "S3 key '{}' contains globs, so the table is in readonly mode", s3_configuration.uri.key); Aws::S3::Model::Delete delkeys; @@ -1211,7 +1209,7 @@ void StorageS3::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, if (!response.IsSuccess()) { const auto & err = response.GetError(); - throw Exception(std::to_string(static_cast(err.GetErrorType())) + ": " + err.GetMessage(), ErrorCodes::S3_ERROR); + throw Exception(ErrorCodes::S3_ERROR, "{}: {}", std::to_string(static_cast(err.GetErrorType())), err.GetMessage()); } } @@ -1301,9 +1299,9 @@ StorageS3Configuration StorageS3::getConfiguration(ASTs & engine_args, ContextPt /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression') if (engine_args.empty() || engine_args.size() > 5) - throw Exception( - "Storage S3 requires 1 to 5 arguments: url, [access_key_id, secret_access_key], name of used format and [compression_method].", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Storage S3 requires 1 to 5 arguments: " + "url, [access_key_id, secret_access_key], name of used format and [compression_method]."); auto * header_it = StorageURL::collectHeaders(engine_args, configuration.headers, local_context); if (header_it != engine_args.end()) @@ -1391,9 +1389,8 @@ ColumnsDescription StorageS3::getTableStructureFromDataImpl( if (first) throw Exception( ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, because there are no files with provided path in S3. You must specify " - "table structure manually", - format); + "Cannot extract table structure from {} format file, because there are no files with provided path " + "in S3. You must specify table structure manually", format); return nullptr; } @@ -1546,7 +1543,8 @@ std::optional StorageS3::tryGetColumnsFromCache( /// Note that in case of exception in getObjectInfo returned info will be empty, /// but schema cache will handle this case and won't return columns from cache /// because we can't say that it's valid without last modification time. 
- info = S3::getObjectInfo(*s3_configuration.client, s3_configuration.uri.bucket, *it, s3_configuration.uri.version_id, {}, /* throw_on_error= */ false); + info = S3::getObjectInfo(*s3_configuration.client, s3_configuration.uri.bucket, *it, s3_configuration.uri.version_id, s3_configuration.request_settings, + {}, {}, /* throw_on_error= */ false); if (object_infos) (*object_infos)[path] = info; } diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 02fcb7d624c..16e38249595 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include @@ -92,6 +92,7 @@ public: const std::string & version_id_, const std::vector & keys_, const String & bucket_, + const S3Settings::RequestSettings & request_settings_, ASTPtr query, const Block & virtual_header, ContextPtr context, @@ -145,8 +146,6 @@ public: Chunk generate() override; - void onCancel() override; - private: String name; String bucket; @@ -209,8 +208,6 @@ private: ReaderHolder reader; - /// onCancel and generate can be called concurrently - std::mutex reader_mutex; std::vector requested_virtual_columns; std::shared_ptr file_iterator; size_t download_thread_num = 1; diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp index 3ee10113b32..0ef02cac790 100644 --- a/src/Storages/StorageS3Cluster.cpp +++ b/src/Storages/StorageS3Cluster.cpp @@ -8,6 +8,7 @@ #include "Client/Connection.h" #include "Core/QueryProcessingStage.h" #include +#include #include #include #include diff --git a/src/Storages/StorageS3Settings.cpp b/src/Storages/StorageS3Settings.cpp index be83d60131a..ee0b1fd88bf 100644 --- a/src/Storages/StorageS3Settings.cpp +++ b/src/Storages/StorageS3Settings.cpp @@ -116,20 +116,17 @@ void S3Settings::RequestSettings::PartUploadSettings::validate() if (!upload_part_size_multiply_factor) throw Exception( ErrorCodes::INVALID_SETTING_VALUE, - "Setting upload_part_size_multiply_factor cannot be zero", - upload_part_size_multiply_factor); + "Setting upload_part_size_multiply_factor cannot be zero"); if (!upload_part_size_multiply_parts_count_threshold) throw Exception( ErrorCodes::INVALID_SETTING_VALUE, - "Setting upload_part_size_multiply_parts_count_threshold cannot be zero", - upload_part_size_multiply_parts_count_threshold); + "Setting upload_part_size_multiply_parts_count_threshold cannot be zero"); if (!max_part_number) throw Exception( ErrorCodes::INVALID_SETTING_VALUE, - "Setting max_part_number cannot be zero", - max_part_number); + "Setting max_part_number cannot be zero"); static constexpr size_t max_part_number_limit = 10000; if (max_part_number > max_part_number_limit) @@ -141,9 +138,10 @@ void S3Settings::RequestSettings::PartUploadSettings::validate() size_t maybe_overflow; if (common::mulOverflow(max_upload_part_size, upload_part_size_multiply_factor, maybe_overflow)) throw Exception( - ErrorCodes::INVALID_SETTING_VALUE, - "Setting upload_part_size_multiply_factor is too big ({}). Multiplication to max_upload_part_size ({}) will cause integer overflow", - ReadableSize(max_part_number), ReadableSize(max_part_number_limit)); + ErrorCodes::INVALID_SETTING_VALUE, + "Setting upload_part_size_multiply_factor is too big ({}). 
" + "Multiplication to max_upload_part_size ({}) will cause integer overflow", + ReadableSize(max_part_number), ReadableSize(max_part_number_limit)); std::unordered_set storage_class_names {"STANDARD", "INTELLIGENT_TIERING"}; if (!storage_class_name.empty() && !storage_class_names.contains(storage_class_name)) @@ -168,6 +166,7 @@ S3Settings::RequestSettings::RequestSettings(const NamedCollection & collection) max_single_read_retries = collection.getOrDefault("max_single_read_retries", max_single_read_retries); max_connections = collection.getOrDefault("max_connections", max_connections); list_object_keys_size = collection.getOrDefault("list_object_keys_size", list_object_keys_size); + allow_head_object_request = collection.getOrDefault("allow_head_object_request", allow_head_object_request); } S3Settings::RequestSettings::RequestSettings( @@ -182,6 +181,7 @@ S3Settings::RequestSettings::RequestSettings( max_connections = config.getUInt64(key + "max_connections", settings.s3_max_connections); check_objects_after_upload = config.getBool(key + "check_objects_after_upload", settings.s3_check_objects_after_upload); list_object_keys_size = config.getUInt64(key + "list_object_keys_size", settings.s3_list_object_keys_size); + allow_head_object_request = config.getBool(key + "allow_head_object_request", allow_head_object_request); /// NOTE: it would be better to reuse old throttlers to avoid losing token bucket state on every config reload, /// which could lead to exceeding limit for short time. But it is good enough unless very high `burst` values are used. diff --git a/src/Storages/StorageS3Settings.h b/src/Storages/StorageS3Settings.h index bce772859f0..61da0a37f62 100644 --- a/src/Storages/StorageS3Settings.h +++ b/src/Storages/StorageS3Settings.h @@ -67,6 +67,16 @@ struct S3Settings ThrottlerPtr get_request_throttler; ThrottlerPtr put_request_throttler; + /// If this is set to false then `S3::getObjectSize()` will use `GetObjectAttributes` request instead of `HeadObject`. + /// Details: `HeadObject` request never returns a response body (even if there is an error) however + /// if the request was sent without specifying a region in the endpoint (i.e. for example "https://test.s3.amazonaws.com/mydata.csv" + /// instead of "https://test.s3-us-west-2.amazonaws.com/mydata.csv") then that response body is one of the main ways to determine + /// the correct region and try to repeat the request again with the correct region. + /// For any other request type (`GetObject`, `ListObjects`, etc.) AWS SDK does that because they have response bodies, but for `HeadObject` + /// there is no response body so this way doesn't work. That's why it's better to use `GetObjectAttributes` requests sometimes. + /// See https://github.com/aws/aws-sdk-cpp/issues/1558 and also the function S3ErrorMarshaller::ExtractRegion() for more information. 
+ bool allow_head_object_request = true; + const PartUploadSettings & getUploadSettings() const { return upload_settings; } RequestSettings() = default; diff --git a/src/Storages/StorageSQLite.cpp b/src/Storages/StorageSQLite.cpp index 92f954ebb9d..706bc31122c 100644 --- a/src/Storages/StorageSQLite.cpp +++ b/src/Storages/StorageSQLite.cpp @@ -161,8 +161,7 @@ void registerStorageSQLite(StorageFactory & factory) ASTs & engine_args = args.engine_args; if (engine_args.size() != 2) - throw Exception("SQLite database requires 2 arguments: database path, table name", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "SQLite database requires 2 arguments: database path, table name"); for (auto & engine_arg : engine_args) engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, args.getLocalContext()); diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index 48f8adfece2..7c5ba497ec9 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -129,7 +129,7 @@ StorageSetOrJoinBase::StorageSetOrJoinBase( if (relative_path_.empty()) - throw Exception("Join and Set storages require data path", ErrorCodes::INCORRECT_FILE_NAME); + throw Exception(ErrorCodes::INCORRECT_FILE_NAME, "Join and Set storages require data path"); path = relative_path_; } @@ -162,7 +162,11 @@ std::optional StorageSet::totalBytes(const Settings &) const { return se void StorageSet::truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, ContextPtr, TableExclusiveLockHolder &) { - disk->removeRecursive(path); + if (disk->exists(path)) + disk->removeRecursive(path); + else + LOG_INFO(&Poco::Logger::get("StorageSet"), "Path {} is already removed from disk {}", path, disk->getName()); + disk->createDirectories(path); disk->createDirectories(fs::path(path) / "tmp/"); @@ -242,9 +246,8 @@ void registerStorageSet(StorageFactory & factory) factory.registerStorage("Set", [](const StorageFactory::Arguments & args) { if (!args.engine_args.empty()) - throw Exception( - "Engine " + args.engine_name + " doesn't support any arguments (" + toString(args.engine_args.size()) + " given)", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Engine {} doesn't support any arguments ({} given)", + args.engine_name, args.engine_args.size()); bool has_settings = args.storage_def->settings; SetSettings set_settings; diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index 62823f6dabd..be5045b884f 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -175,7 +175,7 @@ public: *data_out_compressed, CompressionCodecFactory::instance().getDefaultCodec(), storage.max_compress_block_size)) { if (!lock) - throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Lock timeout exceeded"); /// Ensure that indices are loaded because we're going to update them. storage.loadIndices(lock); @@ -283,7 +283,7 @@ StorageStripeLog::StorageStripeLog( setInMemoryMetadata(storage_metadata); if (relative_path_.empty()) - throw Exception("Storage " + getName() + " requires data path", ErrorCodes::INCORRECT_FILE_NAME); + throw Exception(ErrorCodes::INCORRECT_FILE_NAME, "Storage {} requires data path", getName()); /// Ensure the file checker is initialized. 
if (file_checker.empty()) @@ -358,7 +358,7 @@ Pipe StorageStripeLog::read( ReadLock lock{rwlock, lock_timeout}; if (!lock) - throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Lock timeout exceeded"); size_t data_file_size = file_checker.getFileSize(data_file_path); if (!data_file_size) @@ -396,7 +396,7 @@ SinkToStoragePtr StorageStripeLog::write(const ASTPtr & /*query*/, const Storage { WriteLock lock{rwlock, getLockTimeout(local_context)}; if (!lock) - throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Lock timeout exceeded"); return std::make_shared(*this, metadata_snapshot, std::move(lock)); } @@ -406,7 +406,7 @@ CheckResults StorageStripeLog::checkData(const ASTPtr & /* query */, ContextPtr { ReadLock lock{rwlock, getLockTimeout(local_context)}; if (!lock) - throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Lock timeout exceeded"); return file_checker.check(); } @@ -435,7 +435,7 @@ void StorageStripeLog::loadIndices(std::chrono::seconds lock_timeout) /// a data race between two threads trying to load indices simultaneously. WriteLock lock{rwlock, lock_timeout}; if (!lock) - throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Lock timeout exceeded"); loadIndices(lock); } @@ -532,7 +532,7 @@ void StorageStripeLog::backupData(BackupEntriesCollector & backup_entries_collec ReadLock lock{rwlock, lock_timeout}; if (!lock) - throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Lock timeout exceeded"); if (!file_checker.getFileSize(data_file_path)) return; @@ -603,7 +603,7 @@ void StorageStripeLog::restoreDataImpl(const BackupPtr & backup, const String & { WriteLock lock{rwlock, lock_timeout}; if (!lock) - throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Lock timeout exceeded"); /// Load the indices if not loaded yet. We have to do that now because we're going to update these indices. 
loadIndices(lock); @@ -675,9 +675,8 @@ void registerStorageStripeLog(StorageFactory & factory) factory.registerStorage("StripeLog", [](const StorageFactory::Arguments & args) { if (!args.engine_args.empty()) - throw Exception( - "Engine " + args.engine_name + " doesn't support any arguments (" + toString(args.engine_args.size()) + " given)", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Engine {} doesn't support any arguments ({} given)", + args.engine_name, args.engine_args.size()); String disk_name = getDiskName(*args.storage_def); DiskPtr disk = args.getContext()->getDisk(disk_name); diff --git a/src/Storages/StorageTableFunction.h b/src/Storages/StorageTableFunction.h index b105e50a54f..9ba7497fbf2 100644 --- a/src/Storages/StorageTableFunction.h +++ b/src/Storages/StorageTableFunction.h @@ -140,7 +140,7 @@ public: auto actual_structure = storage->getInMemoryMetadataPtr()->getSampleBlock(); if (!blocksHaveEqualStructure(actual_structure, cached_structure) && add_conversion) { - throw Exception("Source storage and table function have different structure", ErrorCodes::INCOMPATIBLE_COLUMNS); + throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS, "Source storage and table function have different structure"); } return storage->write(query, metadata_snapshot, context); } diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index f1ba7b63e7b..355daaffcd7 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -167,6 +167,20 @@ std::vector getPathsList(const String & uri, ContextPtr context) return urls_to_check; } +static void setCredentials(Poco::Net::HTTPBasicCredentials & credentials, const Poco::URI & request_uri) +{ + const auto & user_info = request_uri.getUserInfo(); + if (!user_info.empty()) + { + std::size_t n = user_info.find(':'); + if (n != std::string::npos) + { + credentials.setUsername(user_info.substr(0, n)); + credentials.setPassword(user_info.substr(n + 1)); + } + } +} + void StorageURLSource::setCredentials(Poco::Net::HTTPBasicCredentials & credentials, const Poco::URI & request_uri) { const auto & user_info = request_uri.getUserInfo(); @@ -202,12 +216,30 @@ StorageURLSource::StorageURLSource( { auto headers = getHeaders(headers_); + /// Lazy initialization. We should not perform requests in constructor, because we need to do it in query pipeline. 
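    /// The constructor only stores this callback: generate() invokes initialize() for each URI taken from
    /// uri_info->uri_list_to_read, so the HTTP read buffer and the format reader are created lazily while
    /// the query runs and are recreated when switching to the next URI on failover.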
initialize = [=, this](const URIInfo::FailoverOptions & uri_options) { if (uri_options.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty url list"); + Chunk generate() override + { + while (true) + { + if (isCancelled()) + { + if (reader) + reader->cancel(); + break; + } + + if (!reader) + { + auto current_uri_pos = uri_info->next_uri_to_read.fetch_add(1); + if (current_uri_pos >= uri_info->uri_list_to_read.size()) + return {}; + auto first_option = uri_options.begin(); read_buf = getFirstAvailableURLReadBuffer( first_option, @@ -224,6 +256,7 @@ StorageURLSource::StorageURLSource( uri_options.size() == 1, download_threads); + auto input_format = FormatFactory::instance().getInput(format, *read_buf, sample_block, context, max_block_size, format_settings); QueryPipelineBuilder builder; @@ -238,6 +271,20 @@ StorageURLSource::StorageURLSource( }; } + initialize(current_uri); + } + + Chunk chunk; + if (reader->pull(chunk)) + return chunk; + + pipeline->reset(); + reader.reset(); + } + return {}; + } + + std::unique_ptr StorageURLSource::getFirstAvailableURLReadBuffer( std::vector::const_iterator & option, const std::vector::const_iterator & end, @@ -434,9 +481,14 @@ private: Strings::iterator uris_iter; }; + StorageURLSource::DisclosedGlobIterator::DisclosedGlobIterator(ContextPtr context_, const String & uri) : pimpl(std::make_shared(context_, uri)) {} + std::unique_ptr read_buf; + std::unique_ptr pipeline; + std::unique_ptr reader; + String StorageURLSource::DisclosedGlobIterator::next() { return pimpl->next(); diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 9a75f8277fd..1a7050b4dff 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -113,7 +113,7 @@ StorageView::StorageView( storage_metadata.setComment(comment); if (!query.select) - throw Exception("SELECT query is not specified for " + getName(), ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "SELECT query is not specified for {}", getName()); SelectQueryDescription description; description.inner_query = query.select->ptr(); @@ -138,7 +138,7 @@ void StorageView::read( if (query_info.view_query) { if (!query_info.view_query->as()) - throw Exception("Unexpected optimized VIEW query", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected optimized VIEW query"); current_inner_query = query_info.view_query->clone(); } @@ -193,12 +193,12 @@ void StorageView::read( static ASTTableExpression * getFirstTableExpression(ASTSelectQuery & select_query) { if (!select_query.tables() || select_query.tables()->children.empty()) - throw Exception("Logical error: no table expression in view select AST", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: no table expression in view select AST"); auto * select_element = select_query.tables()->children[0]->as(); if (!select_element->table_expression) - throw Exception("Logical error: incorrect table expression", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: incorrect table expression"); return select_element->table_expression->as(); } @@ -229,7 +229,7 @@ void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_ } if (!table_expression->database_and_table_name) - throw Exception("Logical error: incorrect table expression", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: incorrect table expression"); } DatabaseAndTableWithAlias 
db_table(table_expression->database_and_table_name); @@ -292,7 +292,7 @@ ASTPtr StorageView::restoreViewName(ASTSelectQuery & select_query, const ASTPtr ASTTableExpression * table_expression = getFirstTableExpression(select_query); if (!table_expression->subquery) - throw Exception("Logical error: incorrect table expression", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: incorrect table expression"); ASTPtr subquery = table_expression->subquery; table_expression->subquery = {}; @@ -309,7 +309,7 @@ void registerStorageView(StorageFactory & factory) factory.registerStorage("View", [](const StorageFactory::Arguments & args) { if (args.query.storage) - throw Exception("Specifying ENGINE is not allowed for a View", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Specifying ENGINE is not allowed for a View"); return std::make_shared(args.table_id, args.query, args.columns, args.comment); }); diff --git a/src/Storages/StorageXDBC.cpp b/src/Storages/StorageXDBC.cpp index 5f57d37278b..5dd21d98a7e 100644 --- a/src/Storages/StorageXDBC.cpp +++ b/src/Storages/StorageXDBC.cpp @@ -165,15 +165,16 @@ namespace ASTs & engine_args = args.engine_args; if (engine_args.size() != 3) - throw Exception("Storage " + name + " requires exactly 3 parameters: " + name + "('DSN', database or schema, table)", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Storage {} requires exactly 3 parameters: {}('DSN', database or schema, table)", name, name); for (size_t i = 0; i < 3; ++i) engine_args[i] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[i], args.getLocalContext()); BridgeHelperPtr bridge_helper = std::make_shared>(args.getContext(), args.getContext()->getSettingsRef().http_receive_timeout.value, - checkAndGetLiteralArgument(engine_args[0], "connection_string")); + checkAndGetLiteralArgument(engine_args[0], "connection_string"), + args.getContext()->getSettingsRef().odbc_bridge_use_connection_pooling.value); return std::make_shared( args.table_id, checkAndGetLiteralArgument(engine_args[1], "database_name"), diff --git a/src/Storages/System/StorageSystemBackups.cpp b/src/Storages/System/StorageSystemBackups.cpp index 52a26fe0cd6..268cc9d0963 100644 --- a/src/Storages/System/StorageSystemBackups.cpp +++ b/src/Storages/System/StorageSystemBackups.cpp @@ -19,6 +19,8 @@ NamesAndTypesList StorageSystemBackups::getNamesAndTypes() {"name", std::make_shared()}, {"status", std::make_shared(getBackupStatusEnumValues())}, {"num_files", std::make_shared()}, + {"num_processed_files", std::make_shared()}, + {"processed_files_size", std::make_shared()}, {"uncompressed_size", std::make_shared()}, {"compressed_size", std::make_shared()}, {"error", std::make_shared()}, @@ -36,6 +38,8 @@ void StorageSystemBackups::fillData(MutableColumns & res_columns, ContextPtr con auto & column_name = assert_cast(*res_columns[column_index++]); auto & column_status = assert_cast(*res_columns[column_index++]); auto & column_num_files = assert_cast(*res_columns[column_index++]); + auto & column_num_processed_files = assert_cast(*res_columns[column_index++]); + auto & column_processed_files_size = assert_cast(*res_columns[column_index++]); auto & column_uncompressed_size = assert_cast(*res_columns[column_index++]); auto & column_compressed_size = assert_cast(*res_columns[column_index++]); auto & column_error = assert_cast(*res_columns[column_index++]); @@ -48,6 +52,8 @@ void 
StorageSystemBackups::fillData(MutableColumns & res_columns, ContextPtr con column_name.insertData(info.name.data(), info.name.size()); column_status.insertValue(static_cast(info.status)); column_num_files.insertValue(info.num_files); + column_num_processed_files.insertValue(info.num_processed_files); + column_processed_files_size.insertValue(info.processed_files_size); column_uncompressed_size.insertValue(info.uncompressed_size); column_compressed_size.insertValue(info.compressed_size); column_error.insertData(info.error_message.data(), info.error_message.size()); diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index f69f9f8ee7f..6ca6a9db046 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -30,6 +30,7 @@ const char * auto_contributors[] { "Aleksandr Shalimov", "Aleksandra (Ася)", "Aleksandrov Vladimir", + "Aleksei Filatov", "Aleksei Levushkin", "Aleksei Semiglazov", "Aleksey", @@ -192,6 +193,7 @@ const char * auto_contributors[] { "Bill", "BiteTheDDDDt", "BlahGeek", + "Bo Lu", "Bogdan", "Bogdan Voronin", "BohuTANG", @@ -256,6 +258,7 @@ const char * auto_contributors[] { "Denis Krivak", "Denis Zhuravlev", "Denny Crane", + "Denys Golotiuk", "Derek Chia", "Derek Perkins", "Diego Nieto (lesandie)", @@ -300,6 +303,7 @@ const char * auto_contributors[] { "Elizaveta Mironyuk", "Elykov Alexandr", "Emmanuel Donin de Rosière", + "Enrique Herreros", "Eric", "Eric Daniel", "Erixonich", @@ -476,6 +480,7 @@ const char * auto_contributors[] { "Kirill Shvakov", "Koblikov Mihail", "KochetovNicolai", + "Konstantin Bogdanov", "Konstantin Grabar", "Konstantin Ilchenko", "Konstantin Lebedev", @@ -571,6 +576,7 @@ const char * auto_contributors[] { "Mc.Spring", "Meena Renganathan", "Meena-Renganathan", + "MeenaRenganathan22", "MeiK", "Memo", "Metehan Çetinkaya", @@ -866,10 +872,12 @@ const char * auto_contributors[] { "VDimir", "VVMak", "Vadim", + "Vadim Akhma", "Vadim Plakhtinskiy", "Vadim Skipin", "Vadim Volodin", "VadimPE", + "Vage Ogannisian", "Val", "Valera Ryaboshapko", "Varinara", @@ -1033,6 +1041,7 @@ const char * auto_contributors[] { "bobrovskij artemij", "booknouse", "bseng", + "candiduslynx", "canenoneko", "caspian", "cekc", @@ -1266,6 +1275,7 @@ const char * auto_contributors[] { "maxim-babenko", "maxkuzn", "maxulan", + "mayamika", "mehanizm", "melin", "memo", @@ -1348,7 +1358,10 @@ const char * auto_contributors[] { "ritaank", "rnbondarenko", "robert", + "robot-ch-test-poll1", + "robot-ch-test-poll4", "robot-clickhouse", + "robot-clickhouse-ci-1", "robot-metrika-test", "rodrigargar", "roman", @@ -1372,7 +1385,9 @@ const char * auto_contributors[] { "shedx", "shuchaome", "shuyang", + "sichenzhao", "simon-says", + "simpleton", "snyk-bot", "songenjie", "sperlingxx", @@ -1380,6 +1395,7 @@ const char * auto_contributors[] { "spongedc", "spume", "spyros87", + "stan", "stavrolia", "stepenhu", "su-houzhen", @@ -1435,6 +1451,7 @@ const char * auto_contributors[] { "wangdh15", "weeds085490", "whysage", + "wineternity", "wuxiaobai24", "wzl", "xPoSx", @@ -1458,6 +1475,7 @@ const char * auto_contributors[] { "yonesko", "youenn lebras", "young scott", + "yuanyimeng", "yuchuansun", "yuefoo", "yulu86", diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index 01c7b7d69e4..494f9c9c31f 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ 
b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/Storages/System/StorageSystemFormats.cpp b/src/Storages/System/StorageSystemFormats.cpp index b35fdd3f85e..d8a49880257 100644 --- a/src/Storages/System/StorageSystemFormats.cpp +++ b/src/Storages/System/StorageSystemFormats.cpp @@ -12,6 +12,8 @@ NamesAndTypesList StorageSystemFormats::getNamesAndTypes() {"name", std::make_shared()}, {"is_input", std::make_shared()}, {"is_output", std::make_shared()}, + {"supports_parallel_parsing", std::make_shared()}, + {"supports_parallel_formatting", std::make_shared()}, }; } @@ -23,9 +25,14 @@ void StorageSystemFormats::fillData(MutableColumns & res_columns, ContextPtr, co const auto & [format_name, creators] = pair; UInt64 has_input_format(creators.input_creator != nullptr); UInt64 has_output_format(creators.output_creator != nullptr); + UInt64 supports_parallel_parsing(creators.file_segmentation_engine != nullptr); + UInt64 supports_parallel_formatting(creators.supports_parallel_formatting); + res_columns[0]->insert(format_name); res_columns[1]->insert(has_input_format); res_columns[2]->insert(has_output_format); + res_columns[3]->insert(supports_parallel_parsing); + res_columns[4]->insert(supports_parallel_formatting); } } diff --git a/src/Storages/System/StorageSystemMergeTreeMetadataCache.cpp b/src/Storages/System/StorageSystemMergeTreeMetadataCache.cpp index 731cdf94157..3bb92814a2f 100644 --- a/src/Storages/System/StorageSystemMergeTreeMetadataCache.cpp +++ b/src/Storages/System/StorageSystemMergeTreeMetadataCache.cpp @@ -100,8 +100,9 @@ void StorageSystemMergeTreeMetadataCache::fillData(MutableColumns & res_columns, bool precise = false; String key = extractKey(query_info.query, precise); if (key.empty()) - throw Exception( - "SELECT from system.merge_tree_metadata_cache table must contain condition like key = 'key' or key LIKE 'prefix%' in WHERE clause.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "SELECT from system.merge_tree_metadata_cache table must contain condition like key = 'key' " + "or key LIKE 'prefix%' in WHERE clause."); auto cache = context->getMergeTreeMetadataCache(); if (precise) @@ -118,8 +119,9 @@ void StorageSystemMergeTreeMetadataCache::fillData(MutableColumns & res_columns, { String target = extractFixedPrefixFromLikePattern(key, /*requires_perfect_prefix*/ false); if (target.empty()) - throw Exception( - "SELECT from system.merge_tree_metadata_cache table must contain condition like key = 'key' or key LIKE 'prefix%' in WHERE clause.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "SELECT from system.merge_tree_metadata_cache table must contain condition like key = 'key' " + "or key LIKE 'prefix%' in WHERE clause."); Strings keys; Strings values; diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index a0c022f5540..fb7f4f9066a 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -232,7 +232,7 @@ StoragesInfo StoragesInfoStream::next() info.data = dynamic_cast(info.storage.get()); if (!info.data) - throw Exception("Unknown engine " + info.engine, ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown engine {}", info.engine); return info; } diff --git a/src/Storages/System/StorageSystemQueryCache.cpp b/src/Storages/System/StorageSystemQueryCache.cpp new file 
mode 100644 index 00000000000..2de8e4594b9 --- /dev/null +++ b/src/Storages/System/StorageSystemQueryCache.cpp @@ -0,0 +1,55 @@ +#include "StorageSystemQueryCache.h" +#include +#include +#include +#include +#include + + +namespace DB +{ + +NamesAndTypesList StorageSystemQueryCache::getNamesAndTypes() +{ + return { + {"query", std::make_shared()}, + {"key_hash", std::make_shared()}, + {"expires_at", std::make_shared()}, + {"stale", std::make_shared()}, + {"shared", std::make_shared()}, + {"result_size", std::make_shared()} + }; +} + +StorageSystemQueryCache::StorageSystemQueryCache(const StorageID & table_id_) + : IStorageSystemOneBlock(table_id_) +{ +} + +void StorageSystemQueryCache::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const +{ + auto query_cache = context->getQueryCache(); + + if (!query_cache) + return; + + const String & username = context->getUserName(); + + std::lock_guard lock(query_cache->mutex); + + for (const auto & [key, result] : query_cache->cache) + { + /// Showing other user's queries is considered a security risk + if (key.username.has_value() && key.username != username) + continue; + + res_columns[0]->insert(key.queryStringFromAst()); /// approximates the original query string + res_columns[1]->insert(key.ast->getTreeHash().first); + res_columns[2]->insert(std::chrono::system_clock::to_time_t(key.expires_at)); + res_columns[3]->insert(key.expires_at < std::chrono::system_clock::now()); + res_columns[4]->insert(!key.username.has_value()); + res_columns[5]->insert(result.sizeInBytes()); + } +} + +} diff --git a/src/Storages/System/StorageSystemQueryCache.h b/src/Storages/System/StorageSystemQueryCache.h new file mode 100644 index 00000000000..5ff5f0a0454 --- /dev/null +++ b/src/Storages/System/StorageSystemQueryCache.h @@ -0,0 +1,21 @@ +#pragma once + +#include + +namespace DB +{ + +class StorageSystemQueryCache final : public IStorageSystemOneBlock +{ +public: + explicit StorageSystemQueryCache(const StorageID & table_id_); + + std::string getName() const override { return "SystemQueryCache"; } + + static NamesAndTypesList getNamesAndTypes(); + +protected: + void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override; +}; + +} diff --git a/src/Storages/System/StorageSystemStackTrace.cpp b/src/Storages/System/StorageSystemStackTrace.cpp index df3d8b74e6e..da3d6b98dc5 100644 --- a/src/Storages/System/StorageSystemStackTrace.cpp +++ b/src/Storages/System/StorageSystemStackTrace.cpp @@ -151,7 +151,7 @@ namespace continue; /// Drain delayed notifications. } - throw Exception("Logical error: read wrong number of bytes from pipe", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: read wrong number of bytes from pipe"); } } diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp index 1212d9da60a..9663c76a5c3 100644 --- a/src/Storages/System/StorageSystemZooKeeper.cpp +++ b/src/Storages/System/StorageSystemZooKeeper.cpp @@ -122,22 +122,22 @@ public: /// We don't expect a "name" contains a path. 
if (name.find('/') != std::string::npos) { - throw Exception("Column `name` should not contain '/'", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Column `name` should not contain '/'"); } if (name.empty()) { - throw Exception("Column `name` should not be empty", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Column `name` should not be empty"); } if (path.empty()) { - throw Exception("Column `path` should not be empty", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Column `path` should not be empty"); } if (path.size() + name.size() > PATH_MAX) { - throw Exception("Sum of `name` length and `path` length should not exceed PATH_MAX", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Sum of `name` length and `path` length should not exceed PATH_MAX"); } std::vector path_vec; @@ -176,7 +176,7 @@ StorageSystemZooKeeper::StorageSystemZooKeeper(const StorageID & table_id_) SinkToStoragePtr StorageSystemZooKeeper::write(const ASTPtr &, const StorageMetadataPtr &, ContextPtr context) { if (!context->getConfigRef().getBool("allow_zookeeper_write", false)) - throw Exception("Prohibit writing to system.zookeeper, unless config `allow_zookeeper_write` as true", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Prohibit writing to system.zookeeper, unless config `allow_zookeeper_write` as true"); Block write_header; write_header.insert(ColumnWithTypeAndName(std::make_shared(), "name")); write_header.insert(ColumnWithTypeAndName(std::make_shared(), "value")); @@ -404,7 +404,10 @@ void StorageSystemZooKeeper::fillData(MutableColumns & res_columns, ContextPtr c zkutil::ZooKeeperPtr zookeeper = context->getZooKeeper(); if (paths.empty()) - throw Exception("SELECT from system.zookeeper table must contain condition like path = 'path' or path IN ('path1','path2'...) or path IN (subquery) in WHERE clause unless `set allow_unrestricted_reads_from_keeper = 'true'`.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "SELECT from system.zookeeper table must contain condition like path = 'path' " + "or path IN ('path1','path2'...) 
or path IN (subquery) " + "in WHERE clause unless `set allow_unrestricted_reads_from_keeper = 'true'`."); std::unordered_set added; while (!paths.empty()) diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index e82f7c9bb2b..07db151069f 100644 --- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -73,6 +73,7 @@ #include #include #include +#include #include #include #include @@ -175,6 +176,7 @@ void attachSystemTablesServer(ContextPtr context, IDatabase & system_database, b attach(context, system_database, "part_moves_between_shards"); attach(context, system_database, "asynchronous_inserts"); attach(context, system_database, "filesystem_cache"); + attach(context, system_database, "query_cache"); attach(context, system_database, "remote_data_paths"); attach(context, system_database, "certificates"); attach(context, system_database, "named_collections"); diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index 2971d977099..e1a80800630 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -63,11 +63,9 @@ void checkTTLExpression(const ExpressionActionsPtr & ttl_expression, const Strin { const IFunctionBase & func = *action.node->function_base; if (!func.isDeterministic()) - throw Exception( - "TTL expression cannot contain non-deterministic functions, " - "but contains function " - + func.getName(), - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "TTL expression cannot contain non-deterministic functions, but contains function {}", + func.getName()); } } @@ -76,9 +74,9 @@ void checkTTLExpression(const ExpressionActionsPtr & ttl_expression, const Strin if (!typeid_cast(result_column.type.get()) && !typeid_cast(result_column.type.get())) { - throw Exception( - "TTL expression result column should have DateTime or Date type, but has " + result_column.type->getName(), - ErrorCodes::BAD_TTL_EXPRESSION); + throw Exception(ErrorCodes::BAD_TTL_EXPRESSION, + "TTL expression result column should have DateTime or Date type, but has {}", + result_column.type->getName()); } } @@ -206,7 +204,7 @@ TTLDescription TTLDescription::getTTLFromAST( const auto & pk_columns = primary_key.column_names; if (ttl_element->group_by_key.size() > pk_columns.size()) - throw Exception("TTL Expression GROUP BY key should be a prefix of primary key", ErrorCodes::BAD_TTL_EXPRESSION); + throw Exception(ErrorCodes::BAD_TTL_EXPRESSION, "TTL Expression GROUP BY key should be a prefix of primary key"); NameSet aggregation_columns_set; NameSet used_primary_key_columns_set; @@ -214,9 +212,7 @@ TTLDescription TTLDescription::getTTLFromAST( for (size_t i = 0; i < ttl_element->group_by_key.size(); ++i) { if (ttl_element->group_by_key[i]->getColumnName() != pk_columns[i]) - throw Exception( - "TTL Expression GROUP BY key should be a prefix of primary key", - ErrorCodes::BAD_TTL_EXPRESSION); + throw Exception(ErrorCodes::BAD_TTL_EXPRESSION, "TTL Expression GROUP BY key should be a prefix of primary key"); used_primary_key_columns_set.insert(pk_columns[i]); } @@ -240,9 +236,7 @@ TTLDescription TTLDescription::getTTLFromAST( } if (aggregation_columns_set.size() != ttl_element->group_by_assignments.size()) - throw Exception( - "Multiple aggregations set for one column in TTL Expression", - ErrorCodes::BAD_TTL_EXPRESSION); + throw Exception(ErrorCodes::BAD_TTL_EXPRESSION, "Multiple aggregations set for one column in TTL Expression"); result.group_by_keys = 
Names(pk_columns.begin(), pk_columns.begin() + ttl_element->group_by_key.size()); @@ -350,7 +344,7 @@ TTLTableDescription TTLTableDescription::getTTLForTableFromAST( if (!ttl.where_expression) { if (have_unconditional_delete_ttl) - throw Exception("More than one DELETE TTL expression without WHERE expression is not allowed", ErrorCodes::BAD_TTL_EXPRESSION); + throw Exception(ErrorCodes::BAD_TTL_EXPRESSION, "More than one DELETE TTL expression without WHERE expression is not allowed"); have_unconditional_delete_ttl = true; result.rows_ttl = ttl; diff --git a/src/Storages/UVLoop.h b/src/Storages/UVLoop.h index 6b24252077e..4945e1b56fa 100644 --- a/src/Storages/UVLoop.h +++ b/src/Storages/UVLoop.h @@ -25,7 +25,7 @@ public: int res = uv_loop_init(loop_ptr.get()); if (res != 0) - throw Exception("UVLoop could not initialize", ErrorCodes::SYSTEM_ERROR); + throw Exception(ErrorCodes::SYSTEM_ERROR, "UVLoop could not initialize"); } ~UVLoop() diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index ffa04bcdd83..4ff00facfdc 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -132,7 +132,7 @@ void rewriteEntityInAst(ASTPtr ast, const String & column_name, const Field & va bool prepareFilterBlockWithQuery(const ASTPtr & query, ContextPtr context, Block block, ASTPtr & expression_ast) { if (block.rows() == 0) - throw Exception("Cannot prepare filter with empty block", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot prepare filter with empty block"); /// Take the first row of the input block to build a constant block auto columns = block.getColumns(); diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 442a7822e33..0d24bf7899f 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -119,8 +119,7 @@ namespace else { if (data.check_duplicate_window && serializeAST(*temp_node) != data.serialized_window_function) - throw Exception( - "WINDOW VIEW only support ONE TIME WINDOW FUNCTION", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW); + throw Exception(ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW, "WINDOW VIEW only support ONE TIME WINDOW FUNCTION"); t->name = "windowID"; } } @@ -257,13 +256,13 @@ namespace const auto * arg = ast->as(); if (!arg || !startsWith(arg->name, "toInterval") || !IntervalKind::tryParseString(Poco::toLower(arg->name.substr(10)), kind)) - throw Exception(err_msg, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception::createDeprecated(err_msg, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); const auto * interval_unit = arg->children.front()->children.front()->as(); if (!interval_unit || (interval_unit->value.getType() != Field::Types::String && interval_unit->value.getType() != Field::Types::UInt64)) - throw Exception("Interval argument must be integer", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Interval argument must be integer"); if (interval_unit->value.getType() == Field::Types::String) num_units = parse(interval_unit->value.safeGet()); @@ -271,7 +270,7 @@ namespace num_units = interval_unit->value.safeGet(); if (num_units <= 0) - throw Exception("Value for Interval argument must be positive.", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Value for Interval argument must be positive."); } UInt32 addTime(UInt32 time_sec, IntervalKind::Kind kind, Int64 num_units, const DateLUTImpl 
& time_zone) @@ -281,7 +280,7 @@ namespace case IntervalKind::Nanosecond: case IntervalKind::Microsecond: case IntervalKind::Millisecond: - throw Exception("Fractional seconds are not supported by windows yet", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Fractional seconds are not supported by windows yet"); #define CASE_WINDOW_KIND(KIND) \ case IntervalKind::KIND: { \ return AddTime::execute(time_sec, num_units, time_zone); \ @@ -367,17 +366,16 @@ static void extractDependentTable(ContextPtr context, ASTPtr & query, String & s else if (auto * ast_select = subquery->as()) { if (ast_select->list_of_selects->children.size() != 1) - throw Exception("UNION is not supported for WINDOW VIEW", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW); + throw Exception(ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW, "UNION is not supported for WINDOW VIEW"); auto & inner_select_query = ast_select->list_of_selects->children.at(0); extractDependentTable(context, inner_select_query, select_database_name, select_table_name); } else - throw Exception( + throw Exception(DB::ErrorCodes::LOGICAL_ERROR, "Logical error while creating StorageWindowView." - " Could not retrieve table name from select query.", - DB::ErrorCodes::LOGICAL_ERROR); + " Could not retrieve table name from select query."); } UInt32 StorageWindowView::getCleanupBound() @@ -872,7 +870,7 @@ UInt32 StorageWindowView::getWindowLowerBound(UInt32 time_sec) case IntervalKind::Nanosecond: case IntervalKind::Microsecond: case IntervalKind::Millisecond: - throw Exception("Fractional seconds are not supported by windows yet", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Fractional seconds are not supported by windows yet"); #define CASE_WINDOW_KIND(KIND) \ case IntervalKind::KIND: \ { \ @@ -905,7 +903,7 @@ UInt32 StorageWindowView::getWindowUpperBound(UInt32 time_sec) case IntervalKind::Nanosecond: case IntervalKind::Microsecond: case IntervalKind::Millisecond: - throw Exception("Fractional seconds are not supported by window view yet", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Fractional seconds are not supported by window view yet"); #define CASE_WINDOW_KIND(KIND) \ case IntervalKind::KIND: \ @@ -1168,9 +1166,9 @@ StorageWindowView::StorageWindowView( /// If the target table is not set, use inner target table has_inner_target_table = query.to_table_id.empty(); if (has_inner_target_table && !query.storage) - throw Exception( - "You must specify where to save results of a WindowView query: either ENGINE or an existing table in a TO clause", - ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, + "You must specify where to save results of a WindowView query: " + "either ENGINE or an existing table in a TO clause"); if (query.select->list_of_selects->children.size() != 1) throw Exception( @@ -1254,7 +1252,7 @@ ASTPtr StorageWindowView::initInnerQuery(ASTSelectQuery query, ContextPtr contex ReplaceFunctionNowVisitor(func_now_data).visit(mergeable_query); is_time_column_func_now = func_now_data.is_time_column_func_now; if (!is_proctime && is_time_column_func_now) - throw Exception("now() is not supported for Event time processing.", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "now() is not supported for Event time processing."); if (is_time_column_func_now) window_id_name = func_now_data.window_id_name; @@ -1443,11 +1441,11 @@ void StorageWindowView::writeIntoWindowView( }); } - std::shared_lock fire_signal_lock; 
+ std::shared_lock fire_signal_lock; QueryPipelineBuilder builder; if (window_view.is_proctime) { - fire_signal_lock = std::shared_lock(window_view.fire_signal_mutex); + fire_signal_lock = std::shared_lock(window_view.fire_signal_mutex); /// Fill ____timestamp column with current time in case of now() time column. if (window_view.is_time_column_func_now) @@ -1663,9 +1661,9 @@ void registerStorageWindowView(StorageFactory & factory) factory.registerStorage("WindowView", [](const StorageFactory::Arguments & args) { if (!args.attach && !args.getLocalContext()->getSettingsRef().allow_experimental_window_view) - throw Exception( - "Experimental WINDOW VIEW feature is not enabled (the setting 'allow_experimental_window_view')", - ErrorCodes::SUPPORT_IS_DISABLED); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, + "Experimental WINDOW VIEW feature " + "is not enabled (the setting 'allow_experimental_window_view')"); return std::make_shared(args.table_id, args.getLocalContext(), args.query, args.columns, args.attach); }); diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h index 6da34389e4d..b313e466211 100644 --- a/src/Storages/WindowView/StorageWindowView.h +++ b/src/Storages/WindowView/StorageWindowView.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -213,7 +214,7 @@ private: /// Mutex for the blocks and ready condition std::mutex mutex; - std::shared_mutex fire_signal_mutex; + SharedMutex fire_signal_mutex; mutable std::mutex sample_block_lock; /// Mutex to protect access to sample block IntervalKind::Kind window_kind; diff --git a/src/Storages/addColumnsStructureToQueryWithClusterEngine.cpp b/src/Storages/addColumnsStructureToQueryWithClusterEngine.cpp index 31f49fa5490..75c5bebb901 100644 --- a/src/Storages/addColumnsStructureToQueryWithClusterEngine.cpp +++ b/src/Storages/addColumnsStructureToQueryWithClusterEngine.cpp @@ -37,7 +37,8 @@ void addColumnsStructureToQueryWithClusterEngine(ASTPtr & query, const String & auto structure_literal = std::make_shared(structure); if (expression_list->children.size() < 2 || expression_list->children.size() > max_arguments) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected 2 to {} arguments in {} table functions, got {}", function_name, max_arguments, expression_list->children.size()); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected 2 to {} arguments in {} table functions, got {}", + function_name, max_arguments, expression_list->children.size()); if (expression_list->children.size() == 2 || expression_list->children.size() == max_arguments - 1) { diff --git a/src/Storages/getStructureOfRemoteTable.cpp b/src/Storages/getStructureOfRemoteTable.cpp index 7bd5e629c39..0721cfaa9c4 100644 --- a/src/Storages/getStructureOfRemoteTable.cpp +++ b/src/Storages/getStructureOfRemoteTable.cpp @@ -155,9 +155,8 @@ ColumnsDescription getStructureOfRemoteTable( } } - throw NetException( - "All attempts to get table structure failed. Log: \n\n" + fail_messages + "\n", - ErrorCodes::NO_REMOTE_SHARD_AVAILABLE); + throw NetException(ErrorCodes::NO_REMOTE_SHARD_AVAILABLE, + "All attempts to get table structure failed. 
Log: \n\n{}\n", fail_messages); } ColumnsDescriptionByShardNum getExtendedObjectsOfRemoteTables( @@ -220,7 +219,7 @@ ColumnsDescriptionByShardNum getExtendedObjectsOfRemoteTables( } if (columns.empty()) - throw NetException("All attempts to get table structure failed", ErrorCodes::NO_REMOTE_SHARD_AVAILABLE); + throw NetException(ErrorCodes::NO_REMOTE_SHARD_AVAILABLE, "All attempts to get table structure failed"); return columns; } diff --git a/src/Storages/transformQueryForExternalDatabase.cpp b/src/Storages/transformQueryForExternalDatabase.cpp index 51b11680f82..1ff310c3fac 100644 --- a/src/Storages/transformQueryForExternalDatabase.cpp +++ b/src/Storages/transformQueryForExternalDatabase.cpp @@ -116,7 +116,7 @@ bool isCompatible(IAST & node) return false; if (!function->arguments) - throw Exception("Logical error: function->arguments is not set", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: function->arguments is not set"); String name = function->name; @@ -288,7 +288,7 @@ String transformQueryForExternalDatabase( } else if (strict) { - throw Exception("Query contains non-compatible expressions (and external_table_strict_query=true)", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Query contains non-compatible expressions (and external_table_strict_query=true)"); } else if (const auto * function = original_where->as()) { @@ -309,7 +309,7 @@ String transformQueryForExternalDatabase( } else if (strict && original_where) { - throw Exception("Query contains non-compatible expressions (and external_table_strict_query=true)", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Query contains non-compatible expressions (and external_table_strict_query=true)"); } auto * literal_expr = typeid_cast(original_where.get()); diff --git a/src/TableFunctions/Hive/TableFunctionHive.cpp b/src/TableFunctions/Hive/TableFunctionHive.cpp index 7e14f5ca300..fb7635181dc 100644 --- a/src/TableFunctions/Hive/TableFunctionHive.cpp +++ b/src/TableFunctions/Hive/TableFunctionHive.cpp @@ -32,13 +32,10 @@ namespace DB ASTs & args = args_func.at(0)->children; - const auto message = fmt::format( - "The signature of function {} is:\n" - " - hive_url, hive_database, hive_table, structure, partition_by_keys", - getName()); - if (args.size() != 5) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, message); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "The signature of function {} is:\n - hive_url, hive_database, hive_table, structure, partition_by_keys", + getName()); for (auto & arg : args) arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context_); diff --git a/src/TableFunctions/ITableFunctionFileLike.cpp b/src/TableFunctions/ITableFunctionFileLike.cpp index d62e44a16cc..8cbffc10e5a 100644 --- a/src/TableFunctions/ITableFunctionFileLike.cpp +++ b/src/TableFunctions/ITableFunctionFileLike.cpp @@ -45,12 +45,12 @@ void ITableFunctionFileLike::parseArguments(const ASTPtr & ast_function, Context ASTs & args_func = ast_function->children; if (args_func.size() != 1) - throw Exception("Table function '" + getName() + "' must have arguments.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Table function '{}' must have arguments.", getName()); ASTs & args = args_func.at(0)->children; if (args.empty()) - throw Exception("Table function '" + getName() + "' requires at least 1 argument", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw 
Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' requires at least 1 argument", getName()); for (auto & arg : args) arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); @@ -67,8 +67,10 @@ void ITableFunctionFileLike::parseArguments(const ASTPtr & ast_function, Context return; if (args.size() != 3 && args.size() != 4) - throw Exception("Table function '" + getName() + "' requires 1, 2, 3 or 4 arguments: filename, format (default auto), structure (default auto) and compression method (default auto)", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Table function '{}' requires 1, 2, 3 or 4 arguments: " + "filename, format (default auto), structure (default auto) and compression method (default auto)", + getName()); structure = checkAndGetLiteralArgument(args[2], "structure"); diff --git a/src/TableFunctions/ITableFunctionXDBC.cpp b/src/TableFunctions/ITableFunctionXDBC.cpp index 3d72d98e7ea..3abda5061df 100644 --- a/src/TableFunctions/ITableFunctionXDBC.cpp +++ b/src/TableFunctions/ITableFunctionXDBC.cpp @@ -56,7 +56,7 @@ void ITableFunctionXDBC::startBridgeIfNot(ContextPtr context) const { if (!helper) { - helper = createBridgeHelper(context, context->getSettingsRef().http_receive_timeout.value, connection_string); + helper = createBridgeHelper(context, context->getSettingsRef().http_receive_timeout.value, connection_string, context->getSettingsRef().odbc_bridge_use_connection_pooling.value); helper->startBridgeSync(); } } diff --git a/src/TableFunctions/ITableFunctionXDBC.h b/src/TableFunctions/ITableFunctionXDBC.h index 42a3d30a728..984a6a1957f 100644 --- a/src/TableFunctions/ITableFunctionXDBC.h +++ b/src/TableFunctions/ITableFunctionXDBC.h @@ -21,7 +21,8 @@ private: /* A factory method to create bridge helper, that will assist in remote interaction */ virtual BridgeHelperPtr createBridgeHelper(ContextPtr context, Poco::Timespan http_timeout_, - const std::string & connection_string_) const = 0; + const std::string & connection_string_, + bool use_connection_pooling_) const = 0; ColumnsDescription getActualTableStructure(ContextPtr context) const override; @@ -47,9 +48,10 @@ public: private: BridgeHelperPtr createBridgeHelper(ContextPtr context, Poco::Timespan http_timeout_, - const std::string & connection_string_) const override + const std::string & connection_string_, + bool use_connection_pooling_) const override { - return std::make_shared>(context, http_timeout_, connection_string_); + return std::make_shared>(context, http_timeout_, connection_string_, use_connection_pooling_); } const char * getStorageTypeName() const override { return "JDBC"; } @@ -67,9 +69,10 @@ public: private: BridgeHelperPtr createBridgeHelper(ContextPtr context, Poco::Timespan http_timeout_, - const std::string & connection_string_) const override + const std::string & connection_string_, + bool use_connection_pooling_) const override { - return std::make_shared>(context, http_timeout_, connection_string_); + return std::make_shared>(context, http_timeout_, connection_string_, use_connection_pooling_); } const char * getStorageTypeName() const override { return "ODBC"; } diff --git a/src/TableFunctions/TableFunctionDeltaLake.cpp b/src/TableFunctions/TableFunctionDeltaLake.cpp index e72de953858..4a285445016 100644 --- a/src/TableFunctions/TableFunctionDeltaLake.cpp +++ b/src/TableFunctions/TableFunctionDeltaLake.cpp @@ -31,7 +31,7 @@ void TableFunctionDeltaLake::parseArgumentsImpl( const String & 
error_message, ASTs & args, ContextPtr context, StorageS3Configuration & base_configuration) { if (args.empty() || args.size() > 6) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, error_message); + throw Exception::createDeprecated(error_message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); auto * header_it = StorageURL::collectHeaders(args, base_configuration.headers, context); if (header_it != args.end()) diff --git a/src/TableFunctions/TableFunctionFactory.cpp b/src/TableFunctions/TableFunctionFactory.cpp index ff001661000..76108f1cdd4 100644 --- a/src/TableFunctions/TableFunctionFactory.cpp +++ b/src/TableFunctions/TableFunctionFactory.cpp @@ -21,13 +21,12 @@ void TableFunctionFactory::registerFunction( const std::string & name, Value value, CaseSensitiveness case_sensitiveness) { if (!table_functions.emplace(name, value).second) - throw Exception("TableFunctionFactory: the table function name '" + name + "' is not unique", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "TableFunctionFactory: the table function name '{}' is not unique", name); if (case_sensitiveness == CaseInsensitive && !case_insensitive_table_functions.emplace(Poco::toLower(name), value).second) - throw Exception("TableFunctionFactory: the case insensitive table function name '" + name + "' is not unique", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "TableFunctionFactory: " + "the case insensitive table function name '{}' is not unique", name); KnownTableFunctionNames::instance().add(name, (case_sensitiveness == CaseInsensitive)); } diff --git a/src/TableFunctions/TableFunctionFile.cpp b/src/TableFunctions/TableFunctionFile.cpp index 4ecf29a05bd..9cd71196dcf 100644 --- a/src/TableFunctions/TableFunctionFile.cpp +++ b/src/TableFunctions/TableFunctionFile.cpp @@ -46,11 +46,10 @@ void TableFunctionFile::parseFirstArguments(const ASTPtr & arg, const ContextPtr fd = static_cast( (type == Field::Types::Int64) ? 
literal->value.get() : literal->value.get()); if (fd < 0) - throw Exception("File descriptor must be non-negative", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "File descriptor must be non-negative"); } else - throw Exception( - "The first argument of table function '" + getName() + "' mush be path or file descriptor", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The first argument of table function '{}' mush be path or file descriptor", getName()); } String TableFunctionFile::getFormatFromFirstArgument() @@ -89,8 +88,7 @@ ColumnsDescription TableFunctionFile::getActualTableStructure(ContextPtr context if (structure == "auto") { if (fd >= 0) - throw Exception( - "Schema inference is not supported for table function '" + getName() + "' with file descriptor", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Schema inference is not supported for table function '{}' with file descriptor", getName()); size_t total_bytes_to_read = 0; Strings paths = StorageFile::getPathsList(filename, context->getUserFilesPath(), context, total_bytes_to_read); return StorageFile::getTableStructureFromFile(format, paths, compression_method, std::nullopt, context); diff --git a/src/TableFunctions/TableFunctionFormat.cpp b/src/TableFunctions/TableFunctionFormat.cpp index b15b350f00b..1e37775f574 100644 --- a/src/TableFunctions/TableFunctionFormat.cpp +++ b/src/TableFunctions/TableFunctionFormat.cpp @@ -4,6 +4,7 @@ #include #include +#include #include @@ -34,27 +35,33 @@ void TableFunctionFormat::parseArguments(const ASTPtr & ast_function, ContextPtr ASTs & args_func = ast_function->children; if (args_func.size() != 1) - throw Exception("Table function '" + getName() + "' must have arguments", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Table function '{}' must have arguments", getName()); ASTs & args = args_func.at(0)->children; - if (args.size() != 2) - throw Exception("Table function '" + getName() + "' requires 2 arguments: format and data", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + if (args.size() != 2 && args.size() != 3) + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' requires 2 or 3 arguments: format, [structure], data", getName()); for (auto & arg : args) arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); format = checkAndGetLiteralArgument(args[0], "format"); - data = checkAndGetLiteralArgument(args[1], "data"); + data = checkAndGetLiteralArgument(args.back(), "data"); + if (args.size() == 3) + structure = checkAndGetLiteralArgument(args[1], "structure"); } ColumnsDescription TableFunctionFormat::getActualTableStructure(ContextPtr context) const { - ReadBufferIterator read_buffer_iterator = [&](ColumnsDescription &) + if (structure == "auto") { - return std::make_unique(data); - }; - return readSchemaFromFormat(format, std::nullopt, read_buffer_iterator, false, context); + ReadBufferIterator read_buffer_iterator = [&](ColumnsDescription &) + { + return std::make_unique(data); + }; + return readSchemaFromFormat(format, std::nullopt, read_buffer_iterator, false, context); + } + return parseColumnsListFromString(structure, context); } Block TableFunctionFormat::parseData(ColumnsDescription columns, ContextPtr context) const diff --git a/src/TableFunctions/TableFunctionFormat.h b/src/TableFunctions/TableFunctionFormat.h index c6db322343b..d64ab14cb64 100644 --- a/src/TableFunctions/TableFunctionFormat.h +++ 
b/src/TableFunctions/TableFunctionFormat.h @@ -28,6 +28,7 @@ private: String format; String data; + String structure = "auto"; }; } diff --git a/src/TableFunctions/TableFunctionGenerateRandom.cpp b/src/TableFunctions/TableFunctionGenerateRandom.cpp index bed3397152b..5f1a13d8857 100644 --- a/src/TableFunctions/TableFunctionGenerateRandom.cpp +++ b/src/TableFunctions/TableFunctionGenerateRandom.cpp @@ -33,7 +33,7 @@ void TableFunctionGenerateRandom::parseArguments(const ASTPtr & ast_function, Co ASTs & args_func = ast_function->children; if (args_func.size() != 1) - throw Exception("Table function '" + getName() + "' must have arguments.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Table function '{}' must have arguments.", getName()); ASTs & args = args_func.at(0)->children; @@ -41,19 +41,18 @@ void TableFunctionGenerateRandom::parseArguments(const ASTPtr & ast_function, Co return; if (args.size() > 4) - throw Exception("Table function '" + getName() + "' requires at most four arguments: " - " structure, [random_seed, max_string_length, max_array_length].", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Table function '{}' requires at most four arguments: " + " structure, [random_seed, max_string_length, max_array_length].", getName()); // All the arguments must be literals. for (const auto & arg : args) { if (!arg->as()) { - throw Exception(fmt::format( + throw Exception(ErrorCodes::BAD_ARGUMENTS, "All arguments of table function '{}' must be literals. " - "Got '{}' instead", getName(), arg->formatForErrorMessage()), - ErrorCodes::BAD_ARGUMENTS); + "Got '{}' instead", getName(), arg->formatForErrorMessage()); } } diff --git a/src/TableFunctions/TableFunctionHDFSCluster.cpp b/src/TableFunctions/TableFunctionHDFSCluster.cpp index 73b77f770b2..7c84a281673 100644 --- a/src/TableFunctions/TableFunctionHDFSCluster.cpp +++ b/src/TableFunctions/TableFunctionHDFSCluster.cpp @@ -41,20 +41,18 @@ void TableFunctionHDFSCluster::parseArguments(const ASTPtr & ast_function, Conte ASTs & args_func = ast_copy->children; if (args_func.size() != 1) - throw Exception("Table function '" + getName() + "' must have arguments.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' must have arguments.", getName()); ASTs & args = args_func.at(0)->children; - const auto message = fmt::format( - "The signature of table function {} shall be the following:\n" \ - " - cluster, uri\n",\ - " - cluster, uri, format\n",\ - " - cluster, uri, format, structure\n",\ - " - cluster, uri, format, structure, compression_method", - getName()); - if (args.size() < 2 || args.size() > 5) - throw Exception(message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "The signature of table function {} shall be the following:\n" + " - cluster, uri\n" + " - cluster, uri, format\n" + " - cluster, uri, format, structure\n" + " - cluster, uri, format, structure, compression_method", + getName()); for (auto & arg : args) arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); @@ -85,7 +83,7 @@ ColumnsDescription TableFunctionHDFSCluster::getActualTableStructure(ContextPtr StoragePtr TableFunctionHDFSCluster::getStorage( - const String & /*source*/, const String & /*format_*/, const ColumnsDescription &, ContextPtr context, + const String & /*source*/, const String & /*format_*/, const 
ColumnsDescription & columns, ContextPtr context, const std::string & table_name, const String & /*compression_method_*/) const { StoragePtr storage; @@ -96,7 +94,7 @@ StoragePtr TableFunctionHDFSCluster::getStorage( filename, StorageID(getDatabaseName(), table_name), format, - getActualTableStructure(context), + columns, ConstraintsDescription{}, String{}, context, @@ -109,8 +107,8 @@ StoragePtr TableFunctionHDFSCluster::getStorage( storage = std::make_shared( context, cluster_name, filename, StorageID(getDatabaseName(), table_name), - format, getActualTableStructure(context), ConstraintsDescription{}, - compression_method); + format, columns, ConstraintsDescription{}, + compression_method, structure != "auto"); } return storage; } diff --git a/src/TableFunctions/TableFunctionHDFSCluster.h b/src/TableFunctions/TableFunctionHDFSCluster.h index a0555a904d1..9641b71c5e3 100644 --- a/src/TableFunctions/TableFunctionHDFSCluster.h +++ b/src/TableFunctions/TableFunctionHDFSCluster.h @@ -28,7 +28,6 @@ public: { return name; } - bool hasStaticStructure() const override { return true; } protected: StoragePtr getStorage( diff --git a/src/TableFunctions/TableFunctionHudi.cpp b/src/TableFunctions/TableFunctionHudi.cpp index e7b085d0d4e..781518ad77c 100644 --- a/src/TableFunctions/TableFunctionHudi.cpp +++ b/src/TableFunctions/TableFunctionHudi.cpp @@ -31,7 +31,7 @@ void TableFunctionHudi::parseArgumentsImpl( const String & error_message, ASTs & args, ContextPtr context, StorageS3Configuration & base_configuration) { if (args.empty() || args.size() > 6) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, error_message); + throw Exception::createDeprecated(error_message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); auto * header_it = StorageURL::collectHeaders(args, base_configuration.headers, context); if (header_it != args.end()) diff --git a/src/TableFunctions/TableFunctionInput.cpp b/src/TableFunctions/TableFunctionInput.cpp index df87afcad3c..4941241acae 100644 --- a/src/TableFunctions/TableFunctionInput.cpp +++ b/src/TableFunctions/TableFunctionInput.cpp @@ -26,7 +26,7 @@ void TableFunctionInput::parseArguments(const ASTPtr & ast_function, ContextPtr const auto * function = ast_function->as(); if (!function->arguments) - throw Exception("Table function '" + getName() + "' must have arguments", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Table function '{}' must have arguments", getName()); auto args = function->arguments->children; @@ -37,8 +37,8 @@ void TableFunctionInput::parseArguments(const ASTPtr & ast_function, ContextPtr } if (args.size() != 1) - throw Exception("Table function '" + getName() + "' requires exactly 1 argument: structure", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Table function '{}' requires exactly 1 argument: structure", getName()); structure = checkAndGetLiteralArgument(evaluateConstantExpressionOrIdentifierAsLiteral(args[0], context), "structure"); } diff --git a/src/TableFunctions/TableFunctionMerge.cpp b/src/TableFunctions/TableFunctionMerge.cpp index b055e241459..742cc7f366b 100644 --- a/src/TableFunctions/TableFunctionMerge.cpp +++ b/src/TableFunctions/TableFunctionMerge.cpp @@ -42,16 +42,16 @@ void TableFunctionMerge::parseArguments(const ASTPtr & ast_function, ContextPtr ASTs & args_func = ast_function->children; if (args_func.size() != 1) - throw Exception("Table function 'merge' requires exactly 2 arguments" - " - name of source database and 
regexp for table names.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Table function 'merge' requires exactly 2 arguments - name " + "of source database and regexp for table names."); ASTs & args = args_func.at(0)->children; if (args.size() != 2) - throw Exception("Table function 'merge' requires exactly 2 arguments" - " - name of source database and regexp for table names.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Table function 'merge' requires exactly 2 arguments - name " + "of source database and regexp for table names."); auto [is_regexp, database_ast] = StorageMerge::evaluateDatabaseName(args[0], context); diff --git a/src/TableFunctions/TableFunctionMongoDB.cpp b/src/TableFunctions/TableFunctionMongoDB.cpp index b88b7fda063..31dd64f8254 100644 --- a/src/TableFunctions/TableFunctionMongoDB.cpp +++ b/src/TableFunctions/TableFunctionMongoDB.cpp @@ -55,14 +55,15 @@ void TableFunctionMongoDB::parseArguments(const ASTPtr & ast_function, ContextPt { const auto & func_args = ast_function->as(); if (!func_args.arguments) - throw Exception("Table function 'mongodb' must have arguments.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table function 'mongodb' must have arguments."); ASTs & args = func_args.arguments->children; if (args.size() < 6 || args.size() > 7) { throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Table function 'mongodb' requires from 6 to 7 parameters: mongodb('host:port', database, collection, 'user', 'password', structure, [, 'options'])"); + "Table function 'mongodb' requires from 6 to 7 parameters: " + "mongodb('host:port', database, collection, 'user', 'password', structure, [, 'options'])"); } ASTs main_arguments(args.begin(), args.begin() + 5); diff --git a/src/TableFunctions/TableFunctionMySQL.cpp b/src/TableFunctions/TableFunctionMySQL.cpp index 58858eb495c..0cbad7bd9fd 100644 --- a/src/TableFunctions/TableFunctionMySQL.cpp +++ b/src/TableFunctions/TableFunctionMySQL.cpp @@ -35,7 +35,7 @@ void TableFunctionMySQL::parseArguments(const ASTPtr & ast_function, ContextPtr const auto & args_func = ast_function->as(); if (!args_func.arguments) - throw Exception("Table function 'mysql' must have arguments.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Table function 'mysql' must have arguments."); auto & args = args_func.arguments->children; diff --git a/src/TableFunctions/TableFunctionNull.cpp b/src/TableFunctions/TableFunctionNull.cpp index 9ff07cc1946..d2c091a7b5f 100644 --- a/src/TableFunctions/TableFunctionNull.cpp +++ b/src/TableFunctions/TableFunctionNull.cpp @@ -21,13 +21,12 @@ void TableFunctionNull::parseArguments(const ASTPtr & ast_function, ContextPtr c { const auto * function = ast_function->as(); if (!function || !function->arguments) - throw Exception("Table function '" + getName() + "' requires 'structure'", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' requires 'structure'", getName()); const auto & arguments = function->arguments->children; if (!arguments.empty() && arguments.size() != 1) - throw Exception( - "Table function '" + getName() + "' requires 'structure' argument or empty argument", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Table function '{}' requires 'structure' 
argument or empty argument", getName()); if (!arguments.empty()) structure = checkAndGetLiteralArgument(evaluateConstantExpressionOrIdentifierAsLiteral(arguments[0], context), "structure"); diff --git a/src/TableFunctions/TableFunctionNumbers.cpp b/src/TableFunctions/TableFunctionNumbers.cpp index 2056cd838f5..ba7a4dc4b36 100644 --- a/src/TableFunctions/TableFunctionNumbers.cpp +++ b/src/TableFunctions/TableFunctionNumbers.cpp @@ -37,7 +37,7 @@ StoragePtr TableFunctionNumbers::executeImpl(const ASTPtr & ast_f auto arguments = function->arguments->children; if (arguments.size() != 1 && arguments.size() != 2) - throw Exception("Table function '" + getName() + "' requires 'length' or 'offset, length'.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' requires 'length' or 'offset, length'.", getName()); UInt64 offset = arguments.size() == 2 ? evaluateArgument(context, arguments[0]) : 0; UInt64 length = arguments.size() == 2 ? evaluateArgument(context, arguments[1]) : evaluateArgument(context, arguments[0]); @@ -46,7 +46,7 @@ StoragePtr TableFunctionNumbers::executeImpl(const ASTPtr & ast_f res->startup(); return res; } - throw Exception("Table function '" + getName() + "' requires 'limit' or 'offset, limit'.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' requires 'limit' or 'offset, limit'.", getName()); } void registerTableFunctionNumbers(TableFunctionFactory & factory) @@ -65,7 +65,8 @@ UInt64 TableFunctionNumbers::evaluateArgument(ContextPtr context, Field converted = convertFieldToType(field, DataTypeUInt64()); if (converted.isNull()) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The value {} is not representable as UInt64", applyVisitor(FieldVisitorToString(), field)); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The value {} is not representable as UInt64", + applyVisitor(FieldVisitorToString(), field)); return converted.safeGet(); } diff --git a/src/TableFunctions/TableFunctionPostgreSQL.cpp b/src/TableFunctions/TableFunctionPostgreSQL.cpp index 2edfe82c708..ab6212d0e30 100644 --- a/src/TableFunctions/TableFunctionPostgreSQL.cpp +++ b/src/TableFunctions/TableFunctionPostgreSQL.cpp @@ -58,7 +58,7 @@ void TableFunctionPostgreSQL::parseArguments(const ASTPtr & ast_function, Contex { const auto & func_args = ast_function->as(); if (!func_args.arguments) - throw Exception("Table function 'PostgreSQL' must have arguments.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table function 'PostgreSQL' must have arguments."); configuration.emplace(StoragePostgreSQL::getConfiguration(func_args.arguments->children, context)); const auto & settings = context->getSettingsRef(); diff --git a/src/TableFunctions/TableFunctionRemote.cpp b/src/TableFunctions/TableFunctionRemote.cpp index 14e0774cf06..1877c9fe65b 100644 --- a/src/TableFunctions/TableFunctionRemote.cpp +++ b/src/TableFunctions/TableFunctionRemote.cpp @@ -145,7 +145,7 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr if (!tryGetIdentifierNameInto(args[arg_num], cluster_name)) { if (!get_string_literal(*args[arg_num], cluster_description)) - throw Exception("Hosts pattern must be string literal (in single quotes).", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Hosts pattern must be string literal (in single quotes)."); } } @@ -243,7 +243,7 @@ void 
TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr names.push_back(parseRemoteDescription(shard, 0, shard.size(), '|', max_addresses)); if (names.empty()) - throw Exception("Shard list is empty after parsing first argument", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Shard list is empty after parsing first argument"); auto maybe_secure_port = context->getTCPPortSecure(); @@ -276,7 +276,7 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr } if (!remote_table_function_ptr && configuration.table.empty()) - throw Exception("The name of remote table cannot be empty", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The name of remote table cannot be empty"); remote_table_id.database_name = configuration.database; remote_table_id.table_name = configuration.table; @@ -334,7 +334,7 @@ TableFunctionRemote::TableFunctionRemote(const std::string & name_, bool secure_ : name{name_}, secure{secure_} { is_cluster_function = (name == "cluster" || name == "clusterAllReplicas"); - help_message = fmt::format( + help_message = PreformattedMessage::create( "Table function '{}' requires from 2 to {} parameters: " ", , {}", name, diff --git a/src/TableFunctions/TableFunctionRemote.h b/src/TableFunctions/TableFunctionRemote.h index 6f28f1ec9de..32039d1e6a8 100644 --- a/src/TableFunctions/TableFunctionRemote.h +++ b/src/TableFunctions/TableFunctionRemote.h @@ -35,7 +35,7 @@ private: std::string name; bool is_cluster_function; - std::string help_message; + PreformattedMessage help_message; bool secure; ClusterPtr cluster; diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index 687ee51a0a5..f7d5e9c9df2 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -38,7 +38,7 @@ void TableFunctionS3::parseArgumentsImpl(const String & error_message, ASTs & ar else { if (args.empty() || args.size() > 6) - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, error_message); + throw Exception::createDeprecated(error_message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); auto * header_it = StorageURL::collectHeaders(args, s3_configuration.headers, context); if (header_it != args.end()) @@ -127,7 +127,7 @@ void TableFunctionS3::parseArguments(const ASTPtr & ast_function, ContextPtr con getName()); if (args_func.size() != 1) - throw Exception("Table function '" + getName() + "' must have arguments.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' must have arguments.", getName()); auto & args = args_func.at(0)->children; diff --git a/src/TableFunctions/TableFunctionS3Cluster.cpp b/src/TableFunctions/TableFunctionS3Cluster.cpp index 82790e1a328..9572d67c720 100644 --- a/src/TableFunctions/TableFunctionS3Cluster.cpp +++ b/src/TableFunctions/TableFunctionS3Cluster.cpp @@ -42,27 +42,25 @@ void TableFunctionS3Cluster::parseArguments(const ASTPtr & ast_function, Context ASTs & args_func = ast_function->children; if (args_func.size() != 1) - throw Exception("Table function '" + getName() + "' must have arguments.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' must have arguments.", getName()); ASTs & args = args_func.at(0)->children; for (auto & arg : args) arg = evaluateConstantExpressionAsLiteral(arg, context); - const auto message = fmt::format( - "The 
signature of table function {} could be the following:\n" \ - " - cluster, url\n" - " - cluster, url, format\n" \ - " - cluster, url, format, structure\n" \ - " - cluster, url, access_key_id, secret_access_key\n" \ - " - cluster, url, format, structure, compression_method\n" \ - " - cluster, url, access_key_id, secret_access_key, format\n" - " - cluster, url, access_key_id, secret_access_key, format, structure\n" \ - " - cluster, url, access_key_id, secret_access_key, format, structure, compression_method", - getName()); - + constexpr auto fmt_string = "The signature of table function {} could be the following:\n" + " - cluster, url\n" + " - cluster, url, format\n" + " - cluster, url, format, structure\n" + " - cluster, url, access_key_id, secret_access_key\n" + " - cluster, url, format, structure, compression_method\n" + " - cluster, url, access_key_id, secret_access_key, format\n" + " - cluster, url, access_key_id, secret_access_key, format, structure\n" + " - cluster, url, access_key_id, secret_access_key, format, structure, compression_method"; + auto message = PreformattedMessage{fmt::format(fmt_string, getName()), fmt_string}; if (args.size() < 2 || args.size() > 7) - throw Exception(message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception::createDeprecated(message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); /// This arguments are always the first configuration.cluster_name = checkAndGetLiteralArgument(args[0], "cluster_name"); @@ -76,7 +74,7 @@ void TableFunctionS3Cluster::parseArguments(const ASTPtr & ast_function, Context std::copy(args.begin() + 1, args.end(), std::back_inserter(clipped_args)); /// StorageS3ClusterConfiguration inherints from StorageS3Configuration, so it is safe to upcast it. - TableFunctionS3::parseArgumentsImpl(message, clipped_args, context, static_cast(configuration)); + TableFunctionS3::parseArgumentsImpl(message.text, clipped_args, context, static_cast(configuration)); } diff --git a/src/TableFunctions/TableFunctionSQLite.cpp b/src/TableFunctions/TableFunctionSQLite.cpp index 64ff93494db..13c6fcea60c 100644 --- a/src/TableFunctions/TableFunctionSQLite.cpp +++ b/src/TableFunctions/TableFunctionSQLite.cpp @@ -62,13 +62,12 @@ void TableFunctionSQLite::parseArguments(const ASTPtr & ast_function, ContextPtr const auto & func_args = ast_function->as(); if (!func_args.arguments) - throw Exception("Table function 'sqlite' must have arguments.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table function 'sqlite' must have arguments."); ASTs & args = func_args.arguments->children; if (args.size() != 2) - throw Exception("SQLite database requires 2 arguments: database path, table name", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "SQLite database requires 2 arguments: database path, table name"); for (auto & arg : args) arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); diff --git a/src/TableFunctions/TableFunctionURL.cpp b/src/TableFunctions/TableFunctionURL.cpp index e7c8197a58c..5de6c6b4ccc 100644 --- a/src/TableFunctions/TableFunctionURL.cpp +++ b/src/TableFunctions/TableFunctionURL.cpp @@ -32,7 +32,7 @@ void TableFunctionURL::parseArguments(const ASTPtr & ast, ContextPtr context) const auto & args = ast_function->children; if (args.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, bad_arguments_error_message); + throw Exception::createDeprecated(bad_arguments_error_message, ErrorCodes::BAD_ARGUMENTS); auto & 
url_function_args = assert_cast(args[0].get())->children; diff --git a/src/TableFunctions/TableFunctionValues.cpp b/src/TableFunctions/TableFunctionValues.cpp index 05574825275..545427f30c9 100644 --- a/src/TableFunctions/TableFunctionValues.cpp +++ b/src/TableFunctions/TableFunctionValues.cpp @@ -56,7 +56,7 @@ static void parseAndInsertValues(MutableColumns & res_columns, const ASTs & args const Tuple & value_tuple = value_field.safeGet(); if (value_tuple.size() != sample_block.columns()) - throw Exception("Values size should match with number of columns", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Values size should match with number of columns"); const DataTypes & value_types_tuple = type_tuple->getElements(); for (size_t j = 0; j < value_tuple.size(); ++j) @@ -83,12 +83,12 @@ void TableFunctionValues::parseArguments(const ASTPtr & ast_function, ContextPtr ASTs & args_func = ast_function->children; if (args_func.size() != 1) - throw Exception("Table function '" + getName() + "' must have arguments", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Table function '{}' must have arguments", getName()); ASTs & args = args_func.at(0)->children; if (args.empty()) - throw Exception("Table function '" + getName() + "' requires at least 1 argument", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' requires at least 1 argument", getName()); const auto & literal = args[0]->as(); String value; diff --git a/src/TableFunctions/TableFunctionView.cpp b/src/TableFunctions/TableFunctionView.cpp index c999cba08e9..578c497e720 100644 --- a/src/TableFunctions/TableFunctionView.cpp +++ b/src/TableFunctions/TableFunctionView.cpp @@ -32,7 +32,7 @@ void TableFunctionView::parseArguments(const ASTPtr & ast_function, ContextPtr / return; } } - throw Exception("Table function '" + getName() + "' requires a query argument.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table function '{}' requires a query argument.", getName()); } ColumnsDescription TableFunctionView::getActualTableStructure(ContextPtr context) const diff --git a/src/TableFunctions/TableFunctionZeros.cpp b/src/TableFunctions/TableFunctionZeros.cpp index 5874fca67e6..d1c67659f56 100644 --- a/src/TableFunctions/TableFunctionZeros.cpp +++ b/src/TableFunctions/TableFunctionZeros.cpp @@ -34,7 +34,7 @@ StoragePtr TableFunctionZeros::executeImpl(const ASTPtr & ast_fun auto arguments = function->arguments->children; if (arguments.size() != 1) - throw Exception("Table function '" + getName() + "' requires 'length'.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' requires 'length'.", getName()); UInt64 length = evaluateArgument(context, arguments[0]); @@ -43,7 +43,7 @@ StoragePtr TableFunctionZeros::executeImpl(const ASTPtr & ast_fun res->startup(); return res; } - throw Exception("Table function '" + getName() + "' requires 'limit'.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' requires 'limit'.", getName()); } void registerTableFunctionZeros(TableFunctionFactory & factory) diff --git a/tests/ci/cherry_pick.py b/tests/ci/cherry_pick.py index f7c69445eed..a7af807c57c 100644 --- a/tests/ci/cherry_pick.py +++ b/tests/ci/cherry_pick.py @@ -311,11 +311,13 @@ class Backport: logging.info("Active releases: %s", ", 
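# A minimal sketch of the backport-window lookup that tests/ci/cherry_pick.py switches to
# in the hunk that follows: instead of the merge-base, the script takes the first commit
# that is unique to the oldest open release branch. Plain `git` via subprocess is assumed;
# the remote and branch names in the example call are placeholders, not values the CI
# computes here.
import subprocess

def first_commit_of_release_branch(remote: str, default_branch: str, release_branch: str) -> str:
    # `git log --reverse default..release` lists commits reachable from the release
    # branch but not from the default branch, oldest first; the first hash is the
    # commit the backport window should start from.
    output = subprocess.check_output(
        [
            "git", "log", "--no-merges", "--format=%H", "--reverse",
            f"{remote}/{default_branch}..{remote}/{release_branch}",
        ],
        text=True,
    )
    return output.split("\n", 1)[0]

# Hypothetical usage: first_commit_of_release_branch("origin", "master", "22.8")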
".join(self.release_branches)) def receive_prs_for_backport(self): - # The commit is the oldest open release branch's merge-base - since_commit = git_runner( - f"git merge-base {self.remote}/{self.release_branches[0]} " - f"{self.remote}/{self.default_branch}" + # The commits in the oldest open release branch + oldest_branch_commits = git_runner( + "git log --no-merges --format=%H --reverse " + f"{self.remote}/{self.default_branch}..{self.remote}/{self.release_branches[0]}" ) + # The first commit is the one we are looking for + since_commit = oldest_branch_commits.split("\n", 1)[0] since_date = date.fromisoformat( git_runner.run(f"git log -1 --format=format:%cs {since_commit}") ) diff --git a/tests/ci/clickhouse_helper.py b/tests/ci/clickhouse_helper.py index f914bb42d99..654cd6869dc 100644 --- a/tests/ci/clickhouse_helper.py +++ b/tests/ci/clickhouse_helper.py @@ -183,6 +183,7 @@ def prepare_tests_results_for_clickhouse( current_row["test_duration_ms"] = int(test_time * 1000) current_row["test_name"] = test_name current_row["test_status"] = test_status + current_row["test_context_raw"] = test_result.raw_logs or "" result.append(current_row) return result diff --git a/tests/ci/fast_test_check.py b/tests/ci/fast_test_check.py index 7a87a93c26d..a5bb64889d1 100644 --- a/tests/ci/fast_test_check.py +++ b/tests/ci/fast_test_check.py @@ -80,10 +80,18 @@ def process_results( return "error", "Invalid check_status.tsv", test_results, additional_files state, description = status[0][0], status[0][1] - results_path = Path(result_folder) / "test_results.tsv" - test_results = read_test_results(results_path) - if len(test_results) == 0: - return "error", "Empty test_results.tsv", test_results, additional_files + try: + results_path = Path(result_folder) / "test_results.tsv" + test_results = read_test_results(results_path) + if len(test_results) == 0: + return "error", "Empty test_results.tsv", test_results, additional_files + except Exception as e: + return ( + "error", + f"Cannot parse test_results.tsv ({e})", + test_results, + additional_files, + ) return state, description, test_results, additional_files @@ -175,6 +183,7 @@ def main(): "test_log.txt" in test_output_files or "test_result.txt" in test_output_files ) test_result_exists = "test_results.tsv" in test_output_files + test_results = [] # type: TestResults if "submodule_log.txt" not in test_output_files: description = "Cannot clone repository" state = "failure" diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index 3653aefeb77..cf5f53afbf9 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -163,17 +163,25 @@ def process_results( return "error", "Invalid check_status.tsv", test_results, additional_files state, description = status[0][0], status[0][1] - results_path = Path(result_folder) / "test_results.tsv" + try: + results_path = Path(result_folder) / "test_results.tsv" - if results_path.exists(): - logging.info("Found test_results.tsv") - else: - logging.info("Files in result folder %s", os.listdir(result_folder)) - return "error", "Not found test_results.tsv", test_results, additional_files + if results_path.exists(): + logging.info("Found test_results.tsv") + else: + logging.info("Files in result folder %s", os.listdir(result_folder)) + return "error", "Not found test_results.tsv", test_results, additional_files - test_results = read_test_results(results_path) - if len(test_results) == 0: - return "error", "Empty test_results.tsv", test_results, additional_files + 
test_results = read_test_results(results_path) + if len(test_results) == 0: + return "error", "Empty test_results.tsv", test_results, additional_files + except Exception as e: + return ( + "error", + f"Cannot parse test_results.tsv ({e})", + test_results, + additional_files, + ) return state, description, test_results, additional_files diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index 85933e27309..f864751e830 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -117,10 +117,18 @@ def process_results( return "error", "Invalid check_status.tsv", test_results, additional_files state, description = status[0][0], status[0][1] - results_path = Path(result_folder) / "test_results.tsv" - test_results = read_test_results(results_path, False) - if len(test_results) == 0: - return "error", "Empty test_results.tsv", test_results, additional_files + try: + results_path = Path(result_folder) / "test_results.tsv" + test_results = read_test_results(results_path, False) + if len(test_results) == 0: + return "error", "Empty test_results.tsv", test_results, additional_files + except Exception as e: + return ( + "error", + f"Cannot parse test_results.tsv ({e})", + test_results, + additional_files, + ) return state, description, test_results, additional_files diff --git a/tests/ci/merge_pr.py b/tests/ci/merge_pr.py index 847b49cbb28..6be02e497d9 100644 --- a/tests/ci/merge_pr.py +++ b/tests/ci/merge_pr.py @@ -7,13 +7,16 @@ import logging from datetime import datetime from os import getenv +from pprint import pformat from typing import Dict, List +from github.PaginatedList import PaginatedList from github.PullRequestReview import PullRequestReview +from github.WorkflowRun import WorkflowRun from commit_status_helper import get_commit_filtered_statuses from get_robot_token import get_best_robot_token -from github_helper import GitHub, NamedUser, PullRequest +from github_helper import GitHub, NamedUser, PullRequest, Repository from pr_info import PRInfo @@ -32,24 +35,20 @@ class Reviews: """ logging.info("Checking the PR for approvals") self.pr = pr - self.reviews = pr.get_reviews() - # the reviews are ordered by time - self._review_per_user = {} # type: Dict[NamedUser, PullRequestReview] - self.approved_at = datetime.fromtimestamp(0) - for r in self.reviews: + reviews = pr.get_reviews() + # self.reviews is a dict of latest CHANGES_REQUESTED or APPROVED review + # per user + # NamedUsed has proper __eq__ and __hash__, so it's safe to use it + self.reviews = {} # type: Dict[NamedUser, PullRequestReview] + for r in reviews: user = r.user - if r.state not in self.STATES: + + if not self.reviews.get(user): + self.reviews[user] = r continue - if r.state == "APPROVED": - self.approved_at = max(r.submitted_at, self.approved_at) - - if not self._review_per_user.get(user): - self._review_per_user[user] = r - continue - - if r.submitted_at < self._review_per_user[user].submitted_at: - self._review_per_user[user] = r + if r.submitted_at < self.reviews[user].submitted_at: + self.reviews[user] = r def is_approved(self, team: List[NamedUser]) -> bool: """Checks if the PR is approved, and no changes made after the last approval""" @@ -57,34 +56,37 @@ class Reviews: logging.info("There aren't reviews for PR #%s", self.pr.number) return False - # We consider reviews only from the given list of users - statuses = { - r.state - for user, r in self._review_per_user.items() - if r.state == "CHANGES_REQUESTED" - or (r.state == "APPROVED" and user in team) 
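# A sketch of the defensive pattern the *_check.py scripts above adopt: reading
# test_results.tsv is wrapped in try/except so a malformed or missing file turns the
# check into an "error" status instead of crashing the job. `read_results` below is a
# simplified stand-in for the CI helper read_test_results.
import csv
from pathlib import Path
from typing import List, Tuple

def read_results(path: Path) -> List[Tuple[str, str]]:
    with open(path, newline="") as fd:
        return [(row[0], row[1]) for row in csv.reader(fd, delimiter="\t")]

def process_results_safely(result_folder: str):
    test_results: List[Tuple[str, str]] = []
    try:
        results_path = Path(result_folder) / "test_results.tsv"
        test_results = read_results(results_path)
        if not test_results:
            return "error", "Empty test_results.tsv", test_results
    except Exception as e:
        return "error", f"Cannot parse test_results.tsv ({e})", test_results
    return "success", "OK", test_results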
+ filtered_reviews = { + user: review + for user, review in self.reviews.items() + if review.state in self.STATES and user in team } - if "CHANGES_REQUESTED" in statuses: + # We consider reviews only from the given list of users + changes_requested = { + user: review + for user, review in filtered_reviews.items() + if review.state == "CHANGES_REQUESTED" + } + + if changes_requested: logging.info( "The following users requested changes for the PR: %s", - ", ".join( - user.login - for user, r in self._review_per_user.items() - if r.state == "CHANGES_REQUESTED" - ), + ", ".join(user.login for user in changes_requested.keys()), ) return False - if "APPROVED" in statuses: + approved = { + user: review + for user, review in filtered_reviews.items() + if review.state == "APPROVED" + } + + if approved: logging.info( "The following users from %s team approved the PR: %s", TEAM_NAME, - ", ".join( - user.login - for user, r in self._review_per_user.items() - if r.state == "APPROVED" and user in team - ), + ", ".join(user.login for user in approved.keys()), ) # The only reliable place to get the 100% accurate last_modified # info is when the commit was pushed to GitHub. The info is @@ -101,10 +103,24 @@ class Reviews: last_changed = datetime.strptime( commit.stats.last_modified, "%a, %d %b %Y %H:%M:%S GMT" ) - if self.approved_at < last_changed: + + approved_at = max(review.submitted_at for review in approved.values()) + if approved_at == datetime.fromtimestamp(0): + logging.info( + "Unable to get `datetime.fromtimestamp(0)`, " + "here's debug info about reviews: %s", + "\n".join(pformat(review) for review in self.reviews.values()), + ) + else: + logging.info( + "The PR is approved at %s", + approved_at.isoformat(), + ) + + if approved_at < last_changed: logging.info( "There are changes after approve at %s", - self.approved_at.isoformat(), + approved_at.isoformat(), ) return False return True @@ -113,6 +129,20 @@ class Reviews: return False +def get_workflows_for_head(repo: Repository, head_sha: str) -> List[WorkflowRun]: + # The monkey-patch until the PR is merged: + # https://github.com/PyGithub/PyGithub/pull/2408 + return list( + PaginatedList( + WorkflowRun, + repo._requester, # type:ignore # pylint:disable=protected-access + f"{repo.url}/actions/runs", + {"head_sha": head_sha}, + list_item="workflow_runs", + ) + ) + + def parse_args() -> argparse.Namespace: pr_info = PRInfo() parser = argparse.ArgumentParser( @@ -120,11 +150,26 @@ def parse_args() -> argparse.Namespace: description="Script to merge the given PR. 
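# A rough equivalent of get_workflows_for_head() above that calls the GitHub REST API
# directly with `requests` instead of monkey-patching PyGithub. The endpoint and its
# `head_sha` filter are assumed from the public API documentation; the token, repository
# name and SHA are placeholders.
import requests

def workflow_runs_for_head(repo_full_name: str, head_sha: str, token: str) -> list:
    response = requests.get(
        f"https://api.github.com/repos/{repo_full_name}/actions/runs",
        params={"head_sha": head_sha},
        headers={
            "Authorization": f"Bearer {token}",
            "Accept": "application/vnd.github+json",
        },
        timeout=30,
    )
    response.raise_for_status()
    return response.json().get("workflow_runs", [])

# The merge gate above then refuses to merge while more than one returned run still has
# a "status" other than "completed".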
Additional checks for approved " "status and green commit statuses could be done", ) + parser.add_argument( + "--dry-run", + action="store_true", + help="if set, the script won't merge the PR, just check the conditions", + ) parser.add_argument( "--check-approved", action="store_true", help="if set, checks that the PR is approved and no changes required", ) + parser.add_argument( + "--check-running-workflows", default=True, help=argparse.SUPPRESS + ) + parser.add_argument( + "--no-check-running-workflows", + dest="check_running_workflows", + action="store_false", + default=argparse.SUPPRESS, + help="(dangerous) if set, skip checking for running workflows for the PR head", + ) parser.add_argument("--check-green", default=True, help=argparse.SUPPRESS) parser.add_argument( "--no-check-green", @@ -180,6 +225,19 @@ def main(): logging.info("The PR #%s is not ready for merge, stopping", pr.number) return + if args.check_running_workflows: + workflows = get_workflows_for_head(repo, pr.head.sha) + workflows_in_progress = [wf for wf in workflows if wf.status != "completed"] + # At most one workflow in progress is fine. We check that there no + # cases like, e.g. PullRequestCI and DocksCheck in progress at once + if len(workflows_in_progress) > 1: + logging.info( + "The PR #%s has more than one workflows in progress, check URLs:\n%s", + pr.number, + "\n".join(wf.html_url for wf in workflows_in_progress), + ) + return + if args.check_green: logging.info("Checking that all PR's statuses are green") commit = repo.get_commit(pr.head.sha) @@ -203,7 +261,8 @@ def main(): return logging.info("Merging the PR") - pr.merge() + if not args.dry_run: + pr.merge() if __name__ == "__main__": diff --git a/tests/ci/push_to_artifactory.py b/tests/ci/push_to_artifactory.py deleted file mode 100755 index 97971f207ce..00000000000 --- a/tests/ci/push_to_artifactory.py +++ /dev/null @@ -1,367 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -import argparse -import logging -import os -import re -from collections import namedtuple -from typing import Dict, List, Optional, Tuple - -from artifactory import ArtifactorySaaSPath # type: ignore -from build_download_helper import download_build_with_progress -from env_helper import S3_ARTIFACT_DOWNLOAD_TEMPLATE, RUNNER_TEMP -from git_helper import TAG_REGEXP, commit, removeprefix, removesuffix - - -# Necessary ENV variables -def getenv(name: str, default: Optional[str] = None) -> str: - env = os.getenv(name, default) - if env is not None: - return env - raise KeyError(f"Necessary {name} environment is not set") - - -TEMP_PATH = os.path.join(RUNNER_TEMP, "push_to_artifactory") -# One of the following ENVs is necessary -JFROG_API_KEY = getenv("JFROG_API_KEY", "") -JFROG_TOKEN = getenv("JFROG_TOKEN", "") - -CheckDesc = namedtuple("CheckDesc", ("check_name", "deb_arch", "rpm_arch")) - - -class Packages: - checks = ( - CheckDesc("package_release", "amd64", "x86_64"), - CheckDesc("package_aarch64", "arm64", "aarch64"), - ) - packages = ( - "clickhouse-client", - "clickhouse-common-static", - "clickhouse-common-static-dbg", - "clickhouse-server", - ) - - def __init__(self, version: str): - # Dicts of name: s3_path_suffix - self.deb = {} # type: Dict[str, str] - self.rpm = {} # type: Dict[str, str] - self.tgz = {} # type: Dict[str, str] - for check in self.checks: - for name in self.packages: - deb = f"{name}_{version}_{check.deb_arch}.deb" - self.deb[deb] = f"{check.check_name}/{deb}" - - rpm = f"{name}-{version}.{check.rpm_arch}.rpm" - self.rpm[rpm] = 
f"{check.check_name}/{rpm}" - - tgz = f"{name}-{version}-{check.deb_arch}.tgz" - self.tgz[tgz] = f"{check.check_name}/{tgz}" - - def arch(self, deb_pkg: str) -> str: - if deb_pkg not in self.deb: - raise ValueError(f"{deb_pkg} not in {self.deb}") - return removesuffix(deb_pkg, ".deb").split("_")[-1] - - def replace_with_fallback(self, name: str) -> None: - if name.endswith(".deb"): - suffix = self.deb.pop(name) - self.deb[self.fallback_to_all(name)] = self.fallback_to_all(suffix) - elif name.endswith(".rpm"): - suffix = self.rpm.pop(name) - self.rpm[self.fallback_to_all(name)] = self.fallback_to_all(suffix) - elif name.endswith(".tgz"): - suffix = self.tgz.pop(name) - self.tgz[self.fallback_to_all(name)] = self.fallback_to_all(suffix) - else: - raise KeyError(f"unknown package type for {name}") - - @staticmethod - def path(package_file: str) -> str: - return os.path.join(TEMP_PATH, package_file) - - @staticmethod - def fallback_to_all(url_or_name: str) -> str: - """Until July 2022 we had clickhouse-server and clickhouse-client with - arch 'all'""" - # deb - if url_or_name.endswith("amd64.deb") or url_or_name.endswith("arm64.deb"): - return f"{url_or_name[:-9]}all.deb" - # rpm - if url_or_name.endswith("x86_64.rpm") or url_or_name.endswith("aarch64.rpm"): - new = removesuffix(removesuffix(url_or_name, "x86_64.rpm"), "aarch64.rpm") - return f"{new}noarch.rpm" - # tgz - if url_or_name.endswith("-amd64.tgz") or url_or_name.endswith("-arm64.tgz"): - return f"{url_or_name[:-10]}.tgz" - return url_or_name - - -class S3: - def __init__( - self, - pr: int, - commit: str, - version: str, - force_download: bool, - ): - self._common = dict( - pr_or_release=pr, - commit=commit, - ) - self.force_download = force_download - self.packages = Packages(version) - - def download_package(self, package_file: str, s3_path_suffix: str) -> None: - path = Packages.path(package_file) - fallback_path = Packages.fallback_to_all(path) - if not self.force_download and ( - os.path.exists(path) or os.path.exists(fallback_path) - ): - if os.path.exists(fallback_path): - self.packages.replace_with_fallback(package_file) - - return - build_name, artifact = s3_path_suffix.split("/") - url = S3_ARTIFACT_DOWNLOAD_TEMPLATE.format_map( - {**self._common, "build_name": build_name, "artifact": artifact} - ) - try: - download_build_with_progress(url, path) - except Exception as e: - if "Cannot download dataset from" in e.args[0]: - new_url = Packages.fallback_to_all(url) - logging.warning( - "Fallback downloading %s for old release", fallback_path - ) - download_build_with_progress(new_url, fallback_path) - self.packages.replace_with_fallback(package_file) - - def download_deb(self): - # Copy to have a way to pop/add fallback packages - packages = self.packages.deb.copy() - for package_file, s3_path_suffix in packages.items(): - self.download_package(package_file, s3_path_suffix) - - def download_rpm(self): - # Copy to have a way to pop/add fallback packages - packages = self.packages.rpm.copy() - for package_file, s3_path_suffix in packages.items(): - self.download_package(package_file, s3_path_suffix) - - def download_tgz(self): - # Copy to have a way to pop/add fallback packages - packages = self.packages.tgz.copy() - for package_file, s3_path_suffix in packages.items(): - self.download_package(package_file, s3_path_suffix) - - -class Release: - def __init__(self, name: str): - r = re.compile(TAG_REGEXP) - # Automatically remove refs/tags/ if full refname passed here - name = removeprefix(name, "refs/tags/") - if not 
r.match(name): - raise argparse.ArgumentTypeError( - f"release name {name} does not match " - "v12.1.2.15-(testing|prestable|stable|lts) pattern" - ) - self._name = name - self._version = removeprefix(self._name, "v") - self._version = self.version.split("-")[0] - self._version_parts = tuple(self.version.split(".")) - self._type = self._name.split("-")[-1] - - @property - def version(self) -> str: - return self._version - - @property - def version_parts(self) -> Tuple[str, ...]: - return self._version_parts - - @property - def type(self) -> str: - return self._type - - -class Artifactory: - def __init__( - self, - url: str, - release: str, - deb_repo: str = "deb", - rpm_repo: str = "rpm", - tgz_repo: str = "tgz", - ): - self._url = url - self._release = release - self._deb_url = "/".join((self._url, deb_repo, "pool", self._release)) + "/" - self._rpm_url = "/".join((self._url, rpm_repo, self._release)) + "/" - self._tgz_url = "/".join((self._url, tgz_repo, self._release)) + "/" - # check the credentials ENVs for early exit - self.__path_helper("_deb", "") - - def deploy_deb(self, packages: Packages) -> None: - for package_file in packages.deb: - path = packages.path(package_file) - dist = self._release - comp = "main" - arch = packages.arch(package_file) - logging.info( - "Deploy %s(distribution=%s;component=%s;architecture=%s) " - "to artifactory", - path, - dist, - comp, - arch, - ) - self.deb_path(package_file).deploy_deb(path, dist, comp, arch) - - def deploy_rpm(self, packages: Packages) -> None: - for package_file in packages.rpm: - path = packages.path(package_file) - logging.info("Deploy %s to artifactory", path) - self.rpm_path(package_file).deploy_file(path) - - def deploy_tgz(self, packages: Packages) -> None: - for package_file in packages.tgz: - path = packages.path(package_file) - logging.info("Deploy %s to artifactory", path) - self.tgz_path(package_file).deploy_file(path) - - def __path_helper(self, name: str, package_file: str) -> ArtifactorySaaSPath: - url = "/".join((getattr(self, name + "_url"), package_file)) - path = None - if JFROG_API_KEY: - path = ArtifactorySaaSPath(url, apikey=JFROG_API_KEY) - elif JFROG_TOKEN: - path = ArtifactorySaaSPath(url, token=JFROG_TOKEN) - else: - raise KeyError("Neither JFROG_API_KEY nor JFROG_TOKEN env are defined") - return path - - def deb_path(self, package_file: str) -> ArtifactorySaaSPath: - return self.__path_helper("_deb", package_file) - - def rpm_path(self, package_file: str) -> ArtifactorySaaSPath: - return self.__path_helper("_rpm", package_file) - - def tgz_path(self, package_file: str) -> ArtifactorySaaSPath: - return self.__path_helper("_tgz", package_file) - - -def parse_args() -> argparse.Namespace: - parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - description="Program to download artifacts from S3 and push them to " - "artifactory. ENV variables JFROG_API_KEY and JFROG_TOKEN are used " - "for authentication in the given order", - ) - parser.add_argument( - "--release", - required=True, - type=Release, - help="release name, e.g. v12.13.14.15-prestable; 'refs/tags/' " - "prefix is striped automatically", - ) - parser.add_argument( - "--pull-request", - type=int, - default=0, - help="pull request number; if PR is omitted, the first two numbers " - "from release will be used, e.g. 
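# A condensed sketch of the tag handling the removed push_to_artifactory.Release class
# performed: strip an optional refs/tags/ prefix and split a tag such as
# "v22.8.10.29-lts" (a made-up example) into version parts and the release type.
def parse_release_tag(name: str):
    if name.startswith("refs/tags/"):
        name = name[len("refs/tags/"):]
    version, _, release_type = name.lstrip("v").rpartition("-")
    if not version or release_type not in ("testing", "prestable", "stable", "lts"):
        raise ValueError(f"unexpected release tag {name!r}")
    return tuple(version.split(".")), release_type

# parse_release_tag("v22.8.10.29-lts") -> (("22", "8", "10", "29"), "lts")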
12.11", - ) - parser.add_argument( - "--commit", required=True, type=commit, help="commit hash for S3 bucket" - ) - parser.add_argument( - "--all", action="store_true", help="implies all deb, rpm and tgz" - ) - parser.add_argument( - "--deb", action="store_true", help="if Debian packages should be processed" - ) - parser.add_argument( - "--rpm", action="store_true", help="if RPM packages should be processed" - ) - parser.add_argument( - "--tgz", - action="store_true", - help="if tgz archives should be processed. They aren't pushed to artifactory", - ) - parser.add_argument( - "--artifactory-url", - default="https://clickhousedb.jfrog.io/artifactory", - help="SaaS Artifactory url", - ) - parser.add_argument("--artifactory", default=True, help=argparse.SUPPRESS) - parser.add_argument( - "-n", - "--no-artifactory", - action="store_false", - dest="artifactory", - default=argparse.SUPPRESS, - help="do not push packages to artifactory", - ) - parser.add_argument("--force-download", default=True, help=argparse.SUPPRESS) - parser.add_argument( - "--no-force-download", - action="store_false", - dest="force_download", - default=argparse.SUPPRESS, - help="do not download packages again if they exist already", - ) - - args = parser.parse_args() - if args.all: - args.deb = args.rpm = args.tgz = True - if not (args.deb or args.rpm or args.tgz): - parser.error("at least one of --deb, --rpm or --tgz should be specified") - if args.pull_request == 0: - args.pull_request = ".".join(args.release.version_parts[:2]) - return args - - -def process_deb(s3: S3, art_clients: List[Artifactory]) -> None: - s3.download_deb() - for art_client in art_clients: - art_client.deploy_deb(s3.packages) - - -def process_rpm(s3: S3, art_clients: List[Artifactory]) -> None: - s3.download_rpm() - for art_client in art_clients: - art_client.deploy_rpm(s3.packages) - - -def process_tgz(s3: S3, art_clients: List[Artifactory]) -> None: - s3.download_tgz() - for art_client in art_clients: - art_client.deploy_tgz(s3.packages) - - -def main(): - logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") - args = parse_args() - os.makedirs(TEMP_PATH, exist_ok=True) - s3 = S3( - args.pull_request, - args.commit, - args.release.version, - args.force_download, - ) - art_clients = [] - if args.artifactory: - art_clients.append(Artifactory(args.artifactory_url, args.release.type)) - if args.release.type == "lts": - art_clients.append(Artifactory(args.artifactory_url, "stable")) - - if args.deb: - process_deb(s3, art_clients) - if args.rpm: - process_rpm(s3, art_clients) - if args.tgz: - process_tgz(s3, art_clients) - - -if __name__ == "__main__": - main() diff --git a/tests/ci/release.py b/tests/ci/release.py index 7bcb12ef8b7..a70493c7070 100755 --- a/tests/ci/release.py +++ b/tests/ci/release.py @@ -71,33 +71,47 @@ class Release: repo: Repo, release_commit: str, release_type: Literal["major", "minor", "patch"], + dry_run: bool, + with_stderr: bool, ): self.repo = repo self._release_commit = "" self.release_commit = release_commit + self.dry_run = dry_run + self.with_stderr = with_stderr assert release_type in self.BIG + self.SMALL self.release_type = release_type self._git = Git() self._version = get_version_from_repo(git=self._git) + self.release_version = self.version self._release_branch = "" self._rollback_stack = [] # type: List[str] - def run(self, cmd: str, cwd: Optional[str] = None, **kwargs: Any) -> str: + def run( + self, cmd: str, cwd: Optional[str] = None, dry_run: bool = False, **kwargs: Any + ) -> str: cwd_text 
= ""  if cwd: cwd_text = f" (CWD='{cwd}')" + if dry_run: + logging.info("Would run command%s:\n %s", cwd_text, cmd) + return "" + if not self.with_stderr: + kwargs["stderr"] = subprocess.DEVNULL + logging.info("Running command%s:\n %s", cwd_text, cmd) return self._git.run(cmd, cwd, **kwargs) - def set_release_branch(self): + def set_release_info(self): # Fetch release commit and tags in case they don't exist locally self.run(f"git fetch {self.repo.url} {self.release_commit}") self.run(f"git fetch {self.repo.url} --tags") # Get the actual version for the commit before check with self._checkout(self.release_commit, True): - self.read_version() self.release_branch = f"{self.version.major}.{self.version.minor}" + self.release_version = get_version_from_repo(git=self._git) + self.release_version.with_description(self.get_stable_release_type()) self.read_version() @@ -157,9 +171,7 @@ class Release: self.check_commit_release_ready() - def do( - self, check_dirty: bool, check_branch: bool, with_release_branch: bool - ) -> None: + def do(self, check_dirty: bool, check_branch: bool) -> None: self.check_prerequisites() if check_dirty: @@ -172,34 +184,43 @@ class Release: if self._git.branch != "master": raise Exception("the script must be launched only from master") - self.set_release_branch() + self.set_release_info() if check_branch: self.check_branch() - with self._checkout(self.release_commit, True): - if self.release_type in self.BIG: + if self.release_type in self.BIG: + if self._version.minor >= 12 and self.release_type != "major": + raise ValueError( + "The release type must be 'major' for minor versions >= 12" + ) + + with self._checkout(self.release_commit, True): # Checkout to the commit, it will provide the correct current version - if with_release_branch: + with self.testing(): with self.create_release_branch(): - logging.info("Prestable part of the releasing is done") - else: - logging.info("Skipping creating release branch stage") + logging.info( + "Publishing release %s from commit %s is done", + self.release_version.describe, + self.release_commit, + ) - rollback = self._rollback_stack.copy() - try: - with self.testing(): - logging.info("Testing part of the releasing is done") - except (Exception, KeyboardInterrupt): - logging.fatal("Testing part failed, rollback previous steps") - rollback.reverse() - for cmd in rollback: - self.run(cmd) - raise - - elif self.release_type in self.SMALL: + elif self.release_type in self.SMALL: + with self._checkout(self.release_commit, True): with self.stable(): - logging.info("Stable part of the releasing is done") + logging.info( + "Publishing release %s from commit %s is done", + self.release_version.describe, + self.release_commit, + ) + + if self.dry_run: + logging.info("Dry running, clean out possible changes") + rollback = self._rollback_stack.copy() + rollback.reverse() + for cmd in rollback: + self.run(cmd) + return self.log_post_workflows() self.log_rollback() @@ -237,6 +258,21 @@ class Release: f"for {self.release_type} release" ) + def _commit_cmake_contributors(self, version: ClickHouseVersion) -> None: + update_cmake_version(version) + update_contributors(raise_error=True) + if self.dry_run: + logging.info( + "Dry running, resetting the following changes in the repo:\n%s", + self.run(f"git diff '{self.CMAKE_PATH}' '{self.CONTRIBUTORS_PATH}'"), + ) + self.run(f"git checkout '{self.CMAKE_PATH}' '{self.CONTRIBUTORS_PATH}'") + self.run( + f"git commit -m 'Update version to {version.string}' " + f"'{self.CMAKE_PATH}' '{self.CONTRIBUTORS_PATH}'", 
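# A compact sketch of the dry-run behaviour Release.run() gains above: in dry-run mode
# the command is only logged and an empty string is returned, otherwise it is executed.
# subprocess is used here as a stand-in for the Git helper the release script wraps.
import logging
import subprocess
from typing import Optional

def run(cmd: str, cwd: Optional[str] = None, dry_run: bool = False) -> str:
    cwd_text = f" (CWD='{cwd}')" if cwd else ""
    if dry_run:
        logging.info("Would run command%s:\n    %s", cwd_text, cmd)
        return ""
    logging.info("Running command%s:\n    %s", cwd_text, cmd)
    return subprocess.check_output(cmd, shell=True, cwd=cwd, text=True).strip()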
+ dry_run=self.dry_run, + ) + def log_rollback(self): if self._rollback_stack: rollback = self._rollback_stack.copy() @@ -262,12 +298,8 @@ class Release: self.read_version() with self._create_branch(self.release_branch, self.release_commit): with self._checkout(self.release_branch, True): - self.read_version() - self.version.with_description(self.get_stable_release_type()) - with self._create_gh_release(False): - with self._bump_release_branch(): - # At this point everything will rollback automatically - yield + with self._bump_release_branch(): + yield @contextmanager def stable(self): @@ -280,6 +312,15 @@ class Release: self.version.with_description(version_type) update_cmake_version(self.version) update_contributors(raise_error=True) + if self.dry_run: + logging.info( + "Dry running, resetting the following changes in the repo:\n%s", + self.run( + f"git diff '{self.CMAKE_PATH}' '{self.CONTRIBUTORS_PATH}'" + ), + ) + self.run(f"git checkout '{self.CMAKE_PATH}' '{self.CONTRIBUTORS_PATH}'") + # Checkouting the commit of the branch and not the branch itself, # then we are able to skip rollback with self._checkout(f"{self.release_branch}^0", False): @@ -287,7 +328,8 @@ class Release: self.run( f"git commit -m " f"'Update version to {self.version.string}' " - f"'{self.CMAKE_PATH}' '{self.CONTRIBUTORS_PATH}'" + f"'{self.CMAKE_PATH}' '{self.CONTRIBUTORS_PATH}'", + dry_run=self.dry_run, ) with self._push( "HEAD", with_rollback_on_fail=False, remote_ref=self.release_branch @@ -295,7 +337,7 @@ class Release: # DO NOT PUT ANYTHING ELSE HERE # The push must be the last action and mean the successful release self._rollback_stack.append( - f"git push {self.repo.url} " + f"{self.dry_run_prefix}git push {self.repo.url} " f"+{current_commit}:{self.release_branch}" ) yield @@ -337,6 +379,12 @@ class Release: def release_commit(self, release_commit: str) -> None: self._release_commit = commit(release_commit) + @property + def dry_run_prefix(self) -> str: + if self.dry_run: + return "# " + return "" + @contextmanager def _bump_release_branch(self): # Update only git, origal version stays the same @@ -347,12 +395,7 @@ class Release: if version_type == VersionType.LTS: pr_labels += " --label release-lts" new_version.with_description(version_type) - update_cmake_version(new_version) - update_contributors(raise_error=True) - self.run( - f"git commit -m 'Update version to {new_version.string}' " - f"'{self.CMAKE_PATH}' '{self.CONTRIBUTORS_PATH}'" - ) + self._commit_cmake_contributors(new_version) with self._push(self.release_branch): with self._create_gh_label( f"v{self.release_branch}-must-backport", "10dbed" @@ -360,35 +403,35 @@ class Release: with self._create_gh_label( f"v{self.release_branch}-affected", "c2bfff" ): - # The following command is rolled back by self._push + # The following command is rolled back by deleting branch + # in self._push self.run( f"gh pr create --repo {self.repo} --title " f"'Release pull request for branch {self.release_branch}' " f"--head {self.release_branch} {pr_labels} " "--body 'This PullRequest is a part of ClickHouse release " "cycle. It is used by CI system only. 
Do not perform any " - "changes with it.'" + "changes with it.'", + dry_run=self.dry_run, ) - # Here the release branch part is done - yield + with self._create_gh_release(False): + # Here the release branch part is done + yield @contextmanager def _bump_testing_version(self, helper_branch: str) -> Iterator[None]: self.read_version() self.version = self.version.update(self.release_type) self.version.with_description(VersionType.TESTING) - update_cmake_version(self.version) - update_contributors(raise_error=True) - self.run( - f"git commit -m 'Update version to {self.version.string}' " - f"'{self.CMAKE_PATH}' '{self.CONTRIBUTORS_PATH}'" - ) + self._commit_cmake_contributors(self.version) with self._push(helper_branch): body_file = get_abs_path(".github/PULL_REQUEST_TEMPLATE.md") + # The following command is rolled back by deleting branch in self._push self.run( f"gh pr create --repo {self.repo} --title 'Update version after " f"release' --head {helper_branch} --body-file '{body_file}' " - "--label 'do not test' --assignee @me" + "--label 'do not test' --assignee @me", + dry_run=self.dry_run, ) # Here the testing part is done yield @@ -402,6 +445,8 @@ class Release: self.run(f"git checkout {ref}") # checkout is not put into rollback_stack intentionally rollback_cmd = f"git checkout {orig_ref}" + # always update version and git after checked out ref + self.read_version() try: yield except (Exception, KeyboardInterrupt): @@ -415,6 +460,7 @@ class Release: @contextmanager def _create_branch(self, name: str, start_point: str = "") -> Iterator[None]: self.run(f"git branch {name} {start_point}") + rollback_cmd = f"git branch -D {name}" self._rollback_stack.append(rollback_cmd) try: @@ -428,9 +474,12 @@ class Release: def _create_gh_label(self, label: str, color_hex: str) -> Iterator[None]: # API call, https://docs.github.com/en/rest/reference/issues#create-a-label self.run( - f"gh api repos/{self.repo}/labels -f name={label} -f color={color_hex}" + f"gh api repos/{self.repo}/labels -f name={label} -f color={color_hex}", + dry_run=self.dry_run, + ) + rollback_cmd = ( + f"{self.dry_run_prefix}gh api repos/{self.repo}/labels/{label} -X DELETE" ) - rollback_cmd = f"gh api repos/{self.repo}/labels/{label} -X DELETE" self._rollback_stack.append(rollback_cmd) try: yield @@ -443,15 +492,19 @@ class Release: def _create_gh_release(self, as_prerelease: bool) -> Iterator[None]: with self._create_tag(): # Preserve tag if version is changed - tag = self.version.describe + tag = self.release_version.describe prerelease = "" if as_prerelease: prerelease = "--prerelease" self.run( f"gh release create {prerelease} --repo {self.repo} " - f"--title 'Release {tag}' '{tag}'" + f"--title 'Release {tag}' '{tag}'", + dry_run=self.dry_run, + ) + rollback_cmd = ( + f"{self.dry_run_prefix}gh release delete --yes " + f"--repo {self.repo} '{tag}'" ) - rollback_cmd = f"gh release delete --yes --repo {self.repo} '{tag}'" self._rollback_stack.append(rollback_cmd) try: yield @@ -462,12 +515,15 @@ class Release: @contextmanager def _create_tag(self): - tag = self.version.describe - self.run(f"git tag -a -m 'Release {tag}' '{tag}'") - rollback_cmd = f"git tag -d '{tag}'" + tag = self.release_version.describe + self.run( + f"git tag -a -m 'Release {tag}' '{tag}' {self.release_commit}", + dry_run=self.dry_run, + ) + rollback_cmd = f"{self.dry_run_prefix}git tag -d '{tag}'" self._rollback_stack.append(rollback_cmd) try: - with self._push(f"'{tag}'"): + with self._push(tag): yield except (Exception, KeyboardInterrupt): 
logging.warning("Rolling back tag %s", tag) @@ -481,9 +537,11 @@ class Release: if remote_ref == "": remote_ref = ref - self.run(f"git push {self.repo.url} {ref}:{remote_ref}") + self.run(f"git push {self.repo.url} {ref}:{remote_ref}", dry_run=self.dry_run) if with_rollback_on_fail: - rollback_cmd = f"git push -d {self.repo.url} {remote_ref}" + rollback_cmd = ( + f"{self.dry_run_prefix}git push -d {self.repo.url} {remote_ref}" + ) self._rollback_stack.append(rollback_cmd) try: @@ -532,13 +590,6 @@ def parse_args() -> argparse.Namespace: "new branch is created only for 'major' and 'minor'", ) parser.add_argument("--with-release-branch", default=True, help=argparse.SUPPRESS) - parser.add_argument( - "--no-release-branch", - dest="with_release_branch", - action="store_false", - default=argparse.SUPPRESS, - help=f"if set, for release types in {Release.BIG} skip creating release branch", - ) parser.add_argument("--check-dirty", default=True, help=argparse.SUPPRESS) parser.add_argument( "--no-check-dirty", @@ -558,6 +609,16 @@ def parse_args() -> argparse.Namespace: "works only for a release branches, that name " "should be the same as '$MAJOR.$MINOR' version, e.g. 22.2", ) + parser.add_argument( + "--dry-run", + action="store_true", + help="do not make any actual changes in the repo, just show what will be done", + ) + parser.add_argument( + "--with-stderr", + action="store_true", + help="if set, the stderr of all subprocess commands will be printed as well", + ) return parser.parse_args() @@ -566,9 +627,11 @@ def main(): logging.basicConfig(level=logging.INFO) args = parse_args() repo = Repo(args.repo, args.remote_protocol) - release = Release(repo, args.commit, args.release_type) + release = Release( + repo, args.commit, args.release_type, args.dry_run, args.with_stderr + ) - release.do(args.check_dirty, args.check_branch, args.with_release_branch) + release.do(args.check_dirty, args.check_branch) if __name__ == "__main__": diff --git a/tests/ci/report.py b/tests/ci/report.py index da04411632d..d33ed4a9d91 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -211,7 +211,7 @@ def read_test_results(results_path: Path, with_raw_logs: bool = True) -> TestRes name = line[0] status = line[1] time = None - if len(line) >= 3 and line[2]: + if len(line) >= 3 and line[2] and line[2] != "\\N": # The value can be emtpy, but when it's not, # it's the time spent on the test try: diff --git a/tests/ci/stress_check.py b/tests/ci/stress_check.py index 4116dbc52ce..fb38969cb23 100644 --- a/tests/ci/stress_check.py +++ b/tests/ci/stress_check.py @@ -93,10 +93,18 @@ def process_results( return "error", "Invalid check_status.tsv", test_results, additional_files state, description = status[0][0], status[0][1] - results_path = Path(result_folder) / "test_results.tsv" - test_results = read_test_results(results_path, False) - if len(test_results) == 0: - raise Exception("Empty results") + try: + results_path = Path(result_folder) / "test_results.tsv" + test_results = read_test_results(results_path, True) + if len(test_results) == 0: + raise Exception("Empty results") + except Exception as e: + return ( + "error", + f"Cannot parse test_results.tsv ({e})", + test_results, + additional_files, + ) return state, description, test_results, additional_files diff --git a/tests/ci/tee_popen.py b/tests/ci/tee_popen.py index f80678fe8ba..4869301785e 100644 --- a/tests/ci/tee_popen.py +++ b/tests/ci/tee_popen.py @@ -29,11 +29,13 @@ class TeePopen: self.env = env or os.environ.copy() self._process = None # type: 
Optional[Popen] self.timeout = timeout + self.timeout_exceeded = False def _check_timeout(self) -> None: if self.timeout is None: return sleep(self.timeout) + self.timeout_exceeded = True while self.process.poll() is None: logging.warning( "Killing process %s, timeout %s exceeded", @@ -62,6 +64,16 @@ class TeePopen: def __exit__(self, exc_type, exc_value, traceback): self.wait() + if self.timeout_exceeded: + exceeded_log = ( + f"Command `{self.command}` has failed, " + f"timeout {self.timeout}s is exceeded" + ) + if self.process.stdout is not None: + sys.stdout.write(exceeded_log) + + self.log_file.write(exceeded_log) + self.log_file.close() def wait(self) -> int: diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 3fec6b612b1..50d940bc23c 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -232,19 +232,52 @@ def need_retry(args, stdout, stderr, total_time): ) -def get_processlist(args): +def get_processlist_with_stacktraces(args): try: if args.replicated_database: return clickhouse_execute_json( args, """ - SELECT materialize((hostName(), tcpPort())) as host, * - FROM clusterAllReplicas('test_cluster_database_replicated', system.processes) - WHERE query NOT LIKE '%system.processes%' + SELECT materialize(hostName() || '::' || tcpPort()::String) as host_port, * + -- NOTE: view() here to do JOIN on shards, instead of initiator + FROM clusterAllReplicas('test_cluster_database_replicated', view( + SELECT + groupArray((s.thread_id, arrayStringConcat(arrayMap( + x -> concat(addressToLine(x), '::', demangle(addressToSymbol(x))), + s.trace), '\n') AS stacktrace + )) AS stacktraces, + p.* + FROM system.processes p + JOIN system.stack_trace s USING (query_id) + WHERE query NOT LIKE '%system.processes%' + GROUP BY p.* + )) + ORDER BY elapsed DESC """, + settings={ + "allow_introspection_functions": 1, + }, ) else: - return clickhouse_execute_json(args, "SHOW PROCESSLIST") + return clickhouse_execute_json( + args, + """ + SELECT + groupArray((s.thread_id, arrayStringConcat(arrayMap( + x -> concat(addressToLine(x), '::', demangle(addressToSymbol(x))), + s.trace), '\n') AS stacktrace + )) AS stacktraces, + p.* + FROM system.processes p + JOIN system.stack_trace s USING (query_id) + WHERE query NOT LIKE '%system.processes%' + GROUP BY p.* + ORDER BY elapsed DESC + """, + settings={ + "allow_introspection_functions": 1, + }, + ) except Exception as e: return "Failed to get processlist: " + str(e) @@ -458,7 +491,7 @@ class SettingsRandomizer: if random.random() < 0.2 else random.randint(1, 1024 * 1024 * 1024), "local_filesystem_read_method": lambda: random.choice( - ["read", "pread", "mmap", "pread_threadpool"] + ["read", "pread", "mmap", "pread_threadpool", "io_uring"] ), "remote_filesystem_read_method": lambda: random.choice(["read", "threadpool"]), "local_filesystem_read_prefetch": lambda: random.randint(0, 1), @@ -757,6 +790,9 @@ class TestCase: ): description = "" + if debug_log: + debug_log = "\n".join(debug_log.splitlines()[:100]) + if proc: if proc.returncode is None: try: @@ -899,7 +935,7 @@ class TestCase: if ( self.testcase_args.test_runs > 1 - and total_time > 60 + and total_time > 120 and "long" not in self.tags ): if debug_log: @@ -1105,6 +1141,9 @@ class TestCase: proc, stdout, stderr, debug_log, total_time ) result.check_if_need_retry(args, stdout, stderr, self.runs_count) + # to avoid breaking CSV parser + result.description = result.description.replace('\0', '') + if result.status == TestStatus.FAIL: result.description = self.add_info_about_settings( args, 
result.description @@ -1223,7 +1262,7 @@ class TestSuite: line = line.strip() if line and not is_shebang(line): return line - return '' + return "" def load_tags_from_file(filepath): comment_sign = get_comment_sign(filepath) @@ -1750,7 +1789,7 @@ def removesuffix(text, *suffixes): return text -def reportCoverageFor(args, what, query, permissive = False): +def reportCoverageFor(args, what, query, permissive=False): value = clickhouse_execute(args, query).decode() if value != "": @@ -1763,10 +1802,13 @@ def reportCoverageFor(args, what, query, permissive = False): def reportCoverage(args): - return reportCoverageFor( - args, - "functions", - """ + clickhouse_execute(args, "SYSTEM FLUSH LOGS") + + return ( + reportCoverageFor( + args, + "functions", + """ SELECT name FROM system.functions WHERE NOT is_aggregate AND origin = 'System' AND alias_to = '' @@ -1776,11 +1818,12 @@ def reportCoverage(args): ) ORDER BY name """, - True - ) and reportCoverageFor( - args, - "aggregate functions", - """ + True, + ) + and reportCoverageFor( + args, + "aggregate functions", + """ SELECT name FROM system.functions WHERE is_aggregate AND origin = 'System' AND alias_to = '' @@ -1789,11 +1832,12 @@ def reportCoverage(args): SELECT arrayJoin(used_aggregate_functions) FROM system.query_log WHERE event_date >= yesterday() ) ORDER BY name - """ - ) and reportCoverageFor( - args, - "aggregate function combinators", - """ + """, + ) + and reportCoverageFor( + args, + "aggregate function combinators", + """ SELECT name FROM system.aggregate_function_combinators WHERE NOT is_internal @@ -1802,11 +1846,12 @@ def reportCoverage(args): SELECT arrayJoin(used_aggregate_function_combinators) FROM system.query_log WHERE event_date >= yesterday() ) ORDER BY name - """ - ) and reportCoverageFor( - args, - "data type families", - """ + """, + ) + and reportCoverageFor( + args, + "data type families", + """ SELECT name FROM system.data_type_families WHERE alias_to = '' AND name NOT LIKE 'Interval%' @@ -1815,7 +1860,8 @@ def reportCoverage(args): SELECT arrayJoin(used_data_type_families) FROM system.query_log WHERE event_date >= yesterday() ) ORDER BY name - """ + """, + ) ) def reportLogStats(args): @@ -1844,7 +1890,7 @@ def reportLogStats(args): LIMIT 100 FORMAT TSVWithNamesAndTypes """ - value = clickhouse_execute(args, query).decode() + value = clickhouse_execute(args, query).decode(errors="replace") print("\nTop patterns of log messages:\n") print(value) print("\n") @@ -1856,19 +1902,66 @@ def reportLogStats(args): count() AS count, substr(replaceRegexpAll(message, '[^A-Za-z]+', ''), 1, 32) AS pattern, substr(any(message), 1, 256) as runtime_message, - any((extract(source_file, '\/[a-zA-Z0-9_]+\.[a-z]+'), source_line)) as line + any((extract(source_file, '\/[a-zA-Z0-9_]+\.[a-z]+'), source_line)) as line FROM system.text_log WHERE (now() - toIntervalMinute(mins)) < event_time AND message_format_string = '' GROUP BY pattern ORDER BY count DESC - LIMIT 50 + LIMIT 30 FORMAT TSVWithNamesAndTypes """ - value = clickhouse_execute(args, query).decode() + value = clickhouse_execute(args, query).decode(errors="replace") print("\nTop messages without format string (fmt::runtime):\n") print(value) print("\n") + query = """ + SELECT message_format_string, count(), substr(any(message), 1, 120) AS any_message + FROM system.text_log + WHERE (now() - toIntervalMinute(120)) < event_time + AND (message NOT LIKE (replaceRegexpAll(message_format_string, '{[:.0-9dfx]*}', '%') AS s)) + AND (message NOT LIKE concat('%Exception: ', s, '%')) + GROUP 
BY message_format_string ORDER BY count() DESC LIMIT 20 FORMAT TSVWithNamesAndTypes + """ + value = clickhouse_execute(args, query).decode(errors="replace") + print("\nTop messages that does not match its format string:\n") + print(value) + print("\n") + + query = """ + WITH ('', '({}) Keys: {}', '({}) {}', 'Aggregating', 'Became leader', 'Cleaning queue', 'Creating set.', + 'Cyclic aliases', 'Detaching {}', 'Executing {}', 'Fire events: {}', 'Found part {}', 'Loaded queue', + 'No sharding key', 'No tables', 'Query: {}', 'Removed', 'Removed part {}', 'Removing parts.', + 'Request URI: {}', 'Sending part {}', 'Sent handshake', 'Starting {}', 'Will mimic {}', 'Writing to {}', + 'dropIfEmpty', 'loadAll {}', '{} ({}:{})', '{} -> {}', '{} {}', '{}: {}' + ) AS known_short_messages + SELECT count() AS c, message_format_string, substr(any(message), 1, 120) + FROM system.text_log + WHERE (now() - toIntervalMinute(120)) < event_time + AND (length(message_format_string) < 16 + OR (length(message_format_string) < 30 AND message ilike '%DB::Exception%')) + AND message_format_string NOT IN known_short_messages + GROUP BY message_format_string ORDER BY c DESC LIMIT 30 FORMAT TSVWithNamesAndTypes + """ + value = clickhouse_execute(args, query).decode(errors="replace") + print("\nTop short messages:\n") + print(value) + print("\n") + + query = """ + SELECT max((freq, message_format_string)), level + FROM (SELECT count() / (SELECT count() FROM system.text_log + WHERE (now() - toIntervalMinute(120)) < event_time) AS freq, + min(level) AS level, message_format_string FROM system.text_log + WHERE (now() - toIntervalMinute(120)) < event_time + GROUP BY message_format_string ORDER BY freq DESC) + GROUP BY level + """ + value = clickhouse_execute(args, query).decode(errors="replace") + print("\nTop messages by level:\n") + print(value) + print("\n") + def main(args): global server_died @@ -1891,7 +1984,9 @@ def main(args): args, "system", "processes", "is_all_data_sent" ) - if args.s3_storage and (BuildFlags.THREAD in args.build_flags or BuildFlags.DEBUG in args.build_flags): + if args.s3_storage and ( + BuildFlags.THREAD in args.build_flags or BuildFlags.DEBUG in args.build_flags + ): args.no_random_settings = True if args.skip: @@ -1963,10 +2058,9 @@ def main(args): exit_code.value = 1 if args.hung_check: - # Some queries may execute in background for some time after test was finished. This is normal. 
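For context on the hung-queries check that follows: `get_processlist_with_stacktraces` no longer returns a bare `SHOW PROCESSLIST`, it joins `system.processes` with `system.stack_trace` on `query_id` and symbolizes the frames with `addressToLine`/`demangle`, which requires `allow_introspection_functions=1`. A simplified standalone version of the same query over the HTTP interface (host, port and the absence of authentication are assumptions of this sketch):

```python
#!/usr/bin/env python3
# Sketch: list currently running queries together with symbolized stack
# traces, the same idea as get_processlist_with_stacktraces in the diff.
import json
import urllib.parse
import urllib.request

QUERY = """
SELECT
    p.query_id,
    p.query,
    groupArray((s.thread_id, arrayStringConcat(arrayMap(
        x -> concat(addressToLine(x), '::', demangle(addressToSymbol(x))),
        s.trace), '\\n'))) AS stacktraces
FROM system.processes AS p
JOIN system.stack_trace AS s USING (query_id)
WHERE p.query NOT LIKE '%system.processes%'
GROUP BY p.query_id, p.query
FORMAT JSON
"""


def get_processlist_with_stacktraces(host="localhost", port=8123):
    # Introspection functions are disabled by default; pass the setting
    # alongside the query, as the test does.
    params = urllib.parse.urlencode(
        {"query": QUERY, "allow_introspection_functions": 1}
    )
    with urllib.request.urlopen(f"http://{host}:{port}/?{params}") as response:
        return json.loads(response.read())["data"]


if __name__ == "__main__":
    for row in get_processlist_with_stacktraces():
        print(row["query_id"], row["query"])
```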
for _ in range(1, 60): - processlist = get_processlist(args) + processlist = get_processlist_with_stacktraces(args) if not processlist: break sleep(1) @@ -1980,7 +2074,6 @@ def main(args): print(json.dumps(processlist, indent=4)) print(get_transactions_list(args)) - print_stacktraces() exit_code.value = 1 else: print(colored("\nNo queries hung.", args, "green", attrs=["bold"])) @@ -2004,7 +2097,10 @@ def main(args): print("All tests have finished.") if args.report_logs_stats: - reportLogStats(args) + try: + reportLogStats(args) + except Exception as e: + print(f"Failed to get stats about log messages: {e}") if args.report_coverage and not reportCoverage(args): exit_code.value = 1 diff --git a/tests/fuzz/all.dict b/tests/fuzz/all.dict index 7977cb9ed21..17ef7d2ab1e 100644 --- a/tests/fuzz/all.dict +++ b/tests/fuzz/all.dict @@ -72,6 +72,7 @@ "arrayMap" "arrayMax" "arrayMin" +"arrayPartialShuffle" "arrayPopBack" "arrayPopFront" "arrayProduct" @@ -84,6 +85,7 @@ "arrayReverseFill" "arrayReverseSort" "arrayReverseSplit" +"arrayShuffle" "arraySlice" "arraySort" "arraySplit" diff --git a/tests/fuzz/dictionaries/functions.dict b/tests/fuzz/dictionaries/functions.dict index a07841f733e..e77a2a779fd 100644 --- a/tests/fuzz/dictionaries/functions.dict +++ b/tests/fuzz/dictionaries/functions.dict @@ -872,6 +872,8 @@ "nullIn" "MONTH" "arrayReverse" +"arrayShuffle" +"arrayPartialShuffle" "now64" "DATE" "addressToLine" diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 3a65a517d9c..fda31979363 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -1854,6 +1854,8 @@ class ClickHouseCluster: exec_cmd = ["docker", "exec"] if "user" in kwargs: exec_cmd += ["-u", kwargs["user"]] + if "privileged" in kwargs: + exec_cmd += ["--privileged"] result = subprocess_check_call( exec_cmd + [container_id] + cmd, detach=detach, nothrow=nothrow ) @@ -3003,6 +3005,8 @@ services: - NET_ADMIN - IPC_LOCK - SYS_NICE + # for umount/mount on fly + - SYS_ADMIN depends_on: {depends_on} user: '{user}' env_file: diff --git a/tests/integration/test_backup_restore_new/test.py b/tests/integration/test_backup_restore_new/test.py index 90e8acc702d..322c3a0d9c4 100644 --- a/tests/integration/test_backup_restore_new/test.py +++ b/tests/integration/test_backup_restore_new/test.py @@ -192,6 +192,101 @@ def test_incremental_backup(): assert instance.query("SELECT count(), sum(x) FROM test.table2") == "102\t5081\n" +def test_increment_backup_without_changes(): + backup_name = new_backup_name() + incremental_backup_name = new_backup_name() + create_and_fill_table(n=1) + + system_backup_qry = "SELECT status, num_files, num_processed_files, processed_files_size, uncompressed_size, compressed_size, error FROM system.backups WHERE id='{id_backup}'" + + assert instance.query("SELECT count(), sum(x) FROM test.table") == TSV([["1", "0"]]) + + # prepare first backup without base_backup + (id_backup, status) = instance.query( + f"BACKUP TABLE test.table TO {backup_name}" + ).split("\t") + + ( + backup_status, + num_files, + num_processed_files, + processed_files_size, + uncompressed_size, + compressed_size, + error, + ) = ( + instance.query(system_backup_qry.format(id_backup=id_backup)) + .strip("\n") + .split("\t") + ) + + assert backup_status == "BACKUP_CREATED" + assert num_files == "11" + assert int(uncompressed_size) > 0 + assert int(compressed_size) > 0 + assert error == "" + + # create second backup without changes based on the first one + (id_backup_wo_changes, 
status_backup_wo_changes) = instance.query( + f"BACKUP TABLE test.table TO {incremental_backup_name} SETTINGS base_backup = {backup_name}" + ).split("\t") + + ( + backup_status_wo_changes, + num_files_backup_wo_changes, + num_processed_files_backup_wo_changes, + processed_files_size_backup_wo_changes, + uncompressed_size_backup_wo_changes, + compressed_size_backup_wo_changes, + error_snd, + ) = ( + instance.query(system_backup_qry.format(id_backup=id_backup_wo_changes)) + .strip("\n") + .split("\t") + ) + + assert backup_status_wo_changes == "BACKUP_CREATED" + assert num_files_backup_wo_changes == "1" + assert num_processed_files_backup_wo_changes == "11" + assert int(processed_files_size_backup_wo_changes) > 0 + assert int(uncompressed_size_backup_wo_changes) > 0 + assert int(compressed_size_backup_wo_changes) > 0 + assert error_snd == "" + + # restore the second backup + # we expect to see all files in the meta info of the restore and a sum of uncompressed and compressed sizes + (id_restore, status_restore) = instance.query( + f"RESTORE TABLE test.table AS test.table2 FROM {incremental_backup_name}" + ).split("\t") + + assert instance.query("SELECT count(), sum(x) FROM test.table2") == TSV( + [["1", "0"]] + ) + + ( + restore_status, + restore_num_files, + restore_num_processed_files, + restore_processed_files_size, + restore_uncompressed_size, + restore_compressed_size, + restore_error, + ) = ( + instance.query(system_backup_qry.format(id_backup=id_restore)) + .strip("\n") + .split("\t") + ) + + assert restore_status == "RESTORED" + assert int(restore_num_files) == 1 + assert int(restore_num_processed_files) == int( + num_processed_files_backup_wo_changes + ) + assert int(restore_uncompressed_size) > 0 + assert int(restore_compressed_size) > 0 + assert restore_error == "" + + def test_incremental_backup_overflow(): backup_name = new_backup_name() incremental_backup_name = new_backup_name() @@ -430,6 +525,24 @@ def test_zip_archive_with_settings(): assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" +def test_zip_archive_with_bad_compression_method(): + backup_name = f"Disk('backups', 'archive_with_bad_compression_method.zip')" + create_and_fill_table() + + assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" + + expected_error = "Unknown compression method specified for a zip archive" + assert expected_error in instance.query_and_get_error( + f"BACKUP TABLE test.table TO {backup_name} SETTINGS id='archive_with_bad_compression_method', compression_method='foobar'" + ) + assert ( + instance.query( + "SELECT status FROM system.backups WHERE id='archive_with_bad_compression_method'" + ) + == "BACKUP_FAILED\n" + ) + + def test_async(): create_and_fill_table() assert instance.query("SELECT count(), sum(x) FROM test.table") == "100\t4950\n" @@ -1089,9 +1202,18 @@ def test_system_backups(): id = instance.query(f"BACKUP TABLE test.table TO {backup_name}").split("\t")[0] - [name, status, num_files, uncompressed_size, compressed_size, error] = ( + [ + name, + status, + num_files, + num_processed_files, + processed_files_size, + uncompressed_size, + compressed_size, + error, + ] = ( instance.query( - f"SELECT name, status, num_files, uncompressed_size, compressed_size, error FROM system.backups WHERE id='{id}'" + f"SELECT name, status, num_files, num_processed_files, processed_files_size, uncompressed_size, compressed_size, error FROM system.backups WHERE id='{id}'" ) .strip("\n") .split("\t") @@ -1101,9 +1223,13 @@ def test_system_backups(): 
num_files = int(num_files) compressed_size = int(compressed_size) uncompressed_size = int(uncompressed_size) + num_processed_files = int(num_processed_files) + processed_files_size = int(processed_files_size) assert name == escaped_backup_name assert status == "BACKUP_CREATED" assert num_files > 1 + assert num_processed_files > 1 + assert processed_files_size > 1 assert uncompressed_size > 1 assert compressed_size == uncompressed_size assert error == "" @@ -1115,9 +1241,17 @@ def test_system_backups(): ) escaped_backup_name = backup_name.replace("'", "\\'") - [status, num_files, uncompressed_size, compressed_size, error] = ( + [ + status, + num_files, + num_processed_files, + processed_files_size, + uncompressed_size, + compressed_size, + error, + ] = ( instance.query( - f"SELECT status, num_files, uncompressed_size, compressed_size, error FROM system.backups WHERE name='{escaped_backup_name}'" + f"SELECT status, num_files, num_processed_files, processed_files_size, uncompressed_size, compressed_size, error FROM system.backups WHERE name='{escaped_backup_name}'" ) .strip("\n") .split("\t") @@ -1126,10 +1260,14 @@ def test_system_backups(): num_files = int(num_files) compressed_size = int(compressed_size) uncompressed_size = int(uncompressed_size) + num_processed_files = int(num_processed_files) + processed_files_size = int(processed_files_size) assert status == "BACKUP_FAILED" assert num_files == 0 assert uncompressed_size == 0 assert compressed_size == 0 + assert num_processed_files == 0 + assert processed_files_size == 0 assert expected_error in error diff --git a/tests/integration/test_backup_restore_on_cluster/configs/disallow_concurrency.xml b/tests/integration/test_backup_restore_on_cluster/configs/disallow_concurrency.xml index 144be77c9f9..9e67f54f8e8 100644 --- a/tests/integration/test_backup_restore_on_cluster/configs/disallow_concurrency.xml +++ b/tests/integration/test_backup_restore_on_cluster/configs/disallow_concurrency.xml @@ -9,7 +9,7 @@ backups + false + false - false - false diff --git a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py index 8f514b95d0b..43e7682ec1d 100644 --- a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py +++ b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py @@ -148,10 +148,14 @@ def test_concurrent_backups_on_different_nodes(): backup_name = new_backup_name() - nodes[1].query(f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name} ASYNC") + id = ( + nodes[1] + .query(f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name} ASYNC") + .split("\t")[0] + ) assert_eq_with_retry( nodes[1], - f"SELECT status FROM system.backups WHERE status == 'CREATING_BACKUP'", + f"SELECT status FROM system.backups WHERE status == 'CREATING_BACKUP' AND id = '{id}'", "CREATING_BACKUP", ) assert "Concurrent backups not supported" in nodes[2].query_and_get_error( diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py index b76ff2a2479..d5a7579df51 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -127,7 +127,7 @@ def test_backup_to_s3_multipart(): backup_destination = f"S3('http://minio1:9001/root/data/backups/multipart/{backup_name}', 'minio', 'minio123')" check_backup_and_restore(storage_policy, backup_destination, size=1000000) assert node.contains_in_log( - f"copyDataToS3: 
Multipart upload has completed. Bucket: root, Key: data/backups/multipart/{backup_name}" + f"copyDataToS3File: Multipart upload has completed. Bucket: root, Key: data/backups/multipart/{backup_name}" ) @@ -140,7 +140,7 @@ def test_backup_to_s3_native_copy(): check_backup_and_restore(storage_policy, backup_destination) assert node.contains_in_log("using native copy") assert node.contains_in_log( - f"copyFileS3ToS3: Single operation copy has completed. Bucket: root, Key: data/backups/{backup_name}" + f"copyS3File: Single operation copy has completed. Bucket: root, Key: data/backups/{backup_name}" ) @@ -153,7 +153,7 @@ def test_backup_to_s3_native_copy_other_bucket(): check_backup_and_restore(storage_policy, backup_destination) assert node.contains_in_log("using native copy") assert node.contains_in_log( - f"copyFileS3ToS3: Single operation copy has completed. Bucket: root, Key: data/backups/{backup_name}" + f"copyS3File: Single operation copy has completed. Bucket: root, Key: data/backups/{backup_name}" ) @@ -164,5 +164,5 @@ def test_backup_to_s3_native_copy_multipart(): check_backup_and_restore(storage_policy, backup_destination, size=1000000) assert node.contains_in_log("using native copy") assert node.contains_in_log( - f"copyFileS3ToS3: Multipart upload has completed. Bucket: root, Key: data/backups/multipart/{backup_name}/" + f"copyS3File: Multipart upload has completed. Bucket: root, Key: data/backups/multipart/{backup_name}/" ) diff --git a/tests/integration/test_drop_no_local_path/__init__.py b/tests/integration/test_drop_no_local_path/__init__.py new file mode 100644 index 00000000000..e5a0d9b4834 --- /dev/null +++ b/tests/integration/test_drop_no_local_path/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python3 diff --git a/tests/integration/test_drop_no_local_path/configs/remote_servers.xml b/tests/integration/test_drop_no_local_path/configs/remote_servers.xml new file mode 100644 index 00000000000..c5e0e7ee366 --- /dev/null +++ b/tests/integration/test_drop_no_local_path/configs/remote_servers.xml @@ -0,0 +1,12 @@ + + + + + + instance + 9000 + + + + + diff --git a/tests/integration/test_drop_no_local_path/test.py b/tests/integration/test_drop_no_local_path/test.py new file mode 100644 index 00000000000..6e587f0a050 --- /dev/null +++ b/tests/integration/test_drop_no_local_path/test.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python3 + +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance("instance", main_configs=["configs/remote_servers.xml"]) + + +@pytest.fixture(scope="module") +def setup_nodes(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def drop_table_directory(table_name): + data_path = instance.query( + f"SELECT data_paths[1] FROM system.tables where name = '{table_name}'" + ).strip() + print("Data path", data_path) + instance.exec_in_container( + ["bash", "-c", f"rm -fr {data_path}"], privileged=True, user="root" + ) + + +def test_drop_no_local_path(setup_nodes): + instance.query( + "CREATE TABLE merge_tree_table (key UInt64) ENGINE = MergeTree() ORDER BY tuple()" + ) + instance.query("INSERT INTO merge_tree_table VALUES (1)") + drop_table_directory("merge_tree_table") + instance.query("DROP TABLE merge_tree_table SYNC", timeout=10) + + instance.query( + "CREATE TABLE merge_tree_table (key UInt64) ENGINE = MergeTree() ORDER BY tuple()" + ) + + instance.query( + "CREATE TABLE distributed_table (key UInt64) ENGINE = Distributed(test_cluster, default, merge_tree_table, 
key)" + ) + instance.query("INSERT INTO distributed_table VALUES(0)") + drop_table_directory("distributed_table") + instance.query("DROP TABLE distributed_table SYNC", timeout=10) + + instance.query("DROP TABLE merge_tree_table SYNC", timeout=10) + + instance.query( + "CREATE TABLE join_table(`id` UInt64, `val` String) ENGINE = Join(ANY, LEFT, id)" + ) + instance.query("INSERT INTO join_table VALUES (1, 'a')") + + drop_table_directory("join_table") + + instance.query("TRUNCATE TABLE join_table", timeout=10) diff --git a/tests/integration/test_jbod_ha/test.py b/tests/integration/test_jbod_ha/test.py index d82fca32f55..5cbb5989ff3 100644 --- a/tests/integration/test_jbod_ha/test.py +++ b/tests/integration/test_jbod_ha/test.py @@ -72,9 +72,21 @@ def test_jbod_ha(start_cluster): node2.query("SYSTEM SYNC REPLICA tbl", timeout=10) - # mimic disk failure + # Mimic disk failure + # + # NOTE: you cannot do one of the following: + # - chmod 000 - this will not block access to the owner of the namespace, + # and running clickhouse from non-root user is very tricky in this + # sandbox. + # - unmount it, to replace with something else because in this case you + # will loose tmpfs and besides clickhouse works from root, so it will + # still be able to write/read from/to it. + # + # So it simply mounts over tmpfs, proc, and this will throw exception + # for read, because there is no such file and does not allows writes + # either. node1.exec_in_container( - ["bash", "-c", "chmod -R 000 /jbod1"], privileged=True, user="root" + ["bash", "-c", "mount -t proc proc /jbod1"], privileged=True, user="root" ) time.sleep(3) @@ -91,9 +103,11 @@ def test_jbod_ha(start_cluster): assert int(node1.query("select count(p) from tbl")) == 2500 - # mimic disk recovery + # Mimic disk recovery + # + # NOTE: this will unmount only proc from /jbod1 and leave tmpfs node1.exec_in_container( - ["bash", "-c", "chmod -R 755 /jbod1"], + ["bash", "-c", "umount /jbod1"], privileged=True, user="root", ) diff --git a/tests/integration/test_mask_sensitive_info/configs/named_collections.xml b/tests/integration/test_mask_sensitive_info/configs/named_collections.xml new file mode 100644 index 00000000000..ee923a90171 --- /dev/null +++ b/tests/integration/test_mask_sensitive_info/configs/named_collections.xml @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/tests/integration/test_mask_sensitive_info/test.py b/tests/integration/test_mask_sensitive_info/test.py index f546c559f66..3f71b047213 100644 --- a/tests/integration/test_mask_sensitive_info/test.py +++ b/tests/integration/test_mask_sensitive_info/test.py @@ -4,7 +4,13 @@ from helpers.cluster import ClickHouseCluster from helpers.test_tools import TSV cluster = ClickHouseCluster(__file__) -node = cluster.add_instance("node", with_zookeeper=True) +node = cluster.add_instance( + "node", + main_configs=[ + "configs/named_collections.xml", + ], + with_zookeeper=True, +) @pytest.fixture(scope="module", autouse=True) @@ -21,11 +27,21 @@ def check_logs(must_contain=[], must_not_contain=[]): node.query("SYSTEM FLUSH LOGS") for str in must_contain: - escaped_str = str.replace("`", "\\`").replace("[", "\\[").replace("]", "\\]") + escaped_str = ( + str.replace("`", "\\`") + .replace("[", "\\[") + .replace("]", "\\]") + .replace("*", "\\*") + ) assert node.contains_in_log(escaped_str) for str in must_not_contain: - escaped_str = str.replace("`", "\\`").replace("[", "\\[").replace("]", "\\]") + escaped_str = ( + str.replace("`", "\\`") + .replace("[", "\\[") + .replace("]", "\\]") + .replace("*", 
"\\*") + ) assert not node.contains_in_log(escaped_str) for str in must_contain: @@ -106,6 +122,12 @@ def test_create_table(): f"S3('http://minio1:9001/root/data/test3.csv.gz', 'CSV', 'gzip')", f"S3('http://minio1:9001/root/data/test4.csv', 'minio', '{password}', 'CSV')", f"S3('http://minio1:9001/root/data/test5.csv.gz', 'minio', '{password}', 'CSV', 'gzip')", + f"MySQL(named_collection_1, host = 'mysql57', port = 3306, database = 'mysql_db', table = 'mysql_table', user = 'mysql_user', password = '{password}')", + f"MySQL(named_collection_2, database = 'mysql_db', host = 'mysql57', port = 3306, password = '{password}', table = 'mysql_table', user = 'mysql_user')", + f"MySQL(named_collection_3, database = 'mysql_db', host = 'mysql57', port = 3306, table = 'mysql_table')", + f"PostgreSQL(named_collection_4, host = 'postgres1', port = 5432, database = 'postgres_db', table = 'postgres_table', user = 'postgres_user', password = '{password}')", + f"MongoDB(named_collection_5, host = 'mongo1', port = 5432, database = 'mongo_db', collection = 'mongo_col', user = 'mongo_user', password = '{password}')", + f"S3(named_collection_6, url = 'http://minio1:9001/root/data/test8.csv', access_key_id = 'minio', secret_access_key = '{password}', format = 'CSV')", ] for i, table_engine in enumerate(table_engines): @@ -137,6 +159,12 @@ def test_create_table(): "CREATE TABLE table5 (x int) ENGINE = S3('http://minio1:9001/root/data/test3.csv.gz', 'CSV', 'gzip')", "CREATE TABLE table6 (`x` int) ENGINE = S3('http://minio1:9001/root/data/test4.csv', 'minio', '[HIDDEN]', 'CSV')", "CREATE TABLE table7 (`x` int) ENGINE = S3('http://minio1:9001/root/data/test5.csv.gz', 'minio', '[HIDDEN]', 'CSV', 'gzip')", + "CREATE TABLE table8 (`x` int) ENGINE = MySQL(named_collection_1, host = 'mysql57', port = 3306, database = 'mysql_db', table = 'mysql_table', user = 'mysql_user', password = '[HIDDEN]')", + "CREATE TABLE table9 (`x` int) ENGINE = MySQL(named_collection_2, database = 'mysql_db', host = 'mysql57', port = 3306, password = '[HIDDEN]', table = 'mysql_table', user = 'mysql_user')", + "CREATE TABLE table10 (x int) ENGINE = MySQL(named_collection_3, database = 'mysql_db', host = 'mysql57', port = 3306, table = 'mysql_table')", + "CREATE TABLE table11 (`x` int) ENGINE = PostgreSQL(named_collection_4, host = 'postgres1', port = 5432, database = 'postgres_db', table = 'postgres_table', user = 'postgres_user', password = '[HIDDEN]')", + "CREATE TABLE table12 (`x` int) ENGINE = MongoDB(named_collection_5, host = 'mongo1', port = 5432, database = 'mongo_db', collection = 'mongo_col', user = 'mongo_user', password = '[HIDDEN]'", + "CREATE TABLE table13 (`x` int) ENGINE = S3(named_collection_6, url = 'http://minio1:9001/root/data/test8.csv', access_key_id = 'minio', secret_access_key = '[HIDDEN]', format = 'CSV')", ], must_not_contain=[password], ) @@ -150,6 +178,7 @@ def test_create_database(): database_engines = [ f"MySQL('localhost:3306', 'mysql_db', 'mysql_user', '{password}') SETTINGS connect_timeout=1, connection_max_tries=1", + f"MySQL(named_collection_1, host = 'localhost', port = 3306, database = 'mysql_db', user = 'mysql_user', password = '{password}') SETTINGS connect_timeout=1, connection_max_tries=1", # f"PostgreSQL('localhost:5432', 'postgres_db', 'postgres_user', '{password}')", ] @@ -163,7 +192,8 @@ def test_create_database(): check_logs( must_contain=[ "CREATE DATABASE database0 ENGINE = MySQL('localhost:3306', 'mysql_db', 'mysql_user', '[HIDDEN]')", - # "CREATE DATABASE database1 ENGINE = 
PostgreSQL('localhost:5432', 'postgres_db', 'postgres_user', '[HIDDEN]')", + "CREATE DATABASE database1 ENGINE = MySQL(named_collection_1, host = 'localhost', port = 3306, database = 'mysql_db', user = 'mysql_user', password = '[HIDDEN]')", + # "CREATE DATABASE database2 ENGINE = PostgreSQL('localhost:5432', 'postgres_db', 'postgres_user', '[HIDDEN]')", ], must_not_contain=[password], ) @@ -201,6 +231,11 @@ def test_table_functions(): f"remote('127.{{2..11}}', numbers(10), 'remote_user', '{password}', rand())", f"remoteSecure('127.{{2..11}}', 'default', 'remote_table', 'remote_user', '{password}')", f"remoteSecure('127.{{2..11}}', 'default', 'remote_table', 'remote_user', rand())", + f"mysql(named_collection_1, host = 'mysql57', port = 3306, database = 'mysql_db', table = 'mysql_table', user = 'mysql_user', password = '{password}')", + f"postgresql(named_collection_2, password = '{password}', host = 'postgres1', port = 5432, database = 'postgres_db', table = 'postgres_table', user = 'postgres_user')", + f"s3(named_collection_3, url = 'http://minio1:9001/root/data/test4.csv', access_key_id = 'minio', secret_access_key = '{password}')", + f"remote(named_collection_4, addresses_expr = '127.{{2..11}}', database = 'default', table = 'remote_table', user = 'remote_user', password = '{password}', sharding_key = rand())", + f"remoteSecure(named_collection_5, addresses_expr = '127.{{2..11}}', database = 'default', table = 'remote_table', user = 'remote_user', password = '{password}')", ] for i, table_function in enumerate(table_functions): @@ -249,6 +284,11 @@ def test_table_functions(): "CREATE TABLE tablefunc22 (`x` int) AS remote('127.{2..11}', numbers(10), 'remote_user', '[HIDDEN]', rand())", "CREATE TABLE tablefunc23 (`x` int) AS remoteSecure('127.{2..11}', 'default', 'remote_table', 'remote_user', '[HIDDEN]')", "CREATE TABLE tablefunc24 (x int) AS remoteSecure('127.{2..11}', 'default', 'remote_table', 'remote_user', rand())", + "CREATE TABLE tablefunc25 (`x` int) AS mysql(named_collection_1, host = 'mysql57', port = 3306, database = 'mysql_db', table = 'mysql_table', user = 'mysql_user', password = '[HIDDEN]')", + "CREATE TABLE tablefunc26 (`x` int) AS postgresql(named_collection_2, password = '[HIDDEN]', host = 'postgres1', port = 5432, database = 'postgres_db', table = 'postgres_table', user = 'postgres_user')", + "CREATE TABLE tablefunc27 (`x` int) AS s3(named_collection_3, url = 'http://minio1:9001/root/data/test4.csv', access_key_id = 'minio', secret_access_key = '[HIDDEN]')", + "CREATE TABLE tablefunc28 (`x` int) AS remote(named_collection_4, addresses_expr = '127.{2..11}', database = 'default', table = 'remote_table', user = 'remote_user', password = '[HIDDEN]', sharding_key = rand())", + "CREATE TABLE tablefunc29 (`x` int) AS remoteSecure(named_collection_5, addresses_expr = '127.{2..11}', database = 'default', table = 'remote_table', user = 'remote_user', password = '[HIDDEN]')", ], must_not_contain=[password], ) @@ -257,6 +297,34 @@ def test_table_functions(): node.query(f"DROP TABLE tablefunc{i}") +def test_table_function_ways_to_call(): + password = new_password() + + table_function = f"s3('http://minio1:9001/root/data/testfuncw.tsv.gz', 'minio', '{password}', 'TSV', 'x int')" + + queries = [ + "CREATE TABLE tablefuncw (`x` int) AS {}", + "INSERT INTO FUNCTION {} SELECT * FROM numbers(10)", + "DESCRIBE TABLE {}", + ] + + for query in queries: + # query_and_get_answer_with_error() is used here because we don't want to stop on error "Cannot connect to AWS". 
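The assertions in `test_mask_sensitive_info` only check the log output; the masking itself happens inside the server. As a rough illustration of the property being verified, and explicitly not of ClickHouse's implementation, a regex sketch that turns password literals into the `[HIDDEN]` placeholder:

```python
#!/usr/bin/env python3
# Toy model of the property asserted by the test: a `password = '...'`
# argument of an engine or table function must appear as [HIDDEN] in logs.
# This is NOT how ClickHouse masks secrets; it only shows the expected
# input/output relationship for a single simple case.
import re

_PASSWORD_ARG = re.compile(r"(password\s*=\s*)'(?:[^'\\]|\\.)*'", re.IGNORECASE)


def mask_for_log(query: str) -> str:
    """Replace password string literals with the [HIDDEN] placeholder."""
    return _PASSWORD_ARG.sub(r"\1'[HIDDEN]'", query)


if __name__ == "__main__":
    q = (
        "CREATE TABLE t (x int) ENGINE = MySQL(named_collection_1, "
        "host = 'mysql57', user = 'mysql_user', password = 'qwerty123')"
    )
    masked = mask_for_log(q)
    assert "qwerty123" not in masked
    assert "password = '[HIDDEN]'" in masked
    print(masked)
```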
+ # We test logging here and not actual work with AWS server. + node.query_and_get_answer_with_error(query.format(table_function)) + + table_function_with_hidden_arg = "s3('http://minio1:9001/root/data/testfuncw.tsv.gz', 'minio', '[HIDDEN]', 'TSV', 'x int')" + + check_logs( + must_contain=[ + query.format(table_function_with_hidden_arg) for query in queries + ], + must_not_contain=[password], + ) + + node.query("DROP TABLE tablefuncw") + + def test_encryption_functions(): plaintext = new_password() cipher = new_password() diff --git a/tests/integration/test_odbc_interaction/configs/dictionaries/postgres_odbc_no_connection_pool_dictionary.xml b/tests/integration/test_odbc_interaction/configs/dictionaries/postgres_odbc_no_connection_pool_dictionary.xml new file mode 100644 index 00000000000..a8321b1bbf1 --- /dev/null +++ b/tests/integration/test_odbc_interaction/configs/dictionaries/postgres_odbc_no_connection_pool_dictionary.xml @@ -0,0 +1,41 @@ + + + postgres_odbc_nopool + + +

<table>clickhouse.test_table</table>
+ DSN=postgresql_odbc + postgres + + + 0 + + + + 5 + 5 + + + + + + + + id + + + + column1 + Int64 + 1 + + + + column2 + String + '' + + + + + diff --git a/tests/integration/test_odbc_interaction/test.py b/tests/integration/test_odbc_interaction/test.py index ed925759114..14f5de17870 100644 --- a/tests/integration/test_odbc_interaction/test.py +++ b/tests/integration/test_odbc_interaction/test.py @@ -21,6 +21,7 @@ node1 = cluster.add_instance( "configs/dictionaries/sqlite3_odbc_hashed_dictionary.xml", "configs/dictionaries/sqlite3_odbc_cached_dictionary.xml", "configs/dictionaries/postgres_odbc_hashed_dictionary.xml", + "configs/dictionaries/postgres_odbc_no_connection_pool_dictionary.xml", ], ) @@ -624,6 +625,34 @@ def test_postgres_odbc_hashed_dictionary_no_tty_pipe_overflow(started_cluster): cursor.execute("truncate table clickhouse.test_table") +def test_no_connection_pooling(started_cluster): + skip_test_msan(node1) + + conn = get_postgres_conn(started_cluster) + cursor = conn.cursor() + cursor.execute( + "insert into clickhouse.test_table values(1, 1, 'hello'),(2, 2, 'world')" + ) + node1.exec_in_container(["ss", "-K", "dport", "5432"], privileged=True, user="root") + node1.query("SYSTEM RELOAD DICTIONARY postgres_odbc_nopool") + assert_eq_with_retry( + node1, + "select dictGetString('postgres_odbc_nopool', 'column2', toUInt64(1))", + "hello", + ) + assert_eq_with_retry( + node1, + "select dictGetString('postgres_odbc_nopool', 'column2', toUInt64(2))", + "world", + ) + + # No open connections should be left because we don't use connection pooling. + assert "" == node1.exec_in_container( + ["ss", "-H", "dport", "5432"], privileged=True, user="root" + ) + cursor.execute("truncate table clickhouse.test_table") + + def test_postgres_insert(started_cluster): skip_test_msan(node1) diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py index d3fcc89561a..8160a6b47a7 100644 --- a/tests/integration/test_replicated_database/test.py +++ b/tests/integration/test_replicated_database/test.py @@ -212,6 +212,48 @@ def test_simple_alter_table(started_cluster, engine): competing_node.query("DROP DATABASE testdb SYNC") +@pytest.mark.parametrize("engine", ["MergeTree", "ReplicatedMergeTree"]) +def test_delete_from_table(started_cluster, engine): + main_node.query( + "CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard1', 'replica1');" + ) + dummy_node.query( + "CREATE DATABASE testdb ENGINE = Replicated('/clickhouse/databases/test1', 'shard2', 'replica1');" + ) + + name = "testdb.delete_test_{}".format(engine) + main_node.query( + "CREATE TABLE {} " + "(id UInt64, value String) " + "ENGINE = {} PARTITION BY id%2 ORDER BY (id);".format(name, engine) + ) + main_node.query("INSERT INTO TABLE {} VALUES(1, 'aaaa');".format(name)) + main_node.query("INSERT INTO TABLE {} VALUES(2, 'aaaa');".format(name)) + dummy_node.query("INSERT INTO TABLE {} VALUES(1, 'bbbb');".format(name)) + dummy_node.query("INSERT INTO TABLE {} VALUES(2, 'bbbb');".format(name)) + + main_node.query( + "SET allow_experimental_lightweight_delete=1; DELETE FROM {} WHERE id=2;".format( + name + ) + ) + + expected = "1\taaaa\n1\tbbbb" + + table_for_select = name + if not "Replicated" in engine: + table_for_select = "cluster('testdb', {})".format(name) + for node in [main_node, dummy_node]: + assert_eq_with_retry( + node, + "SELECT * FROM {} ORDER BY id, value;".format(table_for_select), + expected, + ) + + main_node.query("DROP DATABASE testdb SYNC") + 
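`test_delete_from_table` above exercises lightweight deletes, which in this release are still gated by `allow_experimental_lightweight_delete`. Reproducing the same statement outside the integration-test harness is straightforward over the HTTP interface; host, port, table name and the lack of authentication below are assumptions of the sketch:

```python
#!/usr/bin/env python3
# Sketch: run a lightweight DELETE the way the integration test does,
# but against a plain local server over HTTP.
import urllib.parse
import urllib.request


def execute(sql: str, host: str = "localhost", port: int = 8123, **settings) -> str:
    # Settings travel as URL parameters, the query itself as the POST body.
    params = urllib.parse.urlencode(settings)
    request = urllib.request.Request(
        f"http://{host}:{port}/?{params}", data=sql.encode("utf-8")
    )
    with urllib.request.urlopen(request) as response:
        return response.read().decode("utf-8")


if __name__ == "__main__":
    execute(
        "CREATE TABLE IF NOT EXISTS delete_demo (id UInt64, value String) "
        "ENGINE = MergeTree PARTITION BY id % 2 ORDER BY id"
    )
    execute("INSERT INTO delete_demo VALUES (1, 'aaaa'), (2, 'aaaa')")
    # The experimental setting must accompany the DELETE itself.
    execute(
        "DELETE FROM delete_demo WHERE id = 2",
        allow_experimental_lightweight_delete=1,
    )
    print(execute("SELECT * FROM delete_demo ORDER BY id FORMAT TSV"))
```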
dummy_node.query("DROP DATABASE testdb SYNC") + + def get_table_uuid(database, name): return main_node.query( f"SELECT uuid FROM system.tables WHERE database = '{database}' and name = '{name}'" diff --git a/tests/integration/test_replicated_merge_tree_s3_restore/test.py b/tests/integration/test_replicated_merge_tree_s3_restore/test.py index f26b3e7bd35..2181f260f32 100644 --- a/tests/integration/test_replicated_merge_tree_s3_restore/test.py +++ b/tests/integration/test_replicated_merge_tree_s3_restore/test.py @@ -131,24 +131,29 @@ def create_restore_file(node, revision=None, bucket=None, path=None, detached=No ["bash", "-c", "touch /var/lib/clickhouse/disks/s3/restore"], user="root" ) - add_restore_option = 'echo -en "{}={}\n" >> /var/lib/clickhouse/disks/s3/restore' - if revision: + num_restore_options = 0 + + def add_restore_option(key, value): + nonlocal num_restore_options + to = ">>" if num_restore_options else ">" node.exec_in_container( - ["bash", "-c", add_restore_option.format("revision", revision)], user="root" - ) - if bucket: - node.exec_in_container( - ["bash", "-c", add_restore_option.format("source_bucket", bucket)], + [ + "bash", + "-c", + f'echo -en "{key}={value}\n" {to} /var/lib/clickhouse/disks/s3/restore', + ], user="root", ) + num_restore_options += 1 + + if revision: + add_restore_option("revision", revision) + if bucket: + add_restore_option("source_bucket", bucket) if path: - node.exec_in_container( - ["bash", "-c", add_restore_option.format("source_path", path)], user="root" - ) + add_restore_option("source_path", path) if detached: - node.exec_in_container( - ["bash", "-c", add_restore_option.format("detached", "true")], user="root" - ) + add_restore_option("detached", "true") def get_revision_counter(node, backup_number): diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 21d41ec2a38..ac6eee11892 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -1743,7 +1743,7 @@ def test_s3_list_objects_failure(started_cluster): get_query = """ SELECT sleep({seconds}) FROM s3('http://resolver:8083/{bucket}/test_no_list_*', 'CSV', 'c1 UInt32') - SETTINGS s3_list_object_keys_size = 1, max_threads = {max_threads}, enable_s3_requests_logging = 1, input_format_parallel_parsing = 0 + SETTINGS s3_list_object_keys_size = 1, max_threads = {max_threads}, enable_s3_requests_logging = 1 """.format( bucket=bucket, seconds=random.random(), max_threads=random.randint(2, 20) ) diff --git a/tests/integration/test_system_merges/test.py b/tests/integration/test_system_merges/test.py index 643d5ffc8c7..0a469bd7bbd 100644 --- a/tests/integration/test_system_merges/test.py +++ b/tests/integration/test_system_merges/test.py @@ -96,7 +96,7 @@ def test_merge_simple(started_cluster, replicated): # Wait for OPTIMIZE to actually start assert_eq_with_retry( - node1, + node_check, f"select count() from system.merges where table='{table_name}'", "1\n", retry_count=30, @@ -196,7 +196,7 @@ def test_mutation_simple(started_cluster, replicated): # Wait for the mutation to actually start assert_eq_with_retry( - node1, + node_check, f"select count() from system.merges where table='{table_name}'", "1\n", retry_count=30, diff --git a/tests/performance/codecs_int_insert.xml b/tests/performance/codecs_int_insert.xml index caefaba3725..618e20160f8 100644 --- a/tests/performance/codecs_int_insert.xml +++ b/tests/performance/codecs_int_insert.xml @@ -13,7 +13,6 @@ Delta T64 DoubleDelta - Gorilla diff --git 
a/tests/performance/codecs_int_select.xml b/tests/performance/codecs_int_select.xml index 7d47cd300d8..62c1ee16e7b 100644 --- a/tests/performance/codecs_int_select.xml +++ b/tests/performance/codecs_int_select.xml @@ -13,7 +13,6 @@ Delta T64 DoubleDelta - Gorilla diff --git a/tests/performance/date_time_long.xml b/tests/performance/date_time_long.xml index 240481969a8..0543698ae61 100644 --- a/tests/performance/date_time_long.xml +++ b/tests/performance/date_time_long.xml @@ -6,7 +6,6 @@ toSecond toMinute toHour - toDayOfWeek toDayOfMonth toDayOfYear toMonth @@ -47,21 +46,33 @@ toUnixTimestamp + + datetime_transform_with_mode + + toDayOfWeek + toStartOfWeek + toWeek + toYearWeek + + date_transform - toDayOfWeek toDayOfMonth + toDayOfWeek toDayOfYear + toWeek toMonth toQuarter toYear + toYearWeek toISOWeek toISOYear toDate toMonday toStartOfDay + toStartOfWeek toStartOfMonth toStartOfQuarter toStartOfYear @@ -79,14 +90,6 @@ toRelativeQuarterNum - - time_zone - - UTC - Asia/Istanbul - Asia/Kolkata - - binary_function @@ -116,11 +119,21 @@ subtractYears + + time_zone + + UTC + Asia/Istanbul + Asia/Kolkata + + SELECT count() FROM numbers(50000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, {datetime_transform}(t, '{time_zone}')) + SELECT count() FROM numbers(50000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, {datetime_transform_with_mode}(t, 0, '{time_zone}')) SELECT count() FROM numbers(50000000) WHERE NOT ignore(toDate('2017-01-01') + number % 1000 + rand() % 10 AS t, {date_transform}(t)) SELECT count() FROM numbers(50000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, {binary_function}(t, 1)) + SELECT count() FROM numbers(50000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, toStartOfInterval(t, INTERVAL 1 month)) SELECT count() FROM numbers(50000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, date_trunc('month', t)) diff --git a/tests/performance/date_time_short.xml b/tests/performance/date_time_short.xml index de859710670..2bd0598b41a 100644 --- a/tests/performance/date_time_short.xml +++ b/tests/performance/date_time_short.xml @@ -1,15 +1,12 @@ + - date_transform - toDayOfWeek toMonday toRelativeDayNum toYYYYMMDDhhmmss @@ -32,16 +29,24 @@ - + SELECT count() FROM numbers(50000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, {date_transform}(t, '{time_zone}')) SELECT count() FROM numbers(50000000) WHERE NOT ignore(toDate('2017-01-01') + number % 1000 + rand() % 10 AS t, {date_transform}(t)) SELECT count() FROM numbers(50000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, toUnixTimestamp(t, '{time_zone}')) - + SELECT count() FROM numbers(50000000) WHERE NOT ignore(toDate('2017-01-01') + number % 1000 + rand() % 10 AS t, toUnixTimestamp(toUInt16(t))) + SELECT count() FROM numbers(50000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, {binary_function}(t, 1)) + + + SELECT count() FROM numbers(50000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, toStartOfInterval(t, INTERVAL 1 month)) + SELECT count() FROM numbers(50000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, 
date_trunc('month', t)) + + SELECT count() FROM numbers(50000000) WHERE NOT ignore(toDateTime('2017-01-01 00:00:00') + number % 100000000 + rand() % 100000 AS t, toDayOfWeek(t, 0, '{time_zone}')) + SELECT count() FROM numbers(50000000) WHERE NOT ignore(toDate('2017-01-01') + number % 1000 + rand() % 10 AS t, toDayOfWeek(t)) diff --git a/tests/performance/position_empty_needle.xml b/tests/performance/position_empty_needle.xml new file mode 100644 index 00000000000..43f0f09b8d5 --- /dev/null +++ b/tests/performance/position_empty_needle.xml @@ -0,0 +1,13 @@ + + select position(materialize('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'), '') from numbers(100000000) format Null + select position(materialize('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'), '', 10) from numbers(100000000) format Null + + select positionCaseInsensitive(materialize('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'), '') from numbers(100000000) format Null + select positionCaseInsensitive(materialize('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'), '', 10) from numbers(100000000) format Null + + select positionUTF8(materialize('xẞyyaa1ẞ1yzẞXẞẞ1ẞẞ1bctest'), '') from numbers(100000000) format Null + select positionUTF8(materialize('xẞyyaa1ẞ1yzẞXẞẞ1ẞẞ1bctest'), '', 10) from numbers(100000000) format Null + + select positionCaseInsensitiveUTF8(materialize('xẞyyaa1ẞ1yzẞXẞẞ1ẞẞ1bctest'), '') from numbers(100000000) format Null + select positionCaseInsensitiveUTF8(materialize('xẞyyaa1ẞ1yzẞXẞẞ1ẞẞ1bctest'), '', 10) from numbers(100000000) format Null + diff --git a/tests/performance/sort.xml b/tests/performance/sort.xml index 39de5fc04a0..4f126047dfb 100644 --- a/tests/performance/sort.xml +++ b/tests/performance/sort.xml @@ -1,4 +1,8 @@ + + 0 + + CREATE TABLE rand_unlimited_10m_8 (key UInt8) Engine = Memory CREATE TABLE rand_1k_10m_16 (key UInt16) Engine = Memory diff --git a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.reference b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.reference new file mode 100644 index 00000000000..d3b69939e66 --- /dev/null +++ b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.reference @@ -0,0 +1,14 @@ +10 0.001 +20 0.05 +30 10 +40 40 +50 125 +60 0.3 +70 0.16 +80 0.08 +90 0.04 +100 0.005 +110 0.01 +120 3 +130 10 +140 15 diff --git a/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql new file mode 100644 index 00000000000..e6c9ea50924 --- /dev/null +++ b/tests/queries/0_stateless/00002_log_and_exception_messages_formatting.sql @@ -0,0 +1,61 @@ +-- Tags: no-parallel +-- no-parallel because we want to run this test when most of the other tests already passed + +-- If this test fails, see the "Top patterns of log messages" diagnostics in the end of run.log + +system flush logs; +drop table if exists logs; +create view logs as select * from system.text_log where now() - toIntervalMinute(120) < event_time; + +-- Check that we don't have too many messages formatted with fmt::runtime or strings concatenation. +-- 0.001 threshold should be always enough, the value was about 0.00025 +select 10, max2(sum(length(message_format_string) = 0) / count(), 0.001) from logs; + +-- Check the same for exceptions. The value was 0.03 +select 20, max2(sum(length(message_format_string) = 0) / count(), 0.05) from logs where message like '%DB::Exception%'; + +-- Check that we don't have too many short meaningless message patterns. 
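A note on how the new `00002_log_and_exception_messages_formatting` test keeps a stable reference file: each check below prints `max2(metric, threshold)` instead of asserting `metric < threshold`, so the output equals the fixed threshold from the `.reference` file whenever the metric stays within bounds and only diverges (failing the comparison) when a limit is exceeded. The same clamping idea in plain Python, purely for illustration:

```python
#!/usr/bin/env python3
# Illustration of the max2(metric, threshold) trick: the printed value is
# constant while the metric is within bounds, so it can be diffed against
# a fixed reference file.
def check(metric: float, threshold: float) -> float:
    return max(metric, threshold)


if __name__ == "__main__":
    # e.g. fraction of log messages without a format string
    assert check(0.00025, 0.001) == 0.001   # healthy: output is the threshold
    assert check(0.0042, 0.001) == 0.0042   # regression: output changes, diff fails
    print("max2-style clamping behaves as expected")
```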
+select 30, max2(countDistinct(message_format_string), 10) from logs where length(message_format_string) < 10; + +-- Same as above. Feel free to update the threshold or remove this query if really necessary +select 40, max2(countDistinct(message_format_string), 40) from logs where length(message_format_string) < 16; + +-- Same as above, but exceptions must be more informative. Feel free to update the threshold or remove this query if really necessary +select 50, max2(countDistinct(message_format_string), 125) from logs where length(message_format_string) < 30 and message ilike '%DB::Exception%'; + + +-- Avoid too noisy messages: top 1 message frequency must be less than 30%. We should reduce the threshold +select 60, max2((select count() from logs group by message_format_string order by count() desc limit 1) / (select count() from logs), 0.30); + +-- Same as above, but excluding Test level (actually finds top 1 Trace message) +with ('Access granted: {}{}', '{} -> {}') as frequent_in_tests +select 70, max2((select count() from logs where level!='Test' and message_format_string not in frequent_in_tests + group by message_format_string order by count() desc limit 1) / (select count() from logs), 0.16); + +-- Same as above for Debug +select 80, max2((select count() from logs where level <= 'Debug' group by message_format_string order by count() desc limit 1) / (select count() from logs), 0.08); + +-- Same as above for Info +select 90, max2((select count() from logs where level <= 'Information' group by message_format_string order by count() desc limit 1) / (select count() from logs), 0.04); + +-- Same as above for Warning +with ('Not enabled four letter command {}') as frequent_in_tests +select 100, max2((select count() from logs where level = 'Warning' and message_format_string not in frequent_in_tests + group by message_format_string order by count() desc limit 1) / (select count() from logs), 0.005); + +-- Same as above for Error +select 110, max2((select count() from logs where level = 'Warning' group by message_format_string order by count() desc limit 1) / (select count() from logs), 0.01); + +-- Avoid too noisy messages: limit the number of messages with high frequency +select 120, max2(count(), 3) from (select count() / (select count() from logs) as freq, message_format_string from logs group by message_format_string having freq > 0.10); +select 130, max2(count(), 10) from (select count() / (select count() from logs) as freq, message_format_string from logs group by message_format_string having freq > 0.05); + +-- Each message matches its pattern (returns 0 rows) +-- FIXME maybe we should make it stricter ('Code:%Exception: '||s||'%'), but it's not easy because of addMessage +select 140, max2(countDistinct(message_format_string), 15) from ( + select message_format_string, any(message) as any_message from logs + where message not like (replaceRegexpAll(message_format_string, '{[:.0-9dfx]*}', '%') as s) + and message not like ('%Exception: '||s||'%') group by message_format_string +) where any_message not like '%Poco::Exception%'; + +drop table logs; diff --git a/tests/queries/0_stateless/00039_inserts_through_http.sh b/tests/queries/0_stateless/00039_inserts_through_http.sh index 2eaa4393935..7b2ec25923c 100755 --- a/tests/queries/0_stateless/00039_inserts_through_http.sh +++ b/tests/queries/0_stateless/00039_inserts_through_http.sh @@ -5,11 +5,11 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CURDIR"/../shell_config.sh echo 'DROP TABLE IF EXISTS long_insert' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}" -d @- -echo 'CREATE TABLE long_insert (a String) ENGINE = Memory' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}" -d @- +echo 'CREATE TABLE long_insert (str String) ENGINE = Memory' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}" -d @- for string_size in 1 10 100 1000 10000 100000 1000000; do # LC_ALL=C is needed because otherwise Perl will bark on bad tuned environment. LC_ALL=C perl -we 'for my $letter ("a" .. "z") { print(($letter x '$string_size') . "\n") }' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}&query=INSERT+INTO+long_insert+FORMAT+TabSeparated" --data-binary @- - echo 'SELECT substring(a, 1, 1) AS c, length(a) AS l FROM long_insert ORDER BY c, l' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}" -d @- + echo 'SELECT substring(str, 1, 1) AS c, length(str) AS l FROM long_insert ORDER BY c, l' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}" -d @- done echo 'DROP TABLE long_insert' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}" -d @- diff --git a/tests/queries/0_stateless/00189_time_zones_long.sql b/tests/queries/0_stateless/00189_time_zones_long.sql index cf1b9e9ae1d..5760f6c0447 100644 --- a/tests/queries/0_stateless/00189_time_zones_long.sql +++ b/tests/queries/0_stateless/00189_time_zones_long.sql @@ -120,11 +120,11 @@ SELECT toDayOfMonth(toDateTime(1412106600), 'Pacific/Pitcairn'); /* toDayOfWeek */ SELECT 'toDayOfWeek'; -SELECT toDayOfWeek(toDateTime(1412106600), 'Asia/Istanbul'); -SELECT toDayOfWeek(toDateTime(1412106600), 'Europe/Paris'); -SELECT toDayOfWeek(toDateTime(1412106600), 'Europe/London'); -SELECT toDayOfWeek(toDateTime(1412106600), 'Asia/Tokyo'); -SELECT toDayOfWeek(toDateTime(1412106600), 'Pacific/Pitcairn'); +SELECT toDayOfWeek(toDateTime(1412106600), 0, 'Asia/Istanbul'); +SELECT toDayOfWeek(toDateTime(1412106600), 0, 'Europe/Paris'); +SELECT toDayOfWeek(toDateTime(1412106600), 0, 'Europe/London'); +SELECT toDayOfWeek(toDateTime(1412106600), 0, 'Asia/Tokyo'); +SELECT toDayOfWeek(toDateTime(1412106600), 0, 'Pacific/Pitcairn'); /* toHour */ diff --git a/tests/queries/0_stateless/00304_http_external_data.sh b/tests/queries/0_stateless/00304_http_external_data.sh index 4a097249cca..def17bc5cd1 100755 --- a/tests/queries/0_stateless/00304_http_external_data.sh +++ b/tests/queries/0_stateless/00304_http_external_data.sh @@ -6,4 +6,10 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) echo -ne '1,Hello\n2,World\n' | ${CLICKHOUSE_CURL} -sSF 'file=@-' "${CLICKHOUSE_URL}&query=SELECT+*+FROM+file&file_format=CSV&file_types=UInt8,String"; echo -ne '1@Hello\n2@World\n' | ${CLICKHOUSE_CURL} -sSF 'file=@-' "${CLICKHOUSE_URL}&query=SELECT+*+FROM+file&file_format=CSV&file_types=UInt8,String&format_csv_delimiter=@"; -echo -ne '\x01\x00\x00\x00\x02\x00\x00\x00' | ${CLICKHOUSE_CURL} -sSF "tmp=@-" "${CLICKHOUSE_URL}&query=SELECT+*+FROM+tmp&tmp_structure=TaskID+UInt32&tmp_format=RowBinary"; + +# use big-endian version of binary data for s390x +if [[ $(uname -a | grep s390x) ]]; then + echo -ne '\x00\x00\x00\x01\x00\x00\x00\x02' | ${CLICKHOUSE_CURL} -sSF "tmp=@-" "${CLICKHOUSE_URL}&query=SELECT+*+FROM+tmp&tmp_structure=TaskID+UInt32&tmp_format=RowBinary"; +else + echo -ne '\x01\x00\x00\x00\x02\x00\x00\x00' | ${CLICKHOUSE_CURL} -sSF "tmp=@-" "${CLICKHOUSE_URL}&query=SELECT+*+FROM+tmp&tmp_structure=TaskID+UInt32&tmp_format=RowBinary"; +fi diff --git a/tests/queries/0_stateless/00419_show_sql_queries.sh b/tests/queries/0_stateless/00419_show_sql_queries.sh index 
1737e874ff2..607703b385a 100755 --- a/tests/queries/0_stateless/00419_show_sql_queries.sh +++ b/tests/queries/0_stateless/00419_show_sql_queries.sh @@ -7,3 +7,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) $CLICKHOUSE_CLIENT -q "SHOW PROCESSLIST" &>/dev/null $CLICKHOUSE_CLIENT -q "SHOW DATABASES" &>/dev/null $CLICKHOUSE_CLIENT -q "SHOW TABLES" &>/dev/null +$CLICKHOUSE_CLIENT -q "SHOW ENGINES" &>/dev/null diff --git a/tests/queries/0_stateless/00539_functions_for_working_with_json.sql b/tests/queries/0_stateless/00539_functions_for_working_with_json.sql index 38b217308d4..59d00058a13 100644 --- a/tests/queries/0_stateless/00539_functions_for_working_with_json.sql +++ b/tests/queries/0_stateless/00539_functions_for_working_with_json.sql @@ -1,4 +1,3 @@ --- Tags: no-fasttest -- VisitParam with basic type SELECT visitParamExtractInt('{"myparam":-1}', 'myparam'); diff --git a/tests/queries/0_stateless/00898_parsing_bad_diagnostic_message.sh b/tests/queries/0_stateless/00898_parsing_bad_diagnostic_message.sh index 0eeabde917c..530e7e92e08 100755 --- a/tests/queries/0_stateless/00898_parsing_bad_diagnostic_message.sh +++ b/tests/queries/0_stateless/00898_parsing_bad_diagnostic_message.sh @@ -6,4 +6,5 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -echo -ne '0\t1\t2\t3\t4\t5\t6\t7\t8\t9\t10\ta' | $CLICKHOUSE_LOCAL --structure 'c0 UInt8, c1 UInt8, c2 UInt8, c3 UInt8, c4 UInt8, c5 UInt8, c6 UInt8, c7 UInt8, c8 UInt8, c9 UInt8, c10 UInt8, c11 UInt8' --input-format TSV --query 'SELECT * FROM table' 2>&1 | grep -F 'Column 11' +echo -ne '0\t1\t2\t3\t4\t5\t6\t7\t8\t9\t10\ta' | $CLICKHOUSE_LOCAL --structure 'c0 UInt8, c1 UInt8, c2 UInt8, c3 UInt8, c4 UInt8, c5 UInt8, c6 UInt8, c7 UInt8, c8 UInt8, c9 UInt8, c10 UInt8, c11 UInt8' --input-format TSV --query 'SELECT * FROM table' --input_format_tsv_detect_header=0 2>&1 | grep -F 'Column 11' + diff --git a/tests/queries/0_stateless/00918_json_functions.sql b/tests/queries/0_stateless/00918_json_functions.sql index ab4bc639084..dfae54dfd16 100644 --- a/tests/queries/0_stateless/00918_json_functions.sql +++ b/tests/queries/0_stateless/00918_json_functions.sql @@ -1,4 +1,5 @@ -- Tags: no-fasttest +-- Tag: no-fasttest due to only SIMD JSON is available in fasttest SELECT '--allow_simdjson=1--'; SET allow_simdjson=1; diff --git a/tests/queries/0_stateless/00921_datetime64_compatibility_long.python b/tests/queries/0_stateless/00921_datetime64_compatibility_long.python index e3cd7ee6d36..2706c0f5b12 100644 --- a/tests/queries/0_stateless/00921_datetime64_compatibility_long.python +++ b/tests/queries/0_stateless/00921_datetime64_compatibility_long.python @@ -7,14 +7,14 @@ import sys import argparse # Create SQL statement to verify dateTime64 is accepted as argument to functions taking DateTime. -FUNCTIONS=""" +FUNCTIONS = """ toTimeZone(N, 'UTC') toYear(N, 'Asia/Istanbul') toQuarter(N, 'Asia/Istanbul') toMonth(N, 'Asia/Istanbul') toDayOfYear(N, 'Asia/Istanbul') toDayOfMonth(N, 'Asia/Istanbul') -toDayOfWeek(N, 'Asia/Istanbul') +toDayOfWeek(N, 0, 'Asia/Istanbul') toHour(N, 'Asia/Istanbul') toMinute(N, 'Asia/Istanbul') toSecond(N, 'Asia/Istanbul') @@ -90,68 +90,51 @@ formatDateTime(N, '%C %d %D %e %F %H %I %j %m %M %p %R %S %T %u %V %w %y %Y %%', extra_ops = [ # With same type: ( - ['N {op} N'], + ["N {op} N"], { - 'op': - [ - '- ', # does not work, but should it? - '+ ', # does not work, but should it? 
- '!=', '==', # equality and inequality supposed to take sub-second part in account - '< ', - '<=', - '> ', - '>=' + "op": [ + "- ", # does not work, but should it? + "+ ", # does not work, but should it? + "!=", + "==", # equality and inequality supposed to take sub-second part in account + "< ", + "<=", + "> ", + ">=", ] - } + }, ), # With other DateTime types: ( - [ - 'N {op} {arg}', - '{arg} {op} N' - ], + ["N {op} {arg}", "{arg} {op} N"], { - 'op': - [ - '-', # does not work, but should it? - '!=', '==', + "op": [ + "-", # does not work, but should it? + "!=", + "==", # these are naturally expected to work, but they don't: - '< ', - '<=', - '> ', - '>=' + "< ", + "<=", + "> ", + ">=", ], - 'arg': ['DT', 'D', 'DT64'], - } + "arg": ["DT", "D", "DT64"], + }, ), # With arithmetic types ( - [ - 'N {op} {arg}', - '{arg} {op} N' - ], + ["N {op} {arg}", "{arg} {op} N"], { - 'op': - [ - '+ ', - '- ', - '==', - '!=', - '< ', - '<=', - '> ', - '>=' - ], - 'arg': - [ - 'toUInt8(1)', - 'toInt8(-1)', - 'toUInt16(1)', - 'toInt16(-1)', - 'toUInt32(1)', - 'toInt32(-1)', - 'toUInt64(1)', - 'toInt64(-1)' + "op": ["+ ", "- ", "==", "!=", "< ", "<=", "> ", ">="], + "arg": [ + "toUInt8(1)", + "toInt8(-1)", + "toUInt16(1)", + "toInt16(-1)", + "toUInt32(1)", + "toInt32(-1)", + "toUInt64(1)", + "toInt64(-1)", ], }, ), @@ -167,14 +150,17 @@ for funcs, args in extra_ops: # filter out empty lines and commented out lines COMMENTED_OUT_LINE_RE = re.compile(r"^\s*#") -FUNCTIONS = list([f for f in FUNCTIONS if len(f) != 0 and COMMENTED_OUT_LINE_RE.match(f) == None]) -TYPES = ['D', 'DT', 'DT64'] +FUNCTIONS = list( + [f for f in FUNCTIONS if len(f) != 0 and COMMENTED_OUT_LINE_RE.match(f) == None] +) +TYPES = ["D", "DT", "DT64"] + def escape_string(s): if sys.version_info[0] > 2: - return s.encode('unicode_escape').decode('utf-8').replace("'", "\\'") + return s.encode("unicode_escape").decode("utf-8").replace("'", "\\'") else: - return s.encode('string-escape').decode('utf-8') + return s.encode("string-escape").decode("utf-8") def execute_functions_for_types(functions, types): @@ -186,18 +172,39 @@ def execute_functions_for_types(functions, types): WITH \ toDateTime64('2019-09-16 19:20:11.234', 3, 'Europe/Minsk') as DT64, \ toDateTime('2019-09-16 19:20:11', 'Europe/Minsk') as DT, \ -toDate('2019-09-16') as D, {X} as N".format(X=dt) - print(("""{prologue} SELECT toTypeName(r), {func} as r FORMAT CSV;""".format(prologue=prologue, func=func))) +toDate('2019-09-16') as D, {X} as N".format( + X=dt + ) + print( + ( + """{prologue} SELECT toTypeName(r), {func} as r FORMAT CSV;""".format( + prologue=prologue, func=func + ) + ) + ) print("""SELECT '------------------------------------------';""") + def main(): def parse_args(): parser = argparse.ArgumentParser() - parser.add_argument('--functions_re', type=re.compile, help="RE to enable functions", default=None) - parser.add_argument('--types_re', - type=lambda s: re.compile('^(' + s + ')$'), - help="RE to enable types, supported types: " + ",".join(TYPES), default=None) - parser.add_argument('--list_functions', action='store_true', help="List all functions to be tested and exit") + parser.add_argument( + "--functions_re", + type=re.compile, + help="RE to enable functions", + default=None, + ) + parser.add_argument( + "--types_re", + type=lambda s: re.compile("^(" + s + ")$"), + help="RE to enable types, supported types: " + ",".join(TYPES), + default=None, + ) + parser.add_argument( + "--list_functions", + action="store_true", + help="List all functions to be tested and exit", 
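The Python changes to 00921_datetime64_compatibility_long.python above are purely cosmetic reformatting; the SQL the script emits keeps its shape, only the toDayOfWeek call gains the mode argument. A sketch of one generated statement, with DT64 substituted for N as the script does:

WITH
    toDateTime64('2019-09-16 19:20:11.234', 3, 'Europe/Minsk') AS DT64,
    toDateTime('2019-09-16 19:20:11', 'Europe/Minsk') AS DT,
    toDate('2019-09-16') AS D,
    DT64 AS N
SELECT toTypeName(r), toDayOfWeek(N, 0, 'Asia/Istanbul') AS r FORMAT CSV;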
+ ) return parser.parse_args() args = parse_args() @@ -223,5 +230,6 @@ def main(): execute_functions_for_types(functions, types) -if __name__ == '__main__': + +if __name__ == "__main__": exit(main()) diff --git a/tests/queries/0_stateless/00921_datetime64_compatibility_long.reference b/tests/queries/0_stateless/00921_datetime64_compatibility_long.reference index 8d28a69ff3d..8a168ed0e9e 100644 --- a/tests/queries/0_stateless/00921_datetime64_compatibility_long.reference +++ b/tests/queries/0_stateless/00921_datetime64_compatibility_long.reference @@ -28,7 +28,7 @@ SELECT toDayOfMonth(N, \'Asia/Istanbul\') "UInt8",16 "UInt8",16 ------------------------------------------ -SELECT toDayOfWeek(N, \'Asia/Istanbul\') +SELECT toDayOfWeek(N, 0, \'Asia/Istanbul\') "UInt8",1 "UInt8",1 "UInt8",1 diff --git a/tests/queries/0_stateless/00975_json_hang.sql b/tests/queries/0_stateless/00975_json_hang.sql index 71d921f0475..4c2a1a8ee79 100644 --- a/tests/queries/0_stateless/00975_json_hang.sql +++ b/tests/queries/0_stateless/00975_json_hang.sql @@ -1,3 +1,2 @@ --- Tags: no-fasttest SELECT DISTINCT JSONExtractRaw(concat('{"x":', rand() % 2 ? 'true' : 'false', '}'), 'x') AS res FROM numbers(1000000) ORDER BY res; diff --git a/tests/queries/0_stateless/00990_hasToken_and_tokenbf.reference b/tests/queries/0_stateless/00990_hasToken_and_tokenbf.reference index 10e8f0d2c59..4b3beccf5f1 100644 --- a/tests/queries/0_stateless/00990_hasToken_and_tokenbf.reference +++ b/tests/queries/0_stateless/00990_hasToken_and_tokenbf.reference @@ -1,3 +1,8 @@ +0 +0 2007 2007 2007 +0 +2007 +2007 diff --git a/tests/queries/0_stateless/00990_hasToken_and_tokenbf.sql b/tests/queries/0_stateless/00990_hasToken_and_tokenbf.sql index ad50420b6ae..d3b36cda0d8 100644 --- a/tests/queries/0_stateless/00990_hasToken_and_tokenbf.sql +++ b/tests/queries/0_stateless/00990_hasToken_and_tokenbf.sql @@ -1,4 +1,3 @@ - DROP TABLE IF EXISTS bloom_filter; CREATE TABLE bloom_filter @@ -13,9 +12,19 @@ insert into bloom_filter select number+2000, 'abc,def,zzz' from numbers(8); insert into bloom_filter select number+3000, 'yyy,uuu' from numbers(1024); insert into bloom_filter select number+3000, 'abcdefzzz' from numbers(1024); +SELECT max(id) FROM bloom_filter WHERE hasToken(s, 'abc,def,zzz'); -- { serverError BAD_ARGUMENTS } +SELECT max(id) FROM bloom_filter WHERE hasTokenCaseInsensitive(s, 'abc,def,zzz'); -- { serverError BAD_ARGUMENTS } + +SELECT max(id) FROM bloom_filter WHERE hasTokenOrNull(s, 'abc,def,zzz'); +SELECT max(id) FROM bloom_filter WHERE hasTokenCaseInsensitiveOrNull(s, 'abc,def,zzz'); + +select max(id) from bloom_filter where hasTokenCaseInsensitive(s, 'ABC'); +select max(id) from bloom_filter where hasTokenCaseInsensitive(s, 'zZz'); + set max_rows_to_read = 16; SELECT max(id) FROM bloom_filter WHERE hasToken(s, 'abc'); +SELECT max(id) FROM bloom_filter WHERE hasToken(s, 'ABC'); SELECT max(id) FROM bloom_filter WHERE hasToken(s, 'def'); SELECT max(id) FROM bloom_filter WHERE hasToken(s, 'zzz'); diff --git a/tests/queries/0_stateless/01031_mutations_interpreter_and_context.sh b/tests/queries/0_stateless/01031_mutations_interpreter_and_context.sh index 8c172049745..9bc9028ad1f 100755 --- a/tests/queries/0_stateless/01031_mutations_interpreter_and_context.sh +++ b/tests/queries/0_stateless/01031_mutations_interpreter_and_context.sh @@ -1,5 +1,4 @@ #!/usr/bin/env bash -# Tags: no-parallel, no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git 
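The new 00990_hasToken_and_tokenbf queries exercise needle validation: hasToken and hasTokenCaseInsensitive reject a needle that is not a single token with BAD_ARGUMENTS, while the ...OrNull variants appear to degrade to NULL instead of throwing. A small sketch under that assumption:

SELECT hasToken('abc,def,zzz', 'abc');           -- 1, 'abc' is a single token of the haystack
SELECT hasTokenOrNull('abc,def,zzz', 'abc,def'); -- NULL rather than a BAD_ARGUMENTS error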
a/tests/queries/0_stateless/01076_parallel_alter_replicated_zookeeper.sh b/tests/queries/0_stateless/01076_parallel_alter_replicated_zookeeper.sh index bbc16121cb6..7f53bf2a627 100755 --- a/tests/queries/0_stateless/01076_parallel_alter_replicated_zookeeper.sh +++ b/tests/queries/0_stateless/01076_parallel_alter_replicated_zookeeper.sh @@ -1,14 +1,15 @@ #!/usr/bin/env bash # Tags: replica, no-parallel, no-fasttest - # This test checks mutations concurrent execution with concurrent inserts. # There was a bug in mutations finalization, when mutation finishes not after all # MUTATE_PART tasks execution, but after GET of already mutated part from other replica. # To test it we stop some replicas to delay fetch of required parts for mutation. -# Since our replication queue executing tasks concurrently it may happen, that we dowload already mutated +# Since our replication queue executing tasks concurrently it may happen, that we download already mutated # part before source part. +# Messages about deleting of tmp-fetch directories are ok. +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=fatal CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -21,7 +22,16 @@ for i in $(seq $REPLICAS); do done for i in $(seq $REPLICAS); do - $CLICKHOUSE_CLIENT --query "CREATE TABLE concurrent_mutate_mt_$i (key UInt64, value1 UInt64, value2 String) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/concurrent_mutate_mt', '$i') ORDER BY key SETTINGS max_replicated_mutations_in_queue=1000, number_of_free_entries_in_pool_to_execute_mutation=0,max_replicated_merges_in_queue=1000,temporary_directories_lifetime=10,cleanup_delay_period=3,cleanup_delay_period_random_add=0" + $CLICKHOUSE_CLIENT --query " + CREATE TABLE concurrent_mutate_mt_$i (key UInt64, value1 UInt64, value2 String) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/concurrent_mutate_mt', '$i') + ORDER BY key + SETTINGS max_replicated_mutations_in_queue = 1000, + number_of_free_entries_in_pool_to_execute_mutation = 0, + max_replicated_merges_in_queue = 1000, + temporary_directories_lifetime = 10, + cleanup_delay_period = 3, + cleanup_delay_period_random_add = 0" done $CLICKHOUSE_CLIENT --query "INSERT INTO concurrent_mutate_mt_1 SELECT number, number + 10, toString(number) from numbers(10)" diff --git a/tests/queries/0_stateless/01085_simdjson_uint64.sql b/tests/queries/0_stateless/01085_simdjson_uint64.sql index 952bc150dec..5fc3bc7dbed 100644 --- a/tests/queries/0_stateless/01085_simdjson_uint64.sql +++ b/tests/queries/0_stateless/01085_simdjson_uint64.sql @@ -1,4 +1,3 @@ --- Tags: no-fasttest WITH '{"a": "hello", "b": 12345678901234567890}' AS json SELECT JSONExtractRaw(json, 'a'); diff --git a/tests/queries/0_stateless/01131_max_rows_to_sort.sql b/tests/queries/0_stateless/01131_max_rows_to_sort.sql index a6109700045..d18f35e091e 100644 --- a/tests/queries/0_stateless/01131_max_rows_to_sort.sql +++ b/tests/queries/0_stateless/01131_max_rows_to_sort.sql @@ -4,4 +4,5 @@ SELECT * FROM system.numbers ORDER BY number; -- { serverError 396 } SET sort_overflow_mode = 'break'; SET max_block_size = 1000; +set query_plan_remove_redundant_sorting=0; -- to keep sorting in the query below SELECT count() >= 100 AND count() <= 1000 FROM (SELECT * FROM system.numbers ORDER BY number); diff --git a/tests/queries/0_stateless/01195_formats_diagnostic_info.sh b/tests/queries/0_stateless/01195_formats_diagnostic_info.sh index a6b3ebf4087..e75780a4520 100755 --- 
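The setting pinned in 01131_max_rows_to_sort is there because the new query_plan_remove_redundant_sorting optimisation may drop the ORDER BY inside the subquery (its order cannot affect the outer count), which would change what the sort_overflow_mode = 'break' check measures. A hypothetical way to look at the optimisation on its own:

SET query_plan_remove_redundant_sorting = 1;
EXPLAIN PLAN SELECT count() FROM (SELECT * FROM numbers(1000) ORDER BY number);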
a/tests/queries/0_stateless/01195_formats_diagnostic_info.sh +++ b/tests/queries/0_stateless/01195_formats_diagnostic_info.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -PARSER=(${CLICKHOUSE_LOCAL} --query 'SELECT t, s, d FROM table' --structure 't DateTime, s String, d Decimal64(10)' --input-format CSV) +PARSER=(${CLICKHOUSE_LOCAL} --query 'SELECT t, s, d FROM table' --structure 't DateTime, s String, d Decimal64(10)' --input-format CSV --input_format_csv_detect_header 0) echo '2020-04-21 12:34:56, "Hello", 12345678' | "${PARSER[@]}" 2>&1| grep "ERROR" || echo "CSV" echo '2020-04-21 12:34:56, "Hello", 123456789' | "${PARSER[@]}" 2>&1| grep "ERROR" echo '2020-04-21 12:34:567, "Hello", 123456789' | "${PARSER[@]}" 2>&1| grep "ERROR" @@ -14,7 +14,7 @@ echo '2020-04-21 12:34:56, "Hello", 12345678,1' | "${PARSER[@]}" 2>&1| grep "ERR echo '2020-04-21 12:34:56,,123Hello' | "${PARSER[@]}" 2>&1| grep "ERROR" echo -e '2020-04-21 12:34:56, "Hello", 12345678\n' | "${PARSER[@]}" 2>&1| grep "ERROR" -PARSER=(${CLICKHOUSE_LOCAL} --query 'SELECT t, s, d FROM table' --structure 't DateTime, s String, d Decimal64(10)' --input-format CustomSeparatedIgnoreSpaces --format_custom_escaping_rule CSV --format_custom_field_delimiter ',' --format_custom_row_after_delimiter "") +PARSER=(${CLICKHOUSE_LOCAL} --query 'SELECT t, s, d FROM table' --structure 't DateTime, s String, d Decimal64(10)' --input-format CustomSeparatedIgnoreSpaces --format_custom_escaping_rule CSV --format_custom_field_delimiter ',' --format_custom_row_after_delimiter "" --input_format_custom_detect_header 0) echo '2020-04-21 12:34:56, "Hello", 12345678' | "${PARSER[@]}" 2>&1| grep "ERROR" || echo -e "\nCustomSeparatedIgnoreSpaces" echo '2020-04-21 12:34:56, "Hello", 123456789' | "${PARSER[@]}" 2>&1| grep "ERROR" echo '2020-04-21 12:34:567, "Hello", 123456789' | "${PARSER[@]}" 2>&1| grep "ERROR" @@ -22,7 +22,7 @@ echo '2020-04-21 12:34:56, "Hello", 12345678,1' | "${PARSER[@]}" 2>&1| grep "ERR echo '2020-04-21 12:34:56,,123Hello' | "${PARSER[@]}" 2>&1| grep "ERROR" echo -e '2020-04-21 12:34:56, "Hello", 12345678\n\n\n\n ' | "${PARSER[@]}" 2>&1| grep "ERROR" || echo "OK" -PARSER=(${CLICKHOUSE_LOCAL} --input_format_null_as_default 0 --query 'SELECT t, s, d FROM table' --structure 't DateTime, s String, d Decimal64(10)' --input-format TSV) +PARSER=(${CLICKHOUSE_LOCAL} --input_format_null_as_default 0 --query 'SELECT t, s, d FROM table' --structure 't DateTime, s String, d Decimal64(10)' --input-format TSV --input_format_tsv_detect_header 0) echo -e '2020-04-21 12:34:56\tHello\t12345678' | "${PARSER[@]}" 2>&1| grep "ERROR" || echo -e "\nTSV" echo -e '2020-04-21 12:34:56\tHello\t123456789' | "${PARSER[@]}" 2>&1| grep "ERROR" echo -e '2020-04-21 12:34:567\tHello\t123456789' | "${PARSER[@]}" 2>&1| grep "ERROR" @@ -31,7 +31,7 @@ echo -e '2020-04-21 12:34:56\t\t123Hello' | "${PARSER[@]}" 2>&1| grep "ERROR" echo -e '2020-04-21 12:34:56\tHello\t12345678\n' | "${PARSER[@]}" 2>&1| grep "ERROR" echo -e '\N\tHello\t12345678' | "${PARSER[@]}" 2>&1| grep -o "Unexpected NULL value" -PARSER=(${CLICKHOUSE_LOCAL} --query 'SELECT t, s, d FROM table' --structure 't DateTime, s String, d Decimal64(10)' --input-format CustomSeparated) +PARSER=(${CLICKHOUSE_LOCAL} --query 'SELECT t, s, d FROM table' --structure 't DateTime, s String, d Decimal64(10)' --input-format CustomSeparated --input_format_custom_detect_header 0) echo -e '2020-04-21 12:34:56\tHello\t12345678' | 
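The --input_format_*_detect_header 0 switches added throughout 01195_formats_diagnostic_info keep the new automatic header detection from treating the first crafted row as column names, so the diagnostic output stays byte-identical. The same pinning expressed in SQL, with a hypothetical file name:

-- 'data.tsv' is a placeholder; its first row is real data, not a header
SELECT * FROM file('data.tsv', 'TSV', 's String, n Nullable(String)')
SETTINGS input_format_tsv_detect_header = 0;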
"${PARSER[@]}" 2>&1| grep "ERROR" || echo -e "\nCustomSeparated" echo -e '2020-04-21 12:34:56\tHello\t123456789' | "${PARSER[@]}" 2>&1| grep "ERROR" echo -e '2020-04-21 12:34:567\tHello\t123456789' | "${PARSER[@]}" 2>&1| grep "ERROR" diff --git a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index f088cfaf00c..79399589533 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -96,6 +96,7 @@ SYSTEM DROP DNS CACHE ['SYSTEM DROP DNS','DROP DNS CACHE','DROP DNS'] GLOBAL SYS SYSTEM DROP MARK CACHE ['SYSTEM DROP MARK','DROP MARK CACHE','DROP MARKS'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP UNCOMPRESSED CACHE ['SYSTEM DROP UNCOMPRESSED','DROP UNCOMPRESSED CACHE','DROP UNCOMPRESSED'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP MMAP CACHE ['SYSTEM DROP MMAP','DROP MMAP CACHE','DROP MMAP'] GLOBAL SYSTEM DROP CACHE +SYSTEM DROP QUERY CACHE ['SYSTEM DROP QUERY','DROP QUERY CACHE','DROP QUERY'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP COMPILED EXPRESSION CACHE ['SYSTEM DROP COMPILED EXPRESSION','DROP COMPILED EXPRESSION CACHE','DROP COMPILED EXPRESSIONS'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP FILESYSTEM CACHE ['SYSTEM DROP FILESYSTEM CACHE','DROP FILESYSTEM CACHE'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP SCHEMA CACHE ['SYSTEM DROP SCHEMA CACHE','DROP SCHEMA CACHE'] GLOBAL SYSTEM DROP CACHE @@ -124,6 +125,7 @@ SYSTEM RESTORE REPLICA ['RESTORE REPLICA'] TABLE SYSTEM SYSTEM WAIT LOADING PARTS ['WAIT LOADING PARTS'] TABLE SYSTEM SYSTEM SYNC DATABASE REPLICA ['SYNC DATABASE REPLICA'] DATABASE SYSTEM SYSTEM SYNC TRANSACTION LOG ['SYNC TRANSACTION LOG'] GLOBAL SYSTEM +SYSTEM SYNC FILE CACHE ['SYNC FILE CACHE'] GLOBAL SYSTEM SYSTEM FLUSH DISTRIBUTED ['FLUSH DISTRIBUTED'] TABLE SYSTEM FLUSH SYSTEM FLUSH LOGS ['FLUSH LOGS'] GLOBAL SYSTEM FLUSH SYSTEM FLUSH [] \N SYSTEM diff --git a/tests/queries/0_stateless/01272_suspicious_codecs.reference b/tests/queries/0_stateless/01272_suspicious_codecs.reference index 559b6df2693..aa93c3192d0 100644 --- a/tests/queries/0_stateless/01272_suspicious_codecs.reference +++ b/tests/queries/0_stateless/01272_suspicious_codecs.reference @@ -6,6 +6,9 @@ CREATE TABLE default.codecs5\n(\n `a` UInt8 CODEC(LZ4, ZSTD(1))\n)\nENGINE = CREATE TABLE default.codecs6\n(\n `a` UInt8 CODEC(Delta(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 CREATE TABLE default.codecs7\n(\n `a` UInt8 CODEC(Delta(1), Delta(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 CREATE TABLE default.codecs8\n(\n `a` UInt8 CODEC(LZ4, Delta(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.codecs9\n(\n `a` UInt8 CODEC(Gorilla)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.codecs10\n(\n `a` FixedString(2) CODEC(Gorilla)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.codecs11\n(\n `a` Decimal(15, 5) CODEC(Gorilla)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 CREATE TABLE default.codecs1\n(\n `a` UInt8 CODEC(NONE, NONE)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 CREATE TABLE default.codecs2\n(\n `a` UInt8 CODEC(NONE, LZ4)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 CREATE TABLE default.codecs3\n(\n `a` UInt8 CODEC(LZ4, NONE)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS 
index_granularity = 8192 @@ -14,3 +17,6 @@ CREATE TABLE default.codecs5\n(\n `a` UInt8 CODEC(LZ4, ZSTD(1))\n)\nENGINE = CREATE TABLE default.codecs6\n(\n `a` UInt8 CODEC(Delta(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 CREATE TABLE default.codecs7\n(\n `a` UInt8 CODEC(Delta(1), Delta(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 CREATE TABLE default.codecs8\n(\n `a` UInt8 CODEC(LZ4, Delta(1))\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.codecs9\n(\n `a` UInt8 CODEC(Gorilla)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.codecs10\n(\n `a` FixedString(2) CODEC(Gorilla)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 +CREATE TABLE default.codecs11\n(\n `a` Decimal(15, 5) CODEC(Gorilla)\n)\nENGINE = MergeTree\nORDER BY tuple()\nSETTINGS index_granularity = 8192 diff --git a/tests/queries/0_stateless/01272_suspicious_codecs.sql b/tests/queries/0_stateless/01272_suspicious_codecs.sql index 5baa30e3cf4..082a8d08675 100644 --- a/tests/queries/0_stateless/01272_suspicious_codecs.sql +++ b/tests/queries/0_stateless/01272_suspicious_codecs.sql @@ -11,7 +11,7 @@ CREATE TABLE codecs c Float32 CODEC(Gorilla), d UInt8 CODEC(Delta, LZ4), e Float64 CODEC(Gorilla, ZSTD), - f UInt32 CODEC(Delta, Delta, Gorilla), + f UInt32 CODEC(Delta, Delta, T64), g DateTime CODEC(DoubleDelta), h DateTime64 CODEC(DoubleDelta, LZ4), i String CODEC(NONE) @@ -21,14 +21,19 @@ DROP TABLE codecs; -- test what should not work -CREATE TABLE codecs (a UInt8 CODEC(NONE, NONE)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError 36 } -CREATE TABLE codecs (a UInt8 CODEC(NONE, LZ4)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError 36 } -CREATE TABLE codecs (a UInt8 CODEC(LZ4, NONE)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError 36 } -CREATE TABLE codecs (a UInt8 CODEC(LZ4, LZ4)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError 36 } -CREATE TABLE codecs (a UInt8 CODEC(LZ4, ZSTD)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError 36 } -CREATE TABLE codecs (a UInt8 CODEC(Delta)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError 36 } -CREATE TABLE codecs (a UInt8 CODEC(Delta, Delta)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError 36 } -CREATE TABLE codecs (a UInt8 CODEC(LZ4, Delta)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError 36 } +CREATE TABLE codecs (a UInt8 CODEC(NONE, NONE)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS } +CREATE TABLE codecs (a UInt8 CODEC(NONE, LZ4)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS } +CREATE TABLE codecs (a UInt8 CODEC(LZ4, NONE)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS } +CREATE TABLE codecs (a UInt8 CODEC(LZ4, LZ4)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS } +CREATE TABLE codecs (a UInt8 CODEC(LZ4, ZSTD)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS } +CREATE TABLE codecs (a UInt8 CODEC(Delta)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS } +CREATE TABLE codecs (a UInt8 CODEC(Delta, Delta)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS } +CREATE TABLE codecs (a UInt8 CODEC(LZ4, Delta)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS } +CREATE TABLE codecs (a UInt8 CODEC(Gorilla)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS } +CREATE TABLE codecs (a 
FixedString(2) CODEC(Gorilla)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS } +CREATE TABLE codecs (a Decimal(15,5) CODEC(Gorilla)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS } +CREATE TABLE codecs (a Float64 CODEC(Delta, Gorilla)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS } +CREATE TABLE codecs (a Float32 CODEC(DoubleDelta, FPC)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS } -- test that sanity check is not performed in ATTACH query @@ -40,6 +45,9 @@ DROP TABLE IF EXISTS codecs5; DROP TABLE IF EXISTS codecs6; DROP TABLE IF EXISTS codecs7; DROP TABLE IF EXISTS codecs8; +DROP TABLE IF EXISTS codecs9; +DROP TABLE IF EXISTS codecs10; +DROP TABLE IF EXISTS codecs11; SET allow_suspicious_codecs = 1; @@ -51,6 +59,9 @@ CREATE TABLE codecs5 (a UInt8 CODEC(LZ4, ZSTD)) ENGINE = MergeTree ORDER BY tupl CREATE TABLE codecs6 (a UInt8 CODEC(Delta)) ENGINE = MergeTree ORDER BY tuple(); CREATE TABLE codecs7 (a UInt8 CODEC(Delta, Delta)) ENGINE = MergeTree ORDER BY tuple(); CREATE TABLE codecs8 (a UInt8 CODEC(LZ4, Delta)) ENGINE = MergeTree ORDER BY tuple(); +CREATE TABLE codecs9 (a UInt8 CODEC(Gorilla)) ENGINE = MergeTree ORDER BY tuple(); +CREATE TABLE codecs10 (a FixedString(2) CODEC(Gorilla)) ENGINE = MergeTree ORDER BY tuple(); +CREATE TABLE codecs11 (a Decimal(15,5) CODEC(Gorilla)) ENGINE = MergeTree ORDER BY tuple(); SET allow_suspicious_codecs = 0; @@ -62,6 +73,9 @@ SHOW CREATE TABLE codecs5; SHOW CREATE TABLE codecs6; SHOW CREATE TABLE codecs7; SHOW CREATE TABLE codecs8; +SHOW CREATE TABLE codecs9; +SHOW CREATE TABLE codecs10; +SHOW CREATE TABLE codecs11; DETACH TABLE codecs1; DETACH TABLE codecs2; @@ -71,6 +85,9 @@ DETACH TABLE codecs5; DETACH TABLE codecs6; DETACH TABLE codecs7; DETACH TABLE codecs8; +DETACH TABLE codecs9; +DETACH TABLE codecs10; +DETACH TABLE codecs11; ATTACH TABLE codecs1; ATTACH TABLE codecs2; @@ -80,6 +97,9 @@ ATTACH TABLE codecs5; ATTACH TABLE codecs6; ATTACH TABLE codecs7; ATTACH TABLE codecs8; +ATTACH TABLE codecs9; +ATTACH TABLE codecs10; +ATTACH TABLE codecs11; SHOW CREATE TABLE codecs1; SHOW CREATE TABLE codecs2; @@ -89,6 +109,9 @@ SHOW CREATE TABLE codecs5; SHOW CREATE TABLE codecs6; SHOW CREATE TABLE codecs7; SHOW CREATE TABLE codecs8; +SHOW CREATE TABLE codecs9; +SHOW CREATE TABLE codecs10; +SHOW CREATE TABLE codecs11; SELECT * FROM codecs1; SELECT * FROM codecs2; @@ -98,6 +121,9 @@ SELECT * FROM codecs5; SELECT * FROM codecs6; SELECT * FROM codecs7; SELECT * FROM codecs8; +SELECT * FROM codecs9; +SELECT * FROM codecs10; +SELECT * FROM codecs11; DROP TABLE codecs1; DROP TABLE codecs2; @@ -107,3 +133,6 @@ DROP TABLE codecs5; DROP TABLE codecs6; DROP TABLE codecs7; DROP TABLE codecs8; +DROP TABLE codecs9; +DROP TABLE codecs10; +DROP TABLE codecs11; diff --git a/tests/queries/0_stateless/01307_orc_output_format.sh b/tests/queries/0_stateless/01307_orc_output_format.sh index b17792af051..26535ca536b 100755 --- a/tests/queries/0_stateless/01307_orc_output_format.sh +++ b/tests/queries/0_stateless/01307_orc_output_format.sh @@ -11,11 +11,11 @@ $CLICKHOUSE_CLIENT --query="CREATE TABLE orc (uint8 UInt8, uint16 UInt16, uint32 $CLICKHOUSE_CLIENT --query="INSERT INTO orc VALUES (255, 65535, 4294967295, 100000000000, -128, -32768, -2147483648, -100000000000, 2.02, 10000.0000001, 'String', '2020', 18980, 1639872000, 1.0001, 1.00000001, 100000.00000000000001, 1), (4, 1234, 3244467295, 500000000000, -1, -256, -14741221, -7000000000, 100.1, 14321.032141201, 'Another string', '2000', 
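As 01272_suspicious_codecs shows, the codec sanity check fires on CREATE TABLE, is skipped for ATTACH, and can be bypassed explicitly. A sketch with a hypothetical table name:

SET allow_suspicious_codecs = 1;
CREATE TABLE codecs_demo (a UInt8 CODEC(Gorilla)) ENGINE = MergeTree ORDER BY tuple(); -- rejected with BAD_ARGUMENTS when the setting is 0
DROP TABLE codecs_demo;
SET allow_suspicious_codecs = 0;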
20000, 1839882000, 34.1234, 123123.123123123, 123123123.123123123123123, NULL), (42, 42, 42, 42, 42, 42, 42, 42, 42.42, 42.42, '42', '4242', 42, 42, 42.42, 42.42424242, 424242.42424242424242, 42)"; -$CLICKHOUSE_CLIENT --query="SELECT * FROM orc FORMAT ORC" > "$CURDIR"/tmp_orc_test_all_types.orc; +$CLICKHOUSE_CLIENT --query="SELECT * FROM orc FORMAT ORC" > "$CLICKHOUSE_TMP"/tmp_orc_test_all_types.orc; -cat "$CURDIR/tmp_orc_test_all_types.orc" | $CLICKHOUSE_CLIENT --query="INSERT INTO orc FORMAT ORC"; +cat "$CLICKHOUSE_TMP/tmp_orc_test_all_types.orc" | $CLICKHOUSE_CLIENT --query="INSERT INTO orc FORMAT ORC"; -rm "$CURDIR/tmp_orc_test_all_types.orc" +rm "$CLICKHOUSE_TMP/tmp_orc_test_all_types.orc" $CLICKHOUSE_CLIENT --query="SELECT * FROM orc"; diff --git a/tests/queries/0_stateless/01417_freeze_partition_verbose.sh b/tests/queries/0_stateless/01417_freeze_partition_verbose.sh index 1af700c1f6e..9f6ae260750 100755 --- a/tests/queries/0_stateless/01417_freeze_partition_verbose.sh +++ b/tests/queries/0_stateless/01417_freeze_partition_verbose.sh @@ -22,40 +22,40 @@ ${CLICKHOUSE_CLIENT} --query "INSERT INTO table_for_freeze_old_syntax SELECT toD ${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze FREEZE WITH NAME 'test_01417' FORMAT TSVWithNames SETTINGS alter_partition_verbose_result = 1;" \ | ${CLICKHOUSE_LOCAL} --structure "$ALTER_OUT_STRUCTURE, $FREEZE_OUT_STRUCTURE" \ - --query "SELECT command_type, partition_id, part_name, backup_name FROM table" + --query "SELECT command_type, partition_id, part_name, backup_name FROM table FORMAT TSVWithNames" ${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze FREEZE PARTITION '3' WITH NAME 'test_01417_single_part' FORMAT TSVWithNames SETTINGS alter_partition_verbose_result = 1;" \ | ${CLICKHOUSE_LOCAL} --structure "$ALTER_OUT_STRUCTURE, $FREEZE_OUT_STRUCTURE" \ - --query "SELECT command_type, partition_id, part_name, backup_name FROM table" + --query "SELECT command_type, partition_id, part_name, backup_name FROM table FORMAT TSVWithNames" ${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze DETACH PARTITION '3';" ${CLICKHOUSE_CLIENT} --query "INSERT INTO table_for_freeze VALUES (3, '3');" ${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze ATTACH PARTITION '3' FORMAT TSVWithNames SETTINGS alter_partition_verbose_result = 1;" \ | ${CLICKHOUSE_LOCAL} --structure "$ALTER_OUT_STRUCTURE, $ATTACH_OUT_STRUCTURE" \ - --query "SELECT command_type, partition_id, part_name, old_part_name FROM table" + --query "SELECT command_type, partition_id, part_name, old_part_name FROM table FORMAT TSVWithNames" ${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze DETACH PARTITION '5';" ${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze FREEZE PARTITION '7' WITH NAME 'test_01417_single_part_7', ATTACH PART '5_6_6_0' FORMAT TSVWithNames SETTINGS alter_partition_verbose_result = 1;" \ | ${CLICKHOUSE_LOCAL} --structure "$ALTER_OUT_STRUCTURE, $FREEZE_OUT_STRUCTURE, $ATTACH_OUT_STRUCTURE" \ - --query "SELECT command_type, partition_id, part_name, backup_name, old_part_name FROM table" + --query "SELECT command_type, partition_id, part_name, backup_name, old_part_name FROM table FORMAT TSVWithNames" # Unfreeze partition ${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze UNFREEZE PARTITION '7' WITH NAME 'test_01417_single_part_7' FORMAT TSVWithNames SETTINGS alter_partition_verbose_result = 1;" \ | ${CLICKHOUSE_LOCAL} --structure "$ALTER_OUT_STRUCTURE, $FREEZE_OUT_STRUCTURE" \ - --query "SELECT command_type, partition_id, part_name, 
backup_name FROM table" + --query "SELECT command_type, partition_id, part_name, backup_name FROM table FORMAT TSVWithNames" # Freeze partition with old syntax ${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze_old_syntax FREEZE PARTITION '202103' WITH NAME 'test_01417_single_part_old_syntax' FORMAT TSVWithNames SETTINGS alter_partition_verbose_result = 1;" \ | ${CLICKHOUSE_LOCAL} --structure "$ALTER_OUT_STRUCTURE, $FREEZE_OUT_STRUCTURE" \ - --query "SELECT command_type, partition_id, part_name, backup_name FROM table" + --query "SELECT command_type, partition_id, part_name, backup_name FROM table FORMAT TSVWithNames" # Unfreeze partition with old syntax ${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze_old_syntax UNFREEZE PARTITION '202103' WITH NAME 'test_01417_single_part_old_syntax' FORMAT TSVWithNames SETTINGS alter_partition_verbose_result = 1;" \ | ${CLICKHOUSE_LOCAL} --structure "$ALTER_OUT_STRUCTURE, $FREEZE_OUT_STRUCTURE" \ - --query "SELECT command_type, partition_id, part_name, backup_name FROM table" + --query "SELECT command_type, partition_id, part_name, backup_name FROM table FORMAT TSVWithNames" # Unfreeze the whole backup with SYSTEM query ${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze FREEZE PARTITION '7' WITH NAME 'test_01417_single_part_7_system'" diff --git a/tests/queries/0_stateless/01417_freeze_partition_verbose_zookeeper.sh b/tests/queries/0_stateless/01417_freeze_partition_verbose_zookeeper.sh index 4629450c1f9..1fd8a2b29c6 100755 --- a/tests/queries/0_stateless/01417_freeze_partition_verbose_zookeeper.sh +++ b/tests/queries/0_stateless/01417_freeze_partition_verbose_zookeeper.sh @@ -18,24 +18,24 @@ ${CLICKHOUSE_CLIENT} --insert_keeper_fault_injection_probability=0 --query "INSE ${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze_replicated FREEZE WITH NAME 'test_01417' FORMAT TSVWithNames SETTINGS alter_partition_verbose_result = 1;" \ | ${CLICKHOUSE_LOCAL} --structure "$ALTER_OUT_STRUCTURE, $FREEZE_OUT_STRUCTURE" \ - --query "SELECT command_type, partition_id, part_name, backup_name FROM table" + --query "SELECT command_type, partition_id, part_name, backup_name FROM table FORMAT TSVWithNames" ${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze_replicated FREEZE PARTITION '3' WITH NAME 'test_01417_single_part' FORMAT TSVWithNames SETTINGS alter_partition_verbose_result = 1;" \ | ${CLICKHOUSE_LOCAL} --structure "$ALTER_OUT_STRUCTURE, $FREEZE_OUT_STRUCTURE" \ - --query "SELECT command_type, partition_id, part_name, backup_name FROM table" + --query "SELECT command_type, partition_id, part_name, backup_name FROM table FORMAT TSVWithNames" ${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze_replicated DETACH PARTITION '3';" ${CLICKHOUSE_CLIENT} --insert_keeper_fault_injection_probability=0 --query "INSERT INTO table_for_freeze_replicated VALUES (3, '3');" ${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze_replicated ATTACH PARTITION '3' FORMAT TSVWithNames SETTINGS alter_partition_verbose_result = 1;" \ | ${CLICKHOUSE_LOCAL} --structure "$ALTER_OUT_STRUCTURE, $ATTACH_OUT_STRUCTURE" \ - --query "SELECT command_type, partition_id, part_name, old_part_name FROM table" + --query "SELECT command_type, partition_id, part_name, old_part_name FROM table FORMAT TSVWithNames" ${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze_replicated DETACH PARTITION '5';" ${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze_replicated FREEZE PARTITION '7' WITH NAME 'test_01417_single_part_7', ATTACH PART '5_0_0_0' 
FORMAT TSVWithNames SETTINGS alter_partition_verbose_result = 1;" \ | ${CLICKHOUSE_LOCAL} --structure "$ALTER_OUT_STRUCTURE, $FREEZE_OUT_STRUCTURE, $ATTACH_OUT_STRUCTURE" \ - --query "SELECT command_type, partition_id, part_name, backup_name, old_part_name FROM table" + --query "SELECT command_type, partition_id, part_name, backup_name, old_part_name FROM table FORMAT TSVWithNames" # teardown ${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS table_for_freeze_replicated SYNC;" diff --git a/tests/queries/0_stateless/01508_query_obfuscator.reference b/tests/queries/0_stateless/01508_query_obfuscator.reference index 7d910734dbd..9268b444d90 100644 --- a/tests/queries/0_stateless/01508_query_obfuscator.reference +++ b/tests/queries/0_stateless/01508_query_obfuscator.reference @@ -1,16 +1,16 @@ -SELECT 116, 'Qqfu://2020-02-10isqkc1203 sp 2000-05-27T18:38:01', 13e100, Obsidian_id_diverTeam, sweets(Workplace), avgIf(remote('128.0.0.1')) -SELECT treasury_mammoth_hazelnut between nutmeg and span, case when chive >= 116 then switching else null end +SELECT 116, 'Qqfu://2020-02-10isqkc1203 sp 2000-05-27T18:38:01', 13e100, Jewelry_id_studyBeast, algebra(Stable), avgIf(remote('128.0.0.1')) +SELECT surfboard_solitaire_crunch between understanding and populist, case when instrument >= 116 then poverty else null end SELECT - EarthquakeID, - Workout.ID, Workout.CoupleThrill, - MedalEMPIRE, - HOPE.ListingName, HOPE.ListingBomb, HOPE.ListingRamen, HOPE.ListingResult, HOPE.CoupleThrill, HOPE.Smile -FROM merge.marsh_agreement + BugleID, + Reliability.ID, Reliability.ExperiencePrevalence, + DepressiveTURKEY, + SPARK.RainmakerName, SPARK.RainmakerReligion, SPARK.RainmakerMisfit, SPARK.RainmakerAardvark, SPARK.ExperiencePrevalence, SPARK.Type +FROM merge.invader_schizophrenic WHERE - RecapitulationLeaver >= '2020-10-13' AND RecapitulationLeaver <= '2020-10-21' - AND MasonryID = 30750384 - AND intHash32(EyeballID) = 448362928 AND intHash64(EyeballID) = 12572659331310383983 - AND EarthquakeID IN (8195672321757027078, 7079643623150622129, 5057006826979676478, 7886875230160484653, 7494974311229040743) - AND Aide = 1 + PortraitInvasion >= '2020-10-13' AND PortraitInvasion <= '2020-10-21' + AND FrownID = 30750384 + AND intHash32(HaversackID) = 448362928 AND intHash64(HaversackID) = 12572659331310383983 + AND BugleID IN (8195672321757027078, 7079643623150622129, 5057006826979676478, 7886875230160484653, 7494974311229040743) + AND Hostel = 1 diff --git a/tests/queries/0_stateless/01666_blns_long.sql b/tests/queries/0_stateless/01666_blns_long.sql index 74054551b18..01295b11138 100644 --- a/tests/queries/0_stateless/01666_blns_long.sql +++ b/tests/queries/0_stateless/01666_blns_long.sql @@ -1,5 +1,4 @@ --- Tags: long, no-fasttest --- Tag no-fasttest: JSON functions +-- Tags: long -- https://github.com/minimaxir/big-list-of-naughty-strings diff --git a/tests/queries/0_stateless/01685_json_extract_double_as_float.sql b/tests/queries/0_stateless/01685_json_extract_double_as_float.sql index ffbddf43907..0f9827a279d 100644 --- a/tests/queries/0_stateless/01685_json_extract_double_as_float.sql +++ b/tests/queries/0_stateless/01685_json_extract_double_as_float.sql @@ -1,4 +1,3 @@ --- Tags: no-fasttest WITH '{ "v":1.1}' AS raw SELECT diff --git a/tests/queries/0_stateless/01744_fuse_sum_count_aggregate.reference b/tests/queries/0_stateless/01744_fuse_sum_count_aggregate.reference deleted file mode 100644 index 70c19fc8ced..00000000000 --- a/tests/queries/0_stateless/01744_fuse_sum_count_aggregate.reference +++ /dev/null @@ -1,12 
+0,0 @@ -210 230 20 -SELECT - sum(a), - sumCount(b).1, - sumCount(b).2 -FROM fuse_tbl ----------NOT trigger fuse-------- -210 11.5 -SELECT - sum(a), - avg(b) -FROM fuse_tbl diff --git a/tests/queries/0_stateless/01744_fuse_sum_count_aggregate.sql b/tests/queries/0_stateless/01744_fuse_sum_count_aggregate.sql deleted file mode 100644 index 375662eb405..00000000000 --- a/tests/queries/0_stateless/01744_fuse_sum_count_aggregate.sql +++ /dev/null @@ -1,14 +0,0 @@ -DROP TABLE IF EXISTS fuse_tbl; -CREATE TABLE fuse_tbl(a Int8, b Int8) Engine = Log; -INSERT INTO fuse_tbl SELECT number, number + 1 FROM numbers(1, 20); - -SET optimize_syntax_fuse_functions = 1; -SET optimize_fuse_sum_count_avg = 1; - -SELECT sum(a), sum(b), count(b) from fuse_tbl; -EXPLAIN SYNTAX SELECT sum(a), sum(b), count(b) from fuse_tbl; -SELECT '---------NOT trigger fuse--------'; -SELECT sum(a), avg(b) from fuse_tbl; -EXPLAIN SYNTAX SELECT sum(a), avg(b) from fuse_tbl; - -DROP TABLE fuse_tbl; diff --git a/tests/queries/0_stateless/01825_type_json_ephemeral.sql b/tests/queries/0_stateless/01825_type_json_ephemeral.sql index 4485510e419..ac047ad7040 100644 --- a/tests/queries/0_stateless/01825_type_json_ephemeral.sql +++ b/tests/queries/0_stateless/01825_type_json_ephemeral.sql @@ -1,4 +1,3 @@ --- Tags: no-fasttest SET allow_experimental_object_type = 1; diff --git a/tests/queries/0_stateless/01881_join_on_conditions_hash.sql.j2 b/tests/queries/0_stateless/01881_join_on_conditions_hash.sql.j2 index 0d6bef7fadb..d2cc066a1b1 100644 --- a/tests/queries/0_stateless/01881_join_on_conditions_hash.sql.j2 +++ b/tests/queries/0_stateless/01881_join_on_conditions_hash.sql.j2 @@ -30,6 +30,8 @@ SELECT t1.key, t1.key2 FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t2.key == SELECT '--'; SELECT t1.key FROM t1 INNER ANY JOIN t2 ON t1.id == t2.id AND t2.key == t2.key2 AND t1.key == t1.key2; +SELECT t1.key FROM t1 INNER ANY JOIN t2 ON t1.id == t2.id AND t2.key == t2.key2 AND t1.key == t1.key2 AND 0; -- { serverError INVALID_JOIN_ON_EXPRESSION } +SELECT t1.key FROM t1 INNER ANY JOIN t2 ON t1.id == t2.id AND t2.key == t2.key2 AND t1.key == t1.key2 AND 1; -- { serverError INVALID_JOIN_ON_EXPRESSION } SELECT '--'; SELECT '333' = t1.key FROM t1 INNER ANY JOIN t2 ON t1.id == t2.id AND t2.key == t2.key2 AND t1.key == t1.key2 AND t2.id > 2; diff --git a/tests/queries/0_stateless/01915_json_extract_raw_string.sql b/tests/queries/0_stateless/01915_json_extract_raw_string.sql index 98bff692d71..e81d527a3da 100644 --- a/tests/queries/0_stateless/01915_json_extract_raw_string.sql +++ b/tests/queries/0_stateless/01915_json_extract_raw_string.sql @@ -1,4 +1,3 @@ --- Tags: no-fasttest select JSONExtract('{"a": "123", "b": 456, "c": [7, 8, 9]}', 'Tuple(a String, b String, c String)'); diff --git a/tests/queries/0_stateless/02013_json_function_null_column.sql b/tests/queries/0_stateless/02013_json_function_null_column.sql index bf680dfe0b5..94a2320cefb 100644 --- a/tests/queries/0_stateless/02013_json_function_null_column.sql +++ b/tests/queries/0_stateless/02013_json_function_null_column.sql @@ -1,4 +1,3 @@ --- Tags: no-fasttest SELECT JSONExtract('{"string_value":null}', 'string_value', 'Nullable(String)') as x, toTypeName(x); SELECT JSONExtract('{"string_value":null}', 'string_value', 'String') as x, toTypeName(x); diff --git a/tests/queries/0_stateless/02016_aggregation_spark_bar.sql b/tests/queries/0_stateless/02016_aggregation_spark_bar.sql index 8b5b62305ec..5237f832d25 100644 --- a/tests/queries/0_stateless/02016_aggregation_spark_bar.sql +++ 
b/tests/queries/0_stateless/02016_aggregation_spark_bar.sql @@ -35,3 +35,10 @@ SELECT sparkbar(5,toDate('2020-01-01'),toDate('2020-01-10'))(event_date,cnt) FRO DROP TABLE IF EXISTS spark_bar_test; WITH number DIV 50 AS k, number % 50 AS value SELECT k, sparkbar(50, 0, 99)(number, value) FROM numbers(100) GROUP BY k ORDER BY k; + +-- OOM guard +DROP TABLE IF EXISTS spark_bar_oom; +CREATE TABLE spark_bar_oom (x UInt64, y UInt8) Engine=MergeTree ORDER BY tuple(); +INSERT INTO spark_bar_oom VALUES (18446744073709551615,255),(0,0),(0,0),(4036797895307271799,163); +SELECT sparkbar(9)(x,y) FROM spark_bar_oom SETTINGS max_memory_usage = 100000000; -- { serverError 241 } +DROP TABLE IF EXISTS spark_bar_oom; diff --git a/tests/queries/0_stateless/02051_read_settings.reference.j2 b/tests/queries/0_stateless/02051_read_settings.reference.j2 index 391cf3adf35..ee6c4bdd918 100644 --- a/tests/queries/0_stateless/02051_read_settings.reference.j2 +++ b/tests/queries/0_stateless/02051_read_settings.reference.j2 @@ -1,5 +1,5 @@ {% for index_granularity_bytes in [0, 10 * 1024 * 1024] -%} -{% for read_method in ['read', 'mmap', 'pread_threadpool', 'pread_fake_async'] -%} +{% for read_method in ['read', 'mmap', 'io_uring', 'pread_threadpool', 'pread_fake_async'] -%} {% for direct_io in [0, 1] -%} {% for prefetch in [0, 1] -%} {% for priority in [0, 1] -%} diff --git a/tests/queries/0_stateless/02051_read_settings.sql.j2 b/tests/queries/0_stateless/02051_read_settings.sql.j2 index 69dd3c264ba..1f121b0c268 100644 --- a/tests/queries/0_stateless/02051_read_settings.sql.j2 +++ b/tests/queries/0_stateless/02051_read_settings.sql.j2 @@ -19,7 +19,7 @@ settings as select number, repeat(toString(number), 5) from numbers(1e6); {# check each local_filesystem_read_method #} -{% for read_method in ['read', 'mmap', 'pread_threadpool', 'pread_fake_async'] %} +{% for read_method in ['read', 'mmap', 'io_uring', 'pread_threadpool', 'pread_fake_async'] %} {# check w/ O_DIRECT and w/o (min_bytes_to_use_direct_io) #} {% for direct_io in [0, 1] %} {# check local_filesystem_read_prefetch (just a smoke test) #} diff --git a/tests/queries/0_stateless/02103_tsv_csv_custom_null_representation.sh b/tests/queries/0_stateless/02103_tsv_csv_custom_null_representation.sh index b28c56f9266..967c6538bb1 100755 --- a/tests/queries/0_stateless/02103_tsv_csv_custom_null_representation.sh +++ b/tests/queries/0_stateless/02103_tsv_csv_custom_null_representation.sh @@ -97,37 +97,37 @@ echo 'Corner cases' echo 'TSV' echo -e "Some text\tCustomNull" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_tsv_null_representation='CustomNull', input_format_parallel_parsing=0" +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_tsv_null_representation='CustomNull', input_format_parallel_parsing=0, input_format_tsv_detect_header=0" echo -e "Some text\tCustomNull Some text" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_tsv_null_representation='CustomNull', input_format_parallel_parsing=0" +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', 
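The OOM guard added to 02016_aggregation_spark_bar feeds sparkbar x values spanning the whole UInt64 range, which previously could exhaust memory while bucketing; the test caps max_memory_usage and expects error 241 (MEMORY_LIMIT_EXCEEDED). Ordinary usage of the aggregate, for contrast, as a sketch:

SELECT sparkbar(9)(number, number) FROM numbers(100);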
max_read_buffer_size=15, format_tsv_null_representation='CustomNull', input_format_parallel_parsing=0, input_format_tsv_detect_header=0" echo -e "Some text\t123NNN" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(Int32)') settings storage_file_read_method='pread', max_read_buffer_size=14, format_tsv_null_representation='123NN', input_format_parallel_parsing=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(Int32)') settings storage_file_read_method='pread', max_read_buffer_size=14, format_tsv_null_representation='123NN', input_format_parallel_parsing=0, input_format_tsv_detect_header=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' echo -e "Some text\tNU\tLL" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=13, format_tsv_null_representation='NU\tL', input_format_parallel_parsing=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=13, format_tsv_null_representation='NU\tL', input_format_parallel_parsing=0, input_format_tsv_detect_header=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' echo 'CSV' echo -e "Some text,CustomNull" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_csv_null_representation='CustomNull', input_format_parallel_parsing=0" +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_csv_null_representation='CustomNull', input_format_parallel_parsing=0, input_format_csv_detect_header=0" echo -e "Some text,CustomNull Some text" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_csv_null_representation='CustomNull', input_format_parallel_parsing=0" +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_csv_null_representation='CustomNull', input_format_parallel_parsing=0, input_format_csv_detect_header=0" -echo -e "Some text,123NNN" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(Int32)') settings storage_file_read_method='pread', max_read_buffer_size=14, format_csv_null_representation='123NN', input_format_parallel_parsing=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' +echo -e "Some text,123NNN\n" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(Int32)') settings storage_file_read_method='pread', max_read_buffer_size=14, format_csv_null_representation='123NN', input_format_parallel_parsing=0, input_format_csv_detect_header=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' -echo -e "Some text,NU,LL" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * 
FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=13, format_csv_null_representation='NU,L', input_format_parallel_parsing=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' +echo -e "Some text,NU,LL\n" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=13, format_csv_null_representation='NU,L', input_format_parallel_parsing=0, input_format_csv_detect_header=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' echo 'Large custom NULL' $CLICKHOUSE_CLIENT -q "select '0000000000Custom NULL representation0000000000' FROM numbers(10)" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS storage_file_read_method='pread', max_read_buffer_size=5, input_format_parallel_parsing=0, format_tsv_null_representation='0000000000Custom NULL representation0000000000'" -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS storage_file_read_method='pread', max_read_buffer_size=5, input_format_parallel_parsing=0, format_tsv_null_representation='0000000000Custom NULL representation000000000'" +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS storage_file_read_method='pread', max_read_buffer_size=5, input_format_parallel_parsing=0, format_tsv_null_representation='0000000000Custom NULL representation0000000000', input_format_tsv_detect_header=0" +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS storage_file_read_method='pread', max_read_buffer_size=5, input_format_parallel_parsing=0, format_tsv_null_representation='0000000000Custom NULL representation000000000', input_format_tsv_detect_header=0" rm $DATA_FILE diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 9e065c455e9..80fe6aeefec 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -266,7 +266,9 @@ CREATE TABLE system.formats ( `name` String, `is_input` UInt8, - `is_output` UInt8 + `is_output` UInt8, + `supports_parallel_parsing` UInt8, + `supports_parallel_formatting` UInt8 ) ENGINE = SystemFormats COMMENT 'SYSTEM TABLE is built on the fly.' 
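The two columns added to system.formats advertise per-format support for parallel parsing and parallel formatting; they can be inspected directly, for example:

SELECT name, supports_parallel_parsing, supports_parallel_formatting
FROM system.formats
WHERE name IN ('CSV', 'Native')
ORDER BY name;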
@@ -286,7 +288,7 @@ CREATE TABLE system.grants ( `user_name` Nullable(String), `role_name` Nullable(String), - `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE FUNCTION' = 53, 'CREATE NAMED COLLECTION' = 54, 'CREATE' = 55, 'DROP DATABASE' = 56, 'DROP TABLE' = 57, 'DROP VIEW' = 58, 'DROP DICTIONARY' = 59, 'DROP FUNCTION' = 60, 'DROP NAMED COLLECTION' = 61, 'DROP' = 62, 'TRUNCATE' = 63, 'OPTIMIZE' = 64, 'BACKUP' = 65, 'KILL QUERY' = 66, 'KILL TRANSACTION' = 67, 'MOVE PARTITION BETWEEN SHARDS' = 68, 'CREATE USER' = 69, 'ALTER USER' = 70, 'DROP USER' = 71, 'CREATE ROLE' = 72, 'ALTER ROLE' = 73, 'DROP ROLE' = 74, 'ROLE ADMIN' = 75, 'CREATE ROW POLICY' = 76, 'ALTER ROW POLICY' = 77, 'DROP ROW POLICY' = 78, 'CREATE QUOTA' = 79, 'ALTER QUOTA' = 80, 'DROP QUOTA' = 81, 'CREATE SETTINGS PROFILE' = 82, 'ALTER SETTINGS PROFILE' = 83, 'DROP SETTINGS PROFILE' = 84, 'SHOW USERS' = 85, 'SHOW ROLES' = 86, 'SHOW ROW POLICIES' = 87, 'SHOW QUOTAS' = 88, 'SHOW SETTINGS PROFILES' = 89, 'SHOW ACCESS' = 90, 'SHOW NAMED COLLECTIONS' = 91, 'ACCESS MANAGEMENT' = 92, 'SYSTEM SHUTDOWN' = 93, 'SYSTEM DROP DNS CACHE' = 94, 'SYSTEM DROP MARK CACHE' = 95, 'SYSTEM DROP UNCOMPRESSED CACHE' = 96, 'SYSTEM DROP MMAP CACHE' = 97, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 98, 'SYSTEM DROP FILESYSTEM CACHE' = 99, 'SYSTEM DROP SCHEMA CACHE' = 100, 'SYSTEM DROP CACHE' = 101, 'SYSTEM RELOAD CONFIG' = 102, 'SYSTEM RELOAD USERS' = 103, 'SYSTEM RELOAD SYMBOLS' = 104, 'SYSTEM RELOAD DICTIONARY' = 105, 'SYSTEM RELOAD MODEL' = 106, 'SYSTEM RELOAD FUNCTION' = 107, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 108, 'SYSTEM RELOAD' = 109, 'SYSTEM RESTART DISK' = 110, 'SYSTEM MERGES' = 111, 'SYSTEM TTL MERGES' = 112, 'SYSTEM FETCHES' = 113, 'SYSTEM MOVES' = 114, 'SYSTEM DISTRIBUTED SENDS' = 115, 'SYSTEM REPLICATED SENDS' = 116, 'SYSTEM SENDS' = 117, 'SYSTEM REPLICATION QUEUES' = 118, 'SYSTEM DROP REPLICA' = 119, 'SYSTEM SYNC REPLICA' = 120, 'SYSTEM RESTART REPLICA' = 121, 'SYSTEM RESTORE REPLICA' = 122, 'SYSTEM WAIT LOADING PARTS' = 123, 'SYSTEM SYNC DATABASE REPLICA' = 124, 'SYSTEM SYNC TRANSACTION LOG' = 125, 'SYSTEM FLUSH DISTRIBUTED' = 126, 'SYSTEM FLUSH LOGS' = 127, 'SYSTEM FLUSH' = 128, 'SYSTEM THREAD FUZZER' = 129, 'SYSTEM UNFREEZE' = 130, 'SYSTEM' = 131, 
'dictGet' = 132, 'addressToLine' = 133, 'addressToLineWithInlines' = 134, 'addressToSymbol' = 135, 'demangle' = 136, 'INTROSPECTION' = 137, 'FILE' = 138, 'URL' = 139, 'REMOTE' = 140, 'MONGO' = 141, 'MEILISEARCH' = 142, 'MYSQL' = 143, 'POSTGRES' = 144, 'SQLITE' = 145, 'ODBC' = 146, 'JDBC' = 147, 'HDFS' = 148, 'S3' = 149, 'HIVE' = 150, 'SOURCES' = 151, 'CLUSTER' = 152, 'ALL' = 153, 'NONE' = 154), + `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE FUNCTION' = 53, 'CREATE NAMED COLLECTION' = 54, 'CREATE' = 55, 'DROP DATABASE' = 56, 'DROP TABLE' = 57, 'DROP VIEW' = 58, 'DROP DICTIONARY' = 59, 'DROP FUNCTION' = 60, 'DROP NAMED COLLECTION' = 61, 'DROP' = 62, 'TRUNCATE' = 63, 'OPTIMIZE' = 64, 'BACKUP' = 65, 'KILL QUERY' = 66, 'KILL TRANSACTION' = 67, 'MOVE PARTITION BETWEEN SHARDS' = 68, 'CREATE USER' = 69, 'ALTER USER' = 70, 'DROP USER' = 71, 'CREATE ROLE' = 72, 'ALTER ROLE' = 73, 'DROP ROLE' = 74, 'ROLE ADMIN' = 75, 'CREATE ROW POLICY' = 76, 'ALTER ROW POLICY' = 77, 'DROP ROW POLICY' = 78, 'CREATE QUOTA' = 79, 'ALTER QUOTA' = 80, 'DROP QUOTA' = 81, 'CREATE SETTINGS PROFILE' = 82, 'ALTER SETTINGS PROFILE' = 83, 'DROP SETTINGS PROFILE' = 84, 'SHOW USERS' = 85, 'SHOW ROLES' = 86, 'SHOW ROW POLICIES' = 87, 'SHOW QUOTAS' = 88, 'SHOW SETTINGS PROFILES' = 89, 'SHOW ACCESS' = 90, 'SHOW NAMED COLLECTIONS' = 91, 'ACCESS MANAGEMENT' = 92, 'SYSTEM SHUTDOWN' = 93, 'SYSTEM DROP DNS CACHE' = 94, 'SYSTEM DROP MARK CACHE' = 95, 'SYSTEM DROP UNCOMPRESSED CACHE' = 96, 'SYSTEM DROP MMAP CACHE' = 97, 'SYSTEM DROP QUERY CACHE' = 98, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 99, 'SYSTEM DROP FILESYSTEM CACHE' = 100, 'SYSTEM DROP SCHEMA CACHE' = 101, 'SYSTEM DROP CACHE' = 102, 'SYSTEM RELOAD CONFIG' = 103, 'SYSTEM RELOAD USERS' = 104, 'SYSTEM RELOAD SYMBOLS' = 105, 'SYSTEM RELOAD DICTIONARY' = 106, 'SYSTEM RELOAD MODEL' = 107, 'SYSTEM RELOAD FUNCTION' = 108, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 109, 'SYSTEM RELOAD' = 110, 'SYSTEM RESTART DISK' = 111, 'SYSTEM MERGES' = 112, 'SYSTEM TTL MERGES' = 113, 'SYSTEM FETCHES' = 114, 'SYSTEM MOVES' = 115, 'SYSTEM DISTRIBUTED SENDS' = 116, 'SYSTEM REPLICATED SENDS' = 117, 'SYSTEM SENDS' = 118, 'SYSTEM REPLICATION QUEUES' = 119, 'SYSTEM DROP REPLICA' = 120, 'SYSTEM SYNC REPLICA' = 121, 
'SYSTEM RESTART REPLICA' = 122, 'SYSTEM RESTORE REPLICA' = 123, 'SYSTEM WAIT LOADING PARTS' = 124, 'SYSTEM SYNC DATABASE REPLICA' = 125, 'SYSTEM SYNC TRANSACTION LOG' = 126, 'SYSTEM SYNC FILE CACHE' = 127, 'SYSTEM FLUSH DISTRIBUTED' = 128, 'SYSTEM FLUSH LOGS' = 129, 'SYSTEM FLUSH' = 130, 'SYSTEM THREAD FUZZER' = 131, 'SYSTEM UNFREEZE' = 132, 'SYSTEM' = 133, 'dictGet' = 134, 'addressToLine' = 135, 'addressToLineWithInlines' = 136, 'addressToSymbol' = 137, 'demangle' = 138, 'INTROSPECTION' = 139, 'FILE' = 140, 'URL' = 141, 'REMOTE' = 142, 'MONGO' = 143, 'MEILISEARCH' = 144, 'MYSQL' = 145, 'POSTGRES' = 146, 'SQLITE' = 147, 'ODBC' = 148, 'JDBC' = 149, 'HDFS' = 150, 'S3' = 151, 'HIVE' = 152, 'SOURCES' = 153, 'CLUSTER' = 154, 'ALL' = 155, 'NONE' = 156), `database` Nullable(String), `table` Nullable(String), `column` Nullable(String), @@ -567,10 +569,10 @@ ENGINE = SystemPartsColumns COMMENT 'SYSTEM TABLE is built on the fly.' CREATE TABLE system.privileges ( - `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE FUNCTION' = 53, 'CREATE NAMED COLLECTION' = 54, 'CREATE' = 55, 'DROP DATABASE' = 56, 'DROP TABLE' = 57, 'DROP VIEW' = 58, 'DROP DICTIONARY' = 59, 'DROP FUNCTION' = 60, 'DROP NAMED COLLECTION' = 61, 'DROP' = 62, 'TRUNCATE' = 63, 'OPTIMIZE' = 64, 'BACKUP' = 65, 'KILL QUERY' = 66, 'KILL TRANSACTION' = 67, 'MOVE PARTITION BETWEEN SHARDS' = 68, 'CREATE USER' = 69, 'ALTER USER' = 70, 'DROP USER' = 71, 'CREATE ROLE' = 72, 'ALTER ROLE' = 73, 'DROP ROLE' = 74, 'ROLE ADMIN' = 75, 'CREATE ROW POLICY' = 76, 'ALTER ROW POLICY' = 77, 'DROP ROW POLICY' = 78, 'CREATE QUOTA' = 79, 'ALTER QUOTA' = 80, 'DROP QUOTA' = 81, 'CREATE SETTINGS PROFILE' = 82, 'ALTER SETTINGS PROFILE' = 83, 'DROP SETTINGS PROFILE' = 84, 'SHOW USERS' = 85, 'SHOW ROLES' = 86, 'SHOW ROW POLICIES' = 87, 'SHOW QUOTAS' = 88, 'SHOW SETTINGS PROFILES' = 89, 'SHOW ACCESS' = 90, 'SHOW NAMED COLLECTIONS' = 91, 'ACCESS MANAGEMENT' = 92, 'SYSTEM SHUTDOWN' = 93, 'SYSTEM DROP DNS CACHE' = 94, 'SYSTEM DROP MARK CACHE' = 95, 'SYSTEM DROP UNCOMPRESSED CACHE' = 96, 'SYSTEM DROP MMAP CACHE' = 97, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 98, 'SYSTEM DROP FILESYSTEM CACHE' = 99, 'SYSTEM DROP SCHEMA CACHE' = 100, 'SYSTEM DROP CACHE' = 101, 'SYSTEM RELOAD CONFIG' = 102, 
'SYSTEM RELOAD USERS' = 103, 'SYSTEM RELOAD SYMBOLS' = 104, 'SYSTEM RELOAD DICTIONARY' = 105, 'SYSTEM RELOAD MODEL' = 106, 'SYSTEM RELOAD FUNCTION' = 107, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 108, 'SYSTEM RELOAD' = 109, 'SYSTEM RESTART DISK' = 110, 'SYSTEM MERGES' = 111, 'SYSTEM TTL MERGES' = 112, 'SYSTEM FETCHES' = 113, 'SYSTEM MOVES' = 114, 'SYSTEM DISTRIBUTED SENDS' = 115, 'SYSTEM REPLICATED SENDS' = 116, 'SYSTEM SENDS' = 117, 'SYSTEM REPLICATION QUEUES' = 118, 'SYSTEM DROP REPLICA' = 119, 'SYSTEM SYNC REPLICA' = 120, 'SYSTEM RESTART REPLICA' = 121, 'SYSTEM RESTORE REPLICA' = 122, 'SYSTEM WAIT LOADING PARTS' = 123, 'SYSTEM SYNC DATABASE REPLICA' = 124, 'SYSTEM SYNC TRANSACTION LOG' = 125, 'SYSTEM FLUSH DISTRIBUTED' = 126, 'SYSTEM FLUSH LOGS' = 127, 'SYSTEM FLUSH' = 128, 'SYSTEM THREAD FUZZER' = 129, 'SYSTEM UNFREEZE' = 130, 'SYSTEM' = 131, 'dictGet' = 132, 'addressToLine' = 133, 'addressToLineWithInlines' = 134, 'addressToSymbol' = 135, 'demangle' = 136, 'INTROSPECTION' = 137, 'FILE' = 138, 'URL' = 139, 'REMOTE' = 140, 'MONGO' = 141, 'MEILISEARCH' = 142, 'MYSQL' = 143, 'POSTGRES' = 144, 'SQLITE' = 145, 'ODBC' = 146, 'JDBC' = 147, 'HDFS' = 148, 'S3' = 149, 'HIVE' = 150, 'SOURCES' = 151, 'CLUSTER' = 152, 'ALL' = 153, 'NONE' = 154), + `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE FUNCTION' = 53, 'CREATE NAMED COLLECTION' = 54, 'CREATE' = 55, 'DROP DATABASE' = 56, 'DROP TABLE' = 57, 'DROP VIEW' = 58, 'DROP DICTIONARY' = 59, 'DROP FUNCTION' = 60, 'DROP NAMED COLLECTION' = 61, 'DROP' = 62, 'TRUNCATE' = 63, 'OPTIMIZE' = 64, 'BACKUP' = 65, 'KILL QUERY' = 66, 'KILL TRANSACTION' = 67, 'MOVE PARTITION BETWEEN SHARDS' = 68, 'CREATE USER' = 69, 'ALTER USER' = 70, 'DROP USER' = 71, 'CREATE ROLE' = 72, 'ALTER ROLE' = 73, 'DROP ROLE' = 74, 'ROLE ADMIN' = 75, 'CREATE ROW POLICY' = 76, 'ALTER ROW POLICY' = 77, 'DROP ROW POLICY' = 78, 'CREATE QUOTA' = 79, 'ALTER QUOTA' = 80, 'DROP QUOTA' = 81, 'CREATE SETTINGS PROFILE' = 82, 'ALTER SETTINGS PROFILE' = 83, 'DROP SETTINGS PROFILE' = 84, 'SHOW USERS' = 85, 'SHOW ROLES' = 86, 'SHOW ROW POLICIES' = 87, 'SHOW QUOTAS' = 88, 'SHOW SETTINGS PROFILES' = 89, 'SHOW ACCESS' = 90, 'SHOW NAMED COLLECTIONS' = 91, 'ACCESS MANAGEMENT' = 92, 'SYSTEM SHUTDOWN' = 93, 'SYSTEM DROP DNS 
CACHE' = 94, 'SYSTEM DROP MARK CACHE' = 95, 'SYSTEM DROP UNCOMPRESSED CACHE' = 96, 'SYSTEM DROP MMAP CACHE' = 97, 'SYSTEM DROP QUERY CACHE' = 98, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 99, 'SYSTEM DROP FILESYSTEM CACHE' = 100, 'SYSTEM DROP SCHEMA CACHE' = 101, 'SYSTEM DROP CACHE' = 102, 'SYSTEM RELOAD CONFIG' = 103, 'SYSTEM RELOAD USERS' = 104, 'SYSTEM RELOAD SYMBOLS' = 105, 'SYSTEM RELOAD DICTIONARY' = 106, 'SYSTEM RELOAD MODEL' = 107, 'SYSTEM RELOAD FUNCTION' = 108, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 109, 'SYSTEM RELOAD' = 110, 'SYSTEM RESTART DISK' = 111, 'SYSTEM MERGES' = 112, 'SYSTEM TTL MERGES' = 113, 'SYSTEM FETCHES' = 114, 'SYSTEM MOVES' = 115, 'SYSTEM DISTRIBUTED SENDS' = 116, 'SYSTEM REPLICATED SENDS' = 117, 'SYSTEM SENDS' = 118, 'SYSTEM REPLICATION QUEUES' = 119, 'SYSTEM DROP REPLICA' = 120, 'SYSTEM SYNC REPLICA' = 121, 'SYSTEM RESTART REPLICA' = 122, 'SYSTEM RESTORE REPLICA' = 123, 'SYSTEM WAIT LOADING PARTS' = 124, 'SYSTEM SYNC DATABASE REPLICA' = 125, 'SYSTEM SYNC TRANSACTION LOG' = 126, 'SYSTEM SYNC FILE CACHE' = 127, 'SYSTEM FLUSH DISTRIBUTED' = 128, 'SYSTEM FLUSH LOGS' = 129, 'SYSTEM FLUSH' = 130, 'SYSTEM THREAD FUZZER' = 131, 'SYSTEM UNFREEZE' = 132, 'SYSTEM' = 133, 'dictGet' = 134, 'addressToLine' = 135, 'addressToLineWithInlines' = 136, 'addressToSymbol' = 137, 'demangle' = 138, 'INTROSPECTION' = 139, 'FILE' = 140, 'URL' = 141, 'REMOTE' = 142, 'MONGO' = 143, 'MEILISEARCH' = 144, 'MYSQL' = 145, 'POSTGRES' = 146, 'SQLITE' = 147, 'ODBC' = 148, 'JDBC' = 149, 'HDFS' = 150, 'S3' = 151, 'HIVE' = 152, 'SOURCES' = 153, 'CLUSTER' = 154, 'ALL' = 155, 'NONE' = 156), `aliases` Array(String), `level` Nullable(Enum8('GLOBAL' = 0, 'DATABASE' = 1, 'TABLE' = 2, 'DICTIONARY' = 3, 'VIEW' = 4, 'COLUMN' = 5)), - `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE FUNCTION' = 53, 'CREATE NAMED COLLECTION' = 54, 'CREATE' = 55, 'DROP DATABASE' = 56, 'DROP TABLE' = 57, 'DROP VIEW' = 58, 'DROP DICTIONARY' = 59, 'DROP FUNCTION' = 60, 'DROP NAMED COLLECTION' = 61, 'DROP' = 62, 'TRUNCATE' = 63, 'OPTIMIZE' = 64, 'BACKUP' = 65, 'KILL QUERY' = 66, 'KILL TRANSACTION' = 67, 'MOVE PARTITION BETWEEN SHARDS' = 68, 'CREATE USER' = 69, 'ALTER USER' = 70, 'DROP USER' = 71, 'CREATE ROLE' = 72, 'ALTER ROLE' = 
73, 'DROP ROLE' = 74, 'ROLE ADMIN' = 75, 'CREATE ROW POLICY' = 76, 'ALTER ROW POLICY' = 77, 'DROP ROW POLICY' = 78, 'CREATE QUOTA' = 79, 'ALTER QUOTA' = 80, 'DROP QUOTA' = 81, 'CREATE SETTINGS PROFILE' = 82, 'ALTER SETTINGS PROFILE' = 83, 'DROP SETTINGS PROFILE' = 84, 'SHOW USERS' = 85, 'SHOW ROLES' = 86, 'SHOW ROW POLICIES' = 87, 'SHOW QUOTAS' = 88, 'SHOW SETTINGS PROFILES' = 89, 'SHOW ACCESS' = 90, 'SHOW NAMED COLLECTIONS' = 91, 'ACCESS MANAGEMENT' = 92, 'SYSTEM SHUTDOWN' = 93, 'SYSTEM DROP DNS CACHE' = 94, 'SYSTEM DROP MARK CACHE' = 95, 'SYSTEM DROP UNCOMPRESSED CACHE' = 96, 'SYSTEM DROP MMAP CACHE' = 97, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 98, 'SYSTEM DROP FILESYSTEM CACHE' = 99, 'SYSTEM DROP SCHEMA CACHE' = 100, 'SYSTEM DROP CACHE' = 101, 'SYSTEM RELOAD CONFIG' = 102, 'SYSTEM RELOAD USERS' = 103, 'SYSTEM RELOAD SYMBOLS' = 104, 'SYSTEM RELOAD DICTIONARY' = 105, 'SYSTEM RELOAD MODEL' = 106, 'SYSTEM RELOAD FUNCTION' = 107, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 108, 'SYSTEM RELOAD' = 109, 'SYSTEM RESTART DISK' = 110, 'SYSTEM MERGES' = 111, 'SYSTEM TTL MERGES' = 112, 'SYSTEM FETCHES' = 113, 'SYSTEM MOVES' = 114, 'SYSTEM DISTRIBUTED SENDS' = 115, 'SYSTEM REPLICATED SENDS' = 116, 'SYSTEM SENDS' = 117, 'SYSTEM REPLICATION QUEUES' = 118, 'SYSTEM DROP REPLICA' = 119, 'SYSTEM SYNC REPLICA' = 120, 'SYSTEM RESTART REPLICA' = 121, 'SYSTEM RESTORE REPLICA' = 122, 'SYSTEM WAIT LOADING PARTS' = 123, 'SYSTEM SYNC DATABASE REPLICA' = 124, 'SYSTEM SYNC TRANSACTION LOG' = 125, 'SYSTEM FLUSH DISTRIBUTED' = 126, 'SYSTEM FLUSH LOGS' = 127, 'SYSTEM FLUSH' = 128, 'SYSTEM THREAD FUZZER' = 129, 'SYSTEM UNFREEZE' = 130, 'SYSTEM' = 131, 'dictGet' = 132, 'addressToLine' = 133, 'addressToLineWithInlines' = 134, 'addressToSymbol' = 135, 'demangle' = 136, 'INTROSPECTION' = 137, 'FILE' = 138, 'URL' = 139, 'REMOTE' = 140, 'MONGO' = 141, 'MEILISEARCH' = 142, 'MYSQL' = 143, 'POSTGRES' = 144, 'SQLITE' = 145, 'ODBC' = 146, 'JDBC' = 147, 'HDFS' = 148, 'S3' = 149, 'HIVE' = 150, 'SOURCES' = 151, 'CLUSTER' = 152, 'ALL' = 153, 'NONE' = 154)) + `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE FUNCTION' = 53, 'CREATE NAMED COLLECTION' = 54, 'CREATE' = 55, 'DROP DATABASE' = 56, 'DROP TABLE' = 57, 'DROP VIEW' = 58, 'DROP 
DICTIONARY' = 59, 'DROP FUNCTION' = 60, 'DROP NAMED COLLECTION' = 61, 'DROP' = 62, 'TRUNCATE' = 63, 'OPTIMIZE' = 64, 'BACKUP' = 65, 'KILL QUERY' = 66, 'KILL TRANSACTION' = 67, 'MOVE PARTITION BETWEEN SHARDS' = 68, 'CREATE USER' = 69, 'ALTER USER' = 70, 'DROP USER' = 71, 'CREATE ROLE' = 72, 'ALTER ROLE' = 73, 'DROP ROLE' = 74, 'ROLE ADMIN' = 75, 'CREATE ROW POLICY' = 76, 'ALTER ROW POLICY' = 77, 'DROP ROW POLICY' = 78, 'CREATE QUOTA' = 79, 'ALTER QUOTA' = 80, 'DROP QUOTA' = 81, 'CREATE SETTINGS PROFILE' = 82, 'ALTER SETTINGS PROFILE' = 83, 'DROP SETTINGS PROFILE' = 84, 'SHOW USERS' = 85, 'SHOW ROLES' = 86, 'SHOW ROW POLICIES' = 87, 'SHOW QUOTAS' = 88, 'SHOW SETTINGS PROFILES' = 89, 'SHOW ACCESS' = 90, 'SHOW NAMED COLLECTIONS' = 91, 'ACCESS MANAGEMENT' = 92, 'SYSTEM SHUTDOWN' = 93, 'SYSTEM DROP DNS CACHE' = 94, 'SYSTEM DROP MARK CACHE' = 95, 'SYSTEM DROP UNCOMPRESSED CACHE' = 96, 'SYSTEM DROP MMAP CACHE' = 97, 'SYSTEM DROP QUERY CACHE' = 98, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 99, 'SYSTEM DROP FILESYSTEM CACHE' = 100, 'SYSTEM DROP SCHEMA CACHE' = 101, 'SYSTEM DROP CACHE' = 102, 'SYSTEM RELOAD CONFIG' = 103, 'SYSTEM RELOAD USERS' = 104, 'SYSTEM RELOAD SYMBOLS' = 105, 'SYSTEM RELOAD DICTIONARY' = 106, 'SYSTEM RELOAD MODEL' = 107, 'SYSTEM RELOAD FUNCTION' = 108, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 109, 'SYSTEM RELOAD' = 110, 'SYSTEM RESTART DISK' = 111, 'SYSTEM MERGES' = 112, 'SYSTEM TTL MERGES' = 113, 'SYSTEM FETCHES' = 114, 'SYSTEM MOVES' = 115, 'SYSTEM DISTRIBUTED SENDS' = 116, 'SYSTEM REPLICATED SENDS' = 117, 'SYSTEM SENDS' = 118, 'SYSTEM REPLICATION QUEUES' = 119, 'SYSTEM DROP REPLICA' = 120, 'SYSTEM SYNC REPLICA' = 121, 'SYSTEM RESTART REPLICA' = 122, 'SYSTEM RESTORE REPLICA' = 123, 'SYSTEM WAIT LOADING PARTS' = 124, 'SYSTEM SYNC DATABASE REPLICA' = 125, 'SYSTEM SYNC TRANSACTION LOG' = 126, 'SYSTEM SYNC FILE CACHE' = 127, 'SYSTEM FLUSH DISTRIBUTED' = 128, 'SYSTEM FLUSH LOGS' = 129, 'SYSTEM FLUSH' = 130, 'SYSTEM THREAD FUZZER' = 131, 'SYSTEM UNFREEZE' = 132, 'SYSTEM' = 133, 'dictGet' = 134, 'addressToLine' = 135, 'addressToLineWithInlines' = 136, 'addressToSymbol' = 137, 'demangle' = 138, 'INTROSPECTION' = 139, 'FILE' = 140, 'URL' = 141, 'REMOTE' = 142, 'MONGO' = 143, 'MEILISEARCH' = 144, 'MYSQL' = 145, 'POSTGRES' = 146, 'SQLITE' = 147, 'ODBC' = 148, 'JDBC' = 149, 'HDFS' = 150, 'S3' = 151, 'HIVE' = 152, 'SOURCES' = 153, 'CLUSTER' = 154, 'ALL' = 155, 'NONE' = 156)) ) ENGINE = SystemPrivileges COMMENT 'SYSTEM TABLE is built on the fly.' 
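The two rewritten Enum16 definitions above (system.grants.access_type and system.privileges.privilege/parent_group) change only because two access types were inserted: 'SYSTEM DROP QUERY CACHE' (= 98) and 'SYSTEM SYNC FILE CACHE' (= 127); every later value shifts accordingly (for example, 'ALL' moves from 153 to 155). A quick way to confirm the new entries on a server running this version is to query the same system table the hunk touches. This is a hand-written sanity-check sketch, not part of the patch; the table and column names are taken from the hunks above:

SELECT privilege, parent_group
FROM system.privileges
WHERE privilege IN ('SYSTEM DROP QUERY CACHE', 'SYSTEM SYNC FILE CACHE'); -- sanity check only, not part of the patch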
diff --git a/tests/queries/0_stateless/02118_deserialize_whole_text.sh b/tests/queries/0_stateless/02118_deserialize_whole_text.sh index e9f35582f15..d4702887e7f 100755 --- a/tests/queries/0_stateless/02118_deserialize_whole_text.sh +++ b/tests/queries/0_stateless/02118_deserialize_whole_text.sh @@ -33,10 +33,10 @@ echo "[\"255.255.255.255trash\"]" > $DATA_FILE $CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x IPv4')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "255.255.255.255trash" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'TSV', 'x IPv4')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'TSV', 'x IPv4')" 2>&1 | grep -F -q "CANNOT_PARSE_INPUT_ASSERTION_FAILED" && echo 'OK' || echo 'FAIL' echo "255.255.255.255trash" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'CSV', 'x IPv4')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'CSV', 'x IPv4')" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' echo "[\"255.255.255.255trash\"]" > $DATA_FILE $CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactEachRow', 'x IPv4')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' @@ -45,10 +45,10 @@ echo "[\"0000:0000:0000:0000:0000:ffff:192.168.100.228b1trash\"]" > $DATA_FILE $CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x IPv6')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "0000:0000:0000:0000:0000:ffff:192.168.100.228b1trash" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'TSV', 'x IPv6')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'TSV', 'x IPv6')" 2>&1 | grep -F -q "CANNOT_PARSE_INPUT_ASSERTION_FAILED" && echo 'OK' || echo 'FAIL' echo "0000:0000:0000:0000:0000:ffff:192.168.100.228b1trash" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'CSV', 'x IPv6')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'CSV', 'x IPv6')" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' echo "[\"0000:0000:0000:0000:0000:ffff:192.168.100.228b1trash\"]" > $DATA_FILE $CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactEachRow', 'x IPv6')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' diff --git a/tests/queries/0_stateless/02122_parallel_formatting.lib b/tests/queries/0_stateless/02122_parallel_formatting.lib index 79fe98520be..56119012788 100755 --- a/tests/queries/0_stateless/02122_parallel_formatting.lib +++ b/tests/queries/0_stateless/02122_parallel_formatting.lib @@ -5,8 +5,8 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -parallel_file=$CUR_DIR/$CLICKHOUSE_TEST_UNIQUE_NAME"_parallel" -non_parallel_file=$CUR_DIR/$CLICKHOUSE_TEST_UNIQUE_NAME"_non_parallel" +parallel_file=$CLICKHOUSE_TMP/$CLICKHOUSE_TEST_UNIQUE_NAME"_parallel" +non_parallel_file=$CLICKHOUSE_TMP/$CLICKHOUSE_TEST_UNIQUE_NAME"_non_parallel" format=$1 diff --git a/tests/queries/0_stateless/02226_analyzer_or_like_combine.reference b/tests/queries/0_stateless/02226_analyzer_or_like_combine.reference index 8b89f5847b6..6165079994f 100644 --- a/tests/queries/0_stateless/02226_analyzer_or_like_combine.reference +++ b/tests/queries/0_stateless/02226_analyzer_or_like_combine.reference @@ -56,7 +56,7 @@ QUERY id: 0 JOIN TREE TABLE id: 5, table_name: system.one WHERE - FUNCTION id: 6, function_name: or, function_type: ordinary, result_type: Bool + FUNCTION id: 6, function_name: or, function_type: ordinary, result_type: UInt8 ARGUMENTS LIST id: 7, nodes: 2 FUNCTION id: 8, function_name: multiMatchAny, function_type: ordinary, result_type: UInt8 @@ -67,7 +67,7 @@ QUERY id: 0 LIST id: 3, nodes: 1 CONSTANT id: 4, constant_value: \'Привет, World\', constant_value_type: String CONSTANT id: 10, constant_value: Array_[\'^hell\', \'(?i)привет\', \'(?i)^world\'], constant_value_type: Array(String) - CONSTANT id: 11, constant_value: UInt64_0, constant_value_type: Bool + CONSTANT id: 11, constant_value: UInt64_0, constant_value_type: UInt8 SETTINGS optimize_or_like_chain=1 allow_experimental_analyzer=1 SELECT materialize(\'Привет, World\') AS s1, diff --git a/tests/queries/0_stateless/02228_unquoted_dates_in_csv_schema_inference.reference b/tests/queries/0_stateless/02228_unquoted_dates_in_csv_schema_inference.reference index 5fd48ae580a..be82d744a3b 100644 --- a/tests/queries/0_stateless/02228_unquoted_dates_in_csv_schema_inference.reference +++ b/tests/queries/0_stateless/02228_unquoted_dates_in_csv_schema_inference.reference @@ -1 +1 @@ -c1 Nullable(String) +c1 Nullable(DateTime64(9)) diff --git a/tests/queries/0_stateless/02234_cast_to_ip_address.reference b/tests/queries/0_stateless/02234_cast_to_ip_address.reference index 96aae2a978c..9023b36a9bf 100644 --- a/tests/queries/0_stateless/02234_cast_to_ip_address.reference +++ b/tests/queries/0_stateless/02234_cast_to_ip_address.reference @@ -31,6 +31,9 @@ IPv6 functions ::ffff:127.0.0.1 ::ffff:127.0.0.1 ::ffff:127.0.0.1 +:: +\N +100000000 -- ::ffff:127.0.0.1 -- diff --git a/tests/queries/0_stateless/02234_cast_to_ip_address.sql b/tests/queries/0_stateless/02234_cast_to_ip_address.sql index 436f232e441..6c65fe86cc9 100644 --- a/tests/queries/0_stateless/02234_cast_to_ip_address.sql +++ b/tests/queries/0_stateless/02234_cast_to_ip_address.sql @@ -56,6 +56,12 @@ SELECT toIPv6('::ffff:127.0.0.1'); SELECT toIPv6OrDefault('::ffff:127.0.0.1'); SELECT toIPv6OrNull('::ffff:127.0.0.1'); +SELECT toIPv6('::.1.2.3'); --{serverError CANNOT_PARSE_IPV6} +SELECT toIPv6OrDefault('::.1.2.3'); +SELECT toIPv6OrNull('::.1.2.3'); + +SELECT count() FROM numbers_mt(100000000) WHERE NOT ignore(toIPv6OrZero(randomString(8))); + SELECT '--'; SELECT cast('test' , 'IPv6'); --{serverError CANNOT_PARSE_IPV6} diff --git a/tests/queries/0_stateless/02240_filesystem_query_cache.reference b/tests/queries/0_stateless/02240_filesystem_query_cache.reference index 48d91c6f142..f4b9f7bb127 100644 --- a/tests/queries/0_stateless/02240_filesystem_query_cache.reference +++ b/tests/queries/0_stateless/02240_filesystem_query_cache.reference @@ -3,7 +3,7 @@ SYSTEM DROP FILESYSTEM CACHE; SET 
enable_filesystem_cache_on_write_operations=0; SET skip_download_if_exceeds_query_cache=1; -SET max_query_cache_size=128; +SET filesystem_cache_max_download_size=128; DROP TABLE IF EXISTS test; CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_4', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; INSERT INTO test SELECT number, toString(number) FROM numbers(100); diff --git a/tests/queries/0_stateless/02240_filesystem_query_cache.sql b/tests/queries/0_stateless/02240_filesystem_query_cache.sql index 7dd975b27ee..94eb4bc5ccd 100644 --- a/tests/queries/0_stateless/02240_filesystem_query_cache.sql +++ b/tests/queries/0_stateless/02240_filesystem_query_cache.sql @@ -5,7 +5,7 @@ SYSTEM DROP FILESYSTEM CACHE; SET enable_filesystem_cache_on_write_operations=0; SET skip_download_if_exceeds_query_cache=1; -SET max_query_cache_size=128; +SET filesystem_cache_max_download_size=128; DROP TABLE IF EXISTS test; CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_4', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; diff --git a/tests/queries/0_stateless/02244_issue_35598_fuse.reference b/tests/queries/0_stateless/02244_issue_35598_fuse.reference deleted file mode 100644 index 6ce84b402a3..00000000000 --- a/tests/queries/0_stateless/02244_issue_35598_fuse.reference +++ /dev/null @@ -1,2 +0,0 @@ -0 0 nan -0 0 nan diff --git a/tests/queries/0_stateless/02244_issue_35598_fuse.sql b/tests/queries/0_stateless/02244_issue_35598_fuse.sql deleted file mode 100644 index a590854eb6c..00000000000 --- a/tests/queries/0_stateless/02244_issue_35598_fuse.sql +++ /dev/null @@ -1,5 +0,0 @@ -SELECT sum(x), count(x), avg(x) FROM (SELECT number :: Decimal32(0) AS x FROM numbers(0)) -SETTINGS optimize_syntax_fuse_functions = 0, optimize_fuse_sum_count_avg = 0; - -SELECT sum(x), count(x), avg(x) FROM (SELECT number :: Decimal32(0) AS x FROM numbers(0)) -SETTINGS optimize_syntax_fuse_functions = 1, optimize_fuse_sum_count_avg = 1; diff --git a/tests/queries/0_stateless/02246_clickhouse_local_drop_database.sh b/tests/queries/0_stateless/02246_clickhouse_local_drop_database.sh index 00f3904192f..7e25e099626 100755 --- a/tests/queries/0_stateless/02246_clickhouse_local_drop_database.sh +++ b/tests/queries/0_stateless/02246_clickhouse_local_drop_database.sh @@ -22,3 +22,5 @@ USE test; CREATE TABLE test (id Int32) ENGINE=MergeTree() ORDER BY id; DROP DATABASE test; """ + +rm -r $dir diff --git a/tests/queries/0_stateless/02286_tuple_numeric_identifier.sql b/tests/queries/0_stateless/02286_tuple_numeric_identifier.sql index b5b0508deaa..a5fd8e57ad5 100644 --- a/tests/queries/0_stateless/02286_tuple_numeric_identifier.sql +++ b/tests/queries/0_stateless/02286_tuple_numeric_identifier.sql @@ -1,4 +1,3 @@ --- Tags: no-fasttest DROP TABLE IF EXISTS t_tuple_numeric; diff --git a/tests/queries/0_stateless/02352_interactive_queries_from_file.expect b/tests/queries/0_stateless/02352_interactive_queries_from_file.expect index d15b804b0b9..a34fc9909f8 100755 --- a/tests/queries/0_stateless/02352_interactive_queries_from_file.expect +++ b/tests/queries/0_stateless/02352_interactive_queries_from_file.expect @@ -43,3 +43,5 @@ expect ":) " send -- "exit\r" expect eof + +spawn bash -c "rm queries_02352" diff --git a/tests/queries/0_stateless/02353_simdjson_buffer_overflow.sql b/tests/queries/0_stateless/02353_simdjson_buffer_overflow.sql index 
0572536e19e..b324f834053 100644 --- a/tests/queries/0_stateless/02353_simdjson_buffer_overflow.sql +++ b/tests/queries/0_stateless/02353_simdjson_buffer_overflow.sql @@ -1,4 +1,3 @@ --- Tags: no-fasttest, no-msan -- Tag: no-msan: fuzzer can make this query very memory hungry, and under MSan, the MemoryTracker cannot account for the additional memory used by sanitizer, and OOM happens. SET max_execution_time = 3; diff --git a/tests/queries/0_stateless/02377_optimize_sorting_by_input_stream_properties_explain.reference b/tests/queries/0_stateless/02377_optimize_sorting_by_input_stream_properties_explain.reference index ba924f5daa2..bafa70556e7 100644 --- a/tests/queries/0_stateless/02377_optimize_sorting_by_input_stream_properties_explain.reference +++ b/tests/queries/0_stateless/02377_optimize_sorting_by_input_stream_properties_explain.reference @@ -1,5 +1,5 @@ -- EXPLAIN PLAN sorting for MergeTree w/o sorting key --- QUERY: set optimize_read_in_order=1;set max_threads=3;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting ORDER BY a +-- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting ORDER BY a Sorting (Global): a ASC Sorting (Sorting for ORDER BY) Sorting (Global): a ASC @@ -21,49 +21,49 @@ MergeSortingTransform × 3 LimitsCheckingTransform × 3 PartialSortingTransform × 3 -- ExpressionStep preserves sort mode --- QUERY: set optimize_read_in_order=1;set max_threads=3;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting ORDER BY a +-- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting ORDER BY a Sorting (Global): a ASC Sorting (Sorting for ORDER BY) Sorting (Global): a ASC Sorting (Chunk): a ASC Sorting (Stream): a ASC --- QUERY: set optimize_read_in_order=1;set max_threads=3;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting ORDER BY a+1 +-- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting ORDER BY a+1 Sorting (None) Sorting (Sorting for ORDER BY) Sorting (Global): plus(a, 1) ASC Sorting (Chunk): a ASC Sorting (Chunk): a ASC -- ExpressionStep breaks sort mode --- QUERY: set optimize_read_in_order=1;set max_threads=3;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a+1 FROM optimize_sorting ORDER BY a+1 +-- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a+1 FROM optimize_sorting ORDER BY a+1 Sorting (Global): plus(a, 1) ASC Sorting (Sorting for ORDER BY) Sorting (Global): plus(a, 1) ASC Sorting (None) Sorting (Chunk): a ASC -- FilterStep preserves sort mode --- QUERY: set optimize_read_in_order=1;set max_threads=3;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting WHERE a > 0 +-- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting WHERE a > 0 Sorting (Chunk): a ASC Sorting (Chunk): a ASC Sorting (Chunk): a ASC --- QUERY: set optimize_read_in_order=1;set max_threads=3;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting WHERE a+1 > 0 +-- QUERY: set optimize_read_in_order=1;set max_threads=3;set 
query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting WHERE a+1 > 0 Sorting (Chunk): a ASC Sorting (Chunk): a ASC Sorting (Chunk): a ASC --- QUERY: set optimize_read_in_order=1;set max_threads=3;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a, a+1 FROM optimize_sorting WHERE a+1 > 0 +-- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a, a+1 FROM optimize_sorting WHERE a+1 > 0 Sorting (Chunk): a ASC Sorting (Chunk): a ASC Sorting (Chunk): a ASC -- FilterStep breaks sort mode --- QUERY: set optimize_read_in_order=1;set max_threads=3;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a > 0 FROM optimize_sorting WHERE a > 0 +-- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a > 0 FROM optimize_sorting WHERE a > 0 Sorting (None) Sorting (None) Sorting (Chunk): a ASC --- QUERY: set optimize_read_in_order=1;set max_threads=3;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a+1 FROM optimize_sorting WHERE a+1 > 0 +-- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a+1 FROM optimize_sorting WHERE a+1 > 0 Sorting (None) Sorting (None) Sorting (Chunk): a ASC -- aliases break sorting order --- QUERY: set optimize_read_in_order=1;set max_threads=3;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM (SELECT sipHash64(a) AS a FROM (SELECT a FROM optimize_sorting ORDER BY a)) ORDER BY a +-- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM (SELECT sipHash64(a) AS a FROM (SELECT a FROM optimize_sorting ORDER BY a)) ORDER BY a Sorting (Global): a ASC Sorting (Sorting for ORDER BY) Sorting (Global): a ASC @@ -73,14 +73,14 @@ Sorting (Global): a ASC Sorting (Chunk): a ASC Sorting (Stream): a ASC -- aliases DONT break sorting order --- QUERY: set optimize_read_in_order=1;set max_threads=3;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a, b FROM (SELECT x AS a, y AS b FROM (SELECT a AS x, b AS y FROM optimize_sorting) ORDER BY x, y) +-- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a, b FROM (SELECT x AS a, y AS b FROM (SELECT a AS x, b AS y FROM optimize_sorting) ORDER BY x, y) Sorting (Global): x ASC, y ASC Sorting (Sorting for ORDER BY) Sorting (Global): x ASC, y ASC Sorting (Chunk): a ASC, b ASC Sorting (Stream): a ASC, b ASC -- actions chain breaks sorting order: input(column a)->sipHash64(column a)->alias(sipHash64(column a), a)->plus(alias a, 1) --- QUERY: set optimize_read_in_order=1;set max_threads=3;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a, z FROM (SELECT sipHash64(a) AS a, a + 1 AS z FROM (SELECT a FROM optimize_sorting ORDER BY a + 1)) ORDER BY a + 1 +-- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a, z FROM (SELECT sipHash64(a) AS a, a + 1 AS z FROM (SELECT a FROM optimize_sorting ORDER BY a + 1)) ORDER BY a + 1 Sorting (None) Sorting (Sorting for ORDER BY) Sorting (Global): plus(a, 1) ASC @@ -90,7 +90,7 @@ Sorting (Global): plus(a, 1) ASC Sorting (Chunk): a ASC Sorting (Chunk): a ASC -- check that correct sorting info 
is provided in case of only prefix of sorting key is in ORDER BY clause but all sorting key columns returned by query --- QUERY: set optimize_read_in_order=1;set max_threads=3;EXPLAIN PLAN sorting=1 SELECT a, b FROM optimize_sorting ORDER BY a +-- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN sorting=1 SELECT a, b FROM optimize_sorting ORDER BY a Sorting (Global): a ASC Sorting (Sorting for ORDER BY) Sorting (Global): a ASC diff --git a/tests/queries/0_stateless/02377_optimize_sorting_by_input_stream_properties_explain.sh b/tests/queries/0_stateless/02377_optimize_sorting_by_input_stream_properties_explain.sh index 62051701cb6..0678ff63e3f 100755 --- a/tests/queries/0_stateless/02377_optimize_sorting_by_input_stream_properties_explain.sh +++ b/tests/queries/0_stateless/02377_optimize_sorting_by_input_stream_properties_explain.sh @@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) DISABLE_OPTIMIZATION="set optimize_sorting_by_input_stream_properties=0;set query_plan_read_in_order=0;set max_threads=3" ENABLE_OPTIMIZATION="set optimize_sorting_by_input_stream_properties=1;set query_plan_read_in_order=1;set optimize_read_in_order=1;set max_threads=3" -MAKE_OUTPUT_STABLE="set optimize_read_in_order=1;set max_threads=3" +MAKE_OUTPUT_STABLE="set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0" GREP_SORTING="grep 'PartialSortingTransform\|LimitsCheckingTransform\|MergeSortingTransform\|MergingSortedTransform'" GREP_SORTMODE="grep 'Sorting ('" TRIM_LEADING_SPACES="sed -e 's/^[ \t]*//'" diff --git a/tests/queries/0_stateless/02404_memory_bound_merging.sql b/tests/queries/0_stateless/02404_memory_bound_merging.sql index c41e2d3abae..f4a1e75e398 100644 --- a/tests/queries/0_stateless/02404_memory_bound_merging.sql +++ b/tests/queries/0_stateless/02404_memory_bound_merging.sql @@ -1,5 +1,13 @@ -- Tags: no-parallel +drop table if exists pr_t; +drop table if exists dist_pr_t; +drop table if exists dist_t_different_dbs; +drop table if exists shard_1.t_different_dbs; +drop table if exists t_different_dbs; +drop table if exists dist_t; +drop table if exists t; + create table t(a UInt64, b UInt64) engine=MergeTree order by a; system stop merges t; insert into t select number, number from numbers_mt(1e6); @@ -64,6 +72,7 @@ select a, count() from dist_pr_t group by a, b order by a limit 5 offset 500; -- { echoOff } -- +drop table pr_t; drop table dist_pr_t; drop table dist_t_different_dbs; drop table shard_1.t_different_dbs; diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index 9d747f9c572..e41249af54c 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -344,8 +344,6 @@ hasAny hasColumnInTable hasSubstr hasThreadFuzzer -hasToken -hasTokenCaseInsensitive hashid hex hiveHash @@ -648,7 +646,9 @@ simpleJSONHas sin sinh sipHash128 +sipHash128Keyed sipHash64 +sipHash64Keyed sleep sleepEachRow snowflakeToDateTime diff --git a/tests/queries/0_stateless/02452_check_low_cardinality.sql b/tests/queries/0_stateless/02452_check_low_cardinality.sql index e9cb8c800c7..166be281405 100644 --- a/tests/queries/0_stateless/02452_check_low_cardinality.sql +++ b/tests/queries/0_stateless/02452_check_low_cardinality.sql @@ -1,4 +1,3 @@ --- Tags: no-fasttest DROP TABLE 
IF EXISTS test_low_cardinality_string; DROP TABLE IF EXISTS test_low_cardinality_uuid; DROP TABLE IF EXISTS test_low_cardinality_int; diff --git a/tests/queries/0_stateless/02455_extract_fixed_string_from_nested_json.sql b/tests/queries/0_stateless/02455_extract_fixed_string_from_nested_json.sql index 449713d396f..7466bd7e282 100644 --- a/tests/queries/0_stateless/02455_extract_fixed_string_from_nested_json.sql +++ b/tests/queries/0_stateless/02455_extract_fixed_string_from_nested_json.sql @@ -1,4 +1,3 @@ --- Tags: no-fasttest DROP TABLE IF EXISTS test_fixed_string_nested_json; CREATE TABLE test_fixed_string_nested_json (data String) ENGINE MergeTree ORDER BY data; INSERT INTO test_fixed_string_nested_json (data) VALUES ('{"a" : {"b" : {"c" : 1, "d" : "str"}}}'); diff --git a/tests/queries/0_stateless/02473_extract_low_cardinality_from_json.sql b/tests/queries/0_stateless/02473_extract_low_cardinality_from_json.sql index 664c52e772f..721b7885ecb 100644 --- a/tests/queries/0_stateless/02473_extract_low_cardinality_from_json.sql +++ b/tests/queries/0_stateless/02473_extract_low_cardinality_from_json.sql @@ -1,3 +1,2 @@ --- Tags: no-fasttest SELECT JSONExtract('{"a" : {"b" : {"c" : 1, "d" : "str"}}}', 'Tuple( a LowCardinality(String), b LowCardinality(String), c LowCardinality(String), d LowCardinality(String))'); SELECT JSONExtract('{"a" : {"b" : {"c" : 1, "d" : "str"}}}', 'Tuple( a String, b LowCardinality(String), c LowCardinality(String), d LowCardinality(String))'); diff --git a/tests/queries/0_stateless/02474_extract_fixedstring_from_json.sql b/tests/queries/0_stateless/02474_extract_fixedstring_from_json.sql index 336dda411da..cfc47e00cba 100644 --- a/tests/queries/0_stateless/02474_extract_fixedstring_from_json.sql +++ b/tests/queries/0_stateless/02474_extract_fixedstring_from_json.sql @@ -1,4 +1,3 @@ --- Tags: no-fasttest SELECT JSONExtract('{"a": 123456}', 'FixedString(11)'); SELECT JSONExtract('{"a": 123456}', 'FixedString(12)'); SELECT JSONExtract('{"a": "123456"}', 'a', 'FixedString(5)'); diff --git a/tests/queries/0_stateless/02476_fuse_sum_count.sql b/tests/queries/0_stateless/02476_fuse_sum_count.sql index ee65d32d0cf..315bbd10a65 100644 --- a/tests/queries/0_stateless/02476_fuse_sum_count.sql +++ b/tests/queries/0_stateless/02476_fuse_sum_count.sql @@ -1,5 +1,5 @@ SET allow_experimental_analyzer = 1; -SET optimize_syntax_fuse_functions = 1, optimize_fuse_sum_count_avg = 1; +SET optimize_syntax_fuse_functions = 1; DROP TABLE IF EXISTS fuse_tbl; diff --git a/tests/queries/0_stateless/02477_fuse_quantiles.sql b/tests/queries/0_stateless/02477_fuse_quantiles.sql index efd861ad7f3..c0719d771d7 100644 --- a/tests/queries/0_stateless/02477_fuse_quantiles.sql +++ b/tests/queries/0_stateless/02477_fuse_quantiles.sql @@ -1,5 +1,5 @@ SET allow_experimental_analyzer = 1; -SET optimize_syntax_fuse_functions = 1, optimize_fuse_sum_count_avg = 1; +SET optimize_syntax_fuse_functions = 1; DROP TABLE IF EXISTS fuse_tbl; diff --git a/tests/queries/0_stateless/02481_analyzer_optimize_grouping_sets_keys.reference b/tests/queries/0_stateless/02481_analyzer_optimize_grouping_sets_keys.reference index fdab24700ac..d01bb5715ad 100644 --- a/tests/queries/0_stateless/02481_analyzer_optimize_grouping_sets_keys.reference +++ b/tests/queries/0_stateless/02481_analyzer_optimize_grouping_sets_keys.reference @@ -246,3 +246,40 @@ QUERY id: 0, group_by_type: grouping_sets ARGUMENTS LIST id: 53, nodes: 1 COLUMN id: 54, column_name: number, result_type: UInt64, source_id: 11 +QUERY id: 0, group_by_type: 
grouping_sets + PROJECTION COLUMNS + count() UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64 + JOIN TREE + TABLE_FUNCTION id: 3, table_function_name: numbers + ARGUMENTS + LIST id: 4, nodes: 1 + CONSTANT id: 5, constant_value: UInt64_1000, constant_value_type: UInt16 + GROUP BY + LIST id: 6, nodes: 3 + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 3 + LIST id: 9, nodes: 2 + FUNCTION id: 10, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 11, nodes: 2 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 3 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 2 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 3 + CONSTANT id: 15, constant_value: UInt64_3, constant_value_type: UInt8 + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: divide, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 18, nodes: 2 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 3 + CONSTANT id: 19, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 20, function_name: divide, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 21, nodes: 2 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 3 + CONSTANT id: 22, constant_value: UInt64_3, constant_value_type: UInt8 diff --git a/tests/queries/0_stateless/02481_analyzer_optimize_grouping_sets_keys.sql b/tests/queries/0_stateless/02481_analyzer_optimize_grouping_sets_keys.sql index 0c757cb111c..b51233f734c 100644 --- a/tests/queries/0_stateless/02481_analyzer_optimize_grouping_sets_keys.sql +++ b/tests/queries/0_stateless/02481_analyzer_optimize_grouping_sets_keys.sql @@ -15,3 +15,12 @@ SELECT avg(log(2) * number) AS k FROM numbers(10000000) GROUP BY GROUPING SETS (((number % 2) * (number % 3), number % 3), (number % 2)) HAVING avg(log(2) * number) > 3465735.3 ORDER BY k; + +EXPLAIN QUERY TREE run_passes=1 +SELECT count() FROM numbers(1000) +GROUP BY GROUPING SETS + ( + (number, number + 1, number +2), + (number % 2, number % 3), + (number / 2, number / 3) + ); diff --git a/tests/queries/0_stateless/02482_load_parts_refcounts.sh b/tests/queries/0_stateless/02482_load_parts_refcounts.sh index 27549499a45..4d588dabeb9 100755 --- a/tests/queries/0_stateless/02482_load_parts_refcounts.sh +++ b/tests/queries/0_stateless/02482_load_parts_refcounts.sh @@ -43,9 +43,7 @@ query_with_retry "OPTIMIZE TABLE load_parts_refcounts FINAL SETTINGS optimize_th $CLICKHOUSE_CLIENT --query "DETACH TABLE load_parts_refcounts" $CLICKHOUSE_CLIENT --query "ATTACH TABLE load_parts_refcounts" -query_with_retry " - SELECT throwIf(count() == 0) FROM system.parts - WHERE database = '$CLICKHOUSE_DATABASE' AND table = 'load_parts_refcounts' AND NOT active FORMAT Null" +$CLICKHOUSE_CLIENT --query "SYSTEM WAIT LOADING PARTS load_parts_refcounts" $CLICKHOUSE_CLIENT --query " SELECT DISTINCT refcount FROM system.parts diff --git a/tests/queries/0_stateless/02493_max_streams_for_merge_tree_reading.reference b/tests/queries/0_stateless/02493_max_streams_for_merge_tree_reading.reference index f517be778ed..dfa09193761 100644 --- a/tests/queries/0_stateless/02493_max_streams_for_merge_tree_reading.reference +++ 
b/tests/queries/0_stateless/02493_max_streams_for_merge_tree_reading.reference @@ -31,6 +31,7 @@ select * from (explain pipeline select sum(x) from t settings max_threads=4, max Resize 32 → 16 MergeTreeThread × 32 0 → 1 -- For read-in-order, disable everything +set query_plan_remove_redundant_sorting=0; -- to keep reading in order select sum(x) from (select x from t order by x) settings max_threads=4, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=1, optimize_read_in_order=1, query_plan_read_in_order=1; 49999995000000 select * from (explain pipeline select sum(x) from (select x from t order by x) settings max_threads=4, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=1, optimize_read_in_order=1, query_plan_read_in_order=1) where explain like '%Resize%'; diff --git a/tests/queries/0_stateless/02493_max_streams_for_merge_tree_reading.sql b/tests/queries/0_stateless/02493_max_streams_for_merge_tree_reading.sql index 29fb6062a8e..bf124092b41 100644 --- a/tests/queries/0_stateless/02493_max_streams_for_merge_tree_reading.sql +++ b/tests/queries/0_stateless/02493_max_streams_for_merge_tree_reading.sql @@ -1,3 +1,4 @@ +drop table if exists t; create table t (x UInt64) engine = MergeTree order by x; insert into t select number from numbers_mt(10000000) settings max_insert_threads=8; @@ -20,7 +21,11 @@ select sum(x) from t settings max_threads=4, max_streams_for_merge_tree_reading= select * from (explain pipeline select sum(x) from t settings max_threads=4, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=1, max_streams_to_max_threads_ratio=8) where explain like '%Resize%' or explain like '%MergeTreeThread%'; -- For read-in-order, disable everything +set query_plan_remove_redundant_sorting=0; -- to keep reading in order select sum(x) from (select x from t order by x) settings max_threads=4, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=1, optimize_read_in_order=1, query_plan_read_in_order=1; select * from (explain pipeline select sum(x) from (select x from t order by x) settings max_threads=4, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=1, optimize_read_in_order=1, query_plan_read_in_order=1) where explain like '%Resize%'; select sum(x) from (select x from t order by x) settings max_threads=4, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=1, max_streams_to_max_threads_ratio=8, optimize_read_in_order=1, query_plan_read_in_order=1; select * from (explain pipeline select sum(x) from (select x from t order by x) settings max_threads=4, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=1, max_streams_to_max_threads_ratio=8, optimize_read_in_order=1, query_plan_read_in_order=1) where explain like '%Resize%'; + +-- { echoOff } +drop table t; diff --git a/tests/queries/0_stateless/02494_query_cache_case_agnostic_matching.reference b/tests/queries/0_stateless/02494_query_cache_case_agnostic_matching.reference new file mode 100644 index 00000000000..d2397bbcd34 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_case_agnostic_matching.reference @@ -0,0 +1,6 @@ +1 +1 +--- +1 +1 +1 0 diff --git a/tests/queries/0_stateless/02494_query_cache_case_agnostic_matching.sql b/tests/queries/0_stateless/02494_query_cache_case_agnostic_matching.sql new file mode 100644 index 00000000000..9440a1fd9c0 --- 
/dev/null +++ b/tests/queries/0_stateless/02494_query_cache_case_agnostic_matching.sql @@ -0,0 +1,29 @@ +-- Tags: no-parallel +-- Tag no-parallel: Messes with internal cache + +SET allow_experimental_query_cache = true; + +-- Start with empty query cache (QC) and query log +SYSTEM DROP QUERY CACHE; +DROP TABLE system.query_log SYNC; + +-- Insert an entry into the query cache. +SELECT 1 SETTINGS use_query_cache = true; +-- Check that entry in QC exists +SELECT COUNT(*) FROM system.query_cache; + +-- Run the same SELECT but with different case (--> select). We want its result to be served from the QC. +SELECT '---'; +select 1 SETTINGS use_query_cache = true; + +-- There should still be just one entry in the QC +SELECT COUNT(*) FROM system.query_cache; + +-- The second query should cause a QC hit. +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['QueryCacheHits'], ProfileEvents['QueryCacheMisses'] +FROM system.query_log +WHERE type = 'QueryFinish' + AND query = 'select 1 SETTINGS use_query_cache = true;'; + +SYSTEM DROP QUERY CACHE; diff --git a/tests/queries/0_stateless/02494_query_cache_drop_cache.reference b/tests/queries/0_stateless/02494_query_cache_drop_cache.reference new file mode 100644 index 00000000000..2f1465d1598 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_drop_cache.reference @@ -0,0 +1,3 @@ +1 +1 +0 diff --git a/tests/queries/0_stateless/02494_query_cache_drop_cache.sql b/tests/queries/0_stateless/02494_query_cache_drop_cache.sql new file mode 100644 index 00000000000..1f61472fcb0 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_drop_cache.sql @@ -0,0 +1,12 @@ +-- Tags: no-parallel +-- Tag no-parallel: Messes with internal cache + +SET allow_experimental_query_cache = true; + +-- Cache query result in query cache +SELECT 1 SETTINGS use_query_cache = true; +SELECT count(*) FROM system.query_cache; + +-- No query results are cached after DROP +SYSTEM DROP QUERY CACHE; +SELECT count(*) FROM system.query_cache; diff --git a/tests/queries/0_stateless/02494_query_cache_eligible_queries.reference b/tests/queries/0_stateless/02494_query_cache_eligible_queries.reference new file mode 100644 index 00000000000..33a09d872dd --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_eligible_queries.reference @@ -0,0 +1,19 @@ +1 +1 +0 +0 +0 +0 +eligible_test +0 +1 +0 +a String +b String +0 +1 +0 +0 +0 +0 +0 diff --git a/tests/queries/0_stateless/02494_query_cache_eligible_queries.sql b/tests/queries/0_stateless/02494_query_cache_eligible_queries.sql new file mode 100644 index 00000000000..b4bc9e2c258 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_eligible_queries.sql @@ -0,0 +1,68 @@ +-- Tags: no-parallel +-- Tag no-parallel: Messes with internal cache + +SET allow_experimental_query_cache = true; + +SYSTEM DROP QUERY CACHE; +DROP TABLE IF EXISTS eligible_test; +DROP TABLE IF EXISTS eligible_test2; + +-- enable query cache session-wide but also force it individually in each of below statements +SET use_query_cache = true; + +-- check that SELECT statements create entries in the query cache ... +SELECT 1 SETTINGS use_query_cache = true; +SELECT COUNT(*) FROM system.query_cache; + +SYSTEM DROP QUERY CACHE; + +-- ... 
and all other statements also should not create entries: + +-- CREATE +CREATE TABLE eligible_test (a String) ENGINE=MergeTree ORDER BY a; -- SETTINGS use_query_cache = true; -- SETTINGS rejected as unknown +SELECT COUNT(*) FROM system.query_cache; + +-- ALTER +ALTER TABLE eligible_test ADD COLUMN b String SETTINGS use_query_cache = true; +SELECT COUNT(*) FROM system.query_cache; + +-- INSERT +INSERT INTO eligible_test VALUES('a', 'b'); -- SETTINGS use_query_cache = true; -- SETTINGS rejected as unknown +SELECT COUNT(*) FROM system.query_cache; +INSERT INTO eligible_test SELECT * FROM eligible_test SETTINGS use_query_cache = true; +SELECT COUNT(*) FROM system.query_cache; + +-- SHOW +SHOW TABLES SETTINGS use_query_cache = true; +SELECT COUNT(*) FROM system.query_cache; + +-- CHECK +CHECK TABLE eligible_test SETTINGS use_query_cache = true; +SELECT COUNT(*) FROM system.query_cache; + +-- DESCRIBE +DESCRIBE TABLE eligible_test SETTINGS use_query_cache = true; +SELECT COUNT(*) FROM system.query_cache; + +-- EXISTS +EXISTS TABLE eligible_test SETTINGS use_query_cache = true; +SELECT COUNT(*) FROM system.query_cache; + +-- KILL +KILL QUERY WHERE query_id='3-857d-4a57-9ee0-3c7da5d60a90' SETTINGS use_query_cache = true; +SELECT COUNT(*) FROM system.query_cache; + +-- OPTIMIZE +OPTIMIZE TABLE eligible_test FINAL SETTINGS use_query_cache = true; +SELECT COUNT(*) FROM system.query_cache; + +-- TRUNCATE +TRUNCATE TABLE eligible_test SETTINGS use_query_cache = true; +SELECT COUNT(*) FROM system.query_cache; + +-- RENAME +RENAME TABLE eligible_test TO eligible_test2 SETTINGS use_query_cache = true; +SELECT COUNT(*) FROM system.query_cache; + +SYSTEM DROP QUERY CACHE; +DROP TABLE eligible_test2; diff --git a/tests/queries/0_stateless/02494_query_cache_events.reference b/tests/queries/0_stateless/02494_query_cache_events.reference new file mode 100644 index 00000000000..db60d3699e0 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_events.reference @@ -0,0 +1,6 @@ +--- +1 +0 1 +--- +1 +1 0 diff --git a/tests/queries/0_stateless/02494_query_cache_events.sql b/tests/queries/0_stateless/02494_query_cache_events.sql new file mode 100644 index 00000000000..d775467d525 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_events.sql @@ -0,0 +1,32 @@ +-- Tags: no-parallel +-- Tag no-parallel: Messes with internal cache + +SET allow_experimental_query_cache = true; + +-- Start with empty query cache QC and query log +SYSTEM DROP QUERY CACHE; +DROP TABLE system.query_log SYNC; + +-- Run a query with QC on. The first execution is a QC miss. 
+SELECT '---'; +SELECT 1 SETTINGS use_query_cache = true; + +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['QueryCacheHits'], ProfileEvents['QueryCacheMisses'] +FROM system.query_log +WHERE type = 'QueryFinish' + AND query = 'SELECT 1 SETTINGS use_query_cache = true;'; + + +-- Run previous query again with query cache on +SELECT '---'; +SELECT 1 SETTINGS use_query_cache = true; + +DROP TABLE system.query_log SYNC; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['QueryCacheHits'], ProfileEvents['QueryCacheMisses'] +FROM system.query_log +WHERE type = 'QueryFinish' + AND query = 'SELECT 1 SETTINGS use_query_cache = true;'; + +SYSTEM DROP QUERY CACHE; diff --git a/tests/queries/0_stateless/02494_query_cache_exception_handling.reference b/tests/queries/0_stateless/02494_query_cache_exception_handling.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_exception_handling.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/02494_query_cache_exception_handling.sql b/tests/queries/0_stateless/02494_query_cache_exception_handling.sql new file mode 100644 index 00000000000..4d686d81ed3 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_exception_handling.sql @@ -0,0 +1,12 @@ +-- Tags: no-parallel +-- Tag no-parallel: Messes with internal cache + +SET allow_experimental_query_cache = true; + +SYSTEM DROP QUERY CACHE; + +-- If an exception is thrown during query execution, no entry must be created in the query cache +SELECT throwIf(1) SETTINGS use_query_cache = true; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +SELECT COUNT(*) FROM system.query_cache; + +SYSTEM DROP QUERY CACHE; diff --git a/tests/queries/0_stateless/02494_query_cache_explain.reference b/tests/queries/0_stateless/02494_query_cache_explain.reference new file mode 100644 index 00000000000..ecc965ac391 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_explain.reference @@ -0,0 +1,21 @@ +1 +1 +Expression ((Projection + Before ORDER BY)) + Limit (preliminary LIMIT (without OFFSET)) + ReadFromStorage (SystemNumbers) +Expression ((Projection + Before ORDER BY)) + Limit (preliminary LIMIT (without OFFSET)) + ReadFromStorage (SystemNumbers) +(Expression) +ExpressionTransform + (Limit) + Limit + (ReadFromStorage) + Numbers 0 → 1 +(Expression) +ExpressionTransform + (Limit) + Limit + (ReadFromStorage) + Numbers 0 → 1 +1 diff --git a/tests/queries/0_stateless/02494_query_cache_explain.sql b/tests/queries/0_stateless/02494_query_cache_explain.sql new file mode 100644 index 00000000000..67717efde13 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_explain.sql @@ -0,0 +1,23 @@ +-- Tags: no-parallel +-- Tag no-parallel: Messes with internal cache + +SET allow_experimental_query_cache = true; + +SYSTEM DROP QUERY CACHE; + +-- Run a silly query with a non-trivial plan and put the result into the query cache QC +SELECT 1 + number from system.numbers LIMIT 1 SETTINGS use_query_cache = true; +SELECT count(*) FROM system.query_cache; + +-- EXPLAIN PLAN should show the same regardless if the result is calculated or read from the QC +EXPLAIN PLAN SELECT 1 + number from system.numbers LIMIT 1; +EXPLAIN PLAN SELECT 1 + number from system.numbers LIMIT 1 SETTINGS use_query_cache = true; -- (*) + +-- EXPLAIN PIPELINE should show the same regardless if the result is calculated or read from the QC +EXPLAIN PIPELINE SELECT 1 + number from system.numbers LIMIT 1; +EXPLAIN PIPELINE SELECT 1 + number from system.numbers LIMIT 1 SETTINGS 
use_query_cache = true; -- (*)
+
+-- Statements (*) must not cache their results into the QC
+SELECT count(*) FROM system.query_cache;
+
+SYSTEM DROP QUERY CACHE;
diff --git a/tests/queries/0_stateless/02494_query_cache_min_query_duration.reference b/tests/queries/0_stateless/02494_query_cache_min_query_duration.reference
new file mode 100644
index 00000000000..a081d0a9c1a
--- /dev/null
+++ b/tests/queries/0_stateless/02494_query_cache_min_query_duration.reference
@@ -0,0 +1,5 @@
+1
+1
+---
+1
+0
diff --git a/tests/queries/0_stateless/02494_query_cache_min_query_duration.sql b/tests/queries/0_stateless/02494_query_cache_min_query_duration.sql
new file mode 100644
index 00000000000..7d759c86130
--- /dev/null
+++ b/tests/queries/0_stateless/02494_query_cache_min_query_duration.sql
@@ -0,0 +1,20 @@
+-- Tags: no-parallel
+-- Tag no-parallel: Messes with internal cache
+
+SET allow_experimental_query_cache = true;
+
+SYSTEM DROP QUERY CACHE;
+
+-- This creates an entry in the query cache ...
+SELECT 1 SETTINGS use_query_cache = true;
+SELECT COUNT(*) FROM system.query_cache;
+
+SYSTEM DROP QUERY CACHE;
+
+SELECT '---';
+
+-- ... but this does not because the query executes much faster than the specified minimum query duration for caching the result
+SELECT 1 SETTINGS use_query_cache = true, query_cache_min_query_duration = 10000;
+SELECT COUNT(*) FROM system.query_cache;
+
+SYSTEM DROP QUERY CACHE;
diff --git a/tests/queries/0_stateless/02494_query_cache_min_query_runs.reference b/tests/queries/0_stateless/02494_query_cache_min_query_runs.reference
new file mode 100644
index 00000000000..ae3db066a88
--- /dev/null
+++ b/tests/queries/0_stateless/02494_query_cache_min_query_runs.reference
@@ -0,0 +1,14 @@
+1
+1
+---
+1
+0
+1
+1
+---
+1
+0
+1
+0
+1
+1
diff --git a/tests/queries/0_stateless/02494_query_cache_min_query_runs.sql b/tests/queries/0_stateless/02494_query_cache_min_query_runs.sql
new file mode 100644
index 00000000000..2401beeab13
--- /dev/null
+++ b/tests/queries/0_stateless/02494_query_cache_min_query_runs.sql
@@ -0,0 +1,34 @@
+-- Tags: no-parallel
+-- Tag no-parallel: Messes with internal cache
+
+SET allow_experimental_query_cache = true;
+
+SYSTEM DROP QUERY CACHE;
+
+-- Cache the query result after the 1st query invocation
+SELECT 1 SETTINGS use_query_cache = true, query_cache_min_query_runs = 0;
+SELECT COUNT(*) FROM system.query_cache;
+
+SELECT '---';
+
+SYSTEM DROP QUERY CACHE;
+
+-- Cache the query result after the 2nd query invocation
+SELECT 1 SETTINGS use_query_cache = true, query_cache_min_query_runs = 1;
+SELECT COUNT(*) FROM system.query_cache;
+SELECT 1 SETTINGS use_query_cache = true, query_cache_min_query_runs = 1;
+SELECT COUNT(*) FROM system.query_cache;
+
+SELECT '---';
+
+SYSTEM DROP QUERY CACHE;
+
+-- Cache the query result after the 3rd query invocation
+SELECT 1 SETTINGS use_query_cache = true, query_cache_min_query_runs = 2;
+SELECT COUNT(*) FROM system.query_cache;
+SELECT 1 SETTINGS use_query_cache = true, query_cache_min_query_runs = 2;
+SELECT COUNT(*) FROM system.query_cache;
+SELECT 1 SETTINGS use_query_cache = true, query_cache_min_query_runs = 2;
+SELECT COUNT(*) FROM system.query_cache;
+
+SYSTEM DROP QUERY CACHE;
diff --git a/tests/queries/0_stateless/02494_query_cache_nondeterministic_functions.reference b/tests/queries/0_stateless/02494_query_cache_nondeterministic_functions.reference
new file mode 100644
index 00000000000..cb6165c307a
--- /dev/null
+++ b/tests/queries/0_stateless/02494_query_cache_nondeterministic_functions.reference
@@ -0,0 +1,5 @@
+1
+0
+---
+1
+1
diff --git a/tests/queries/0_stateless/02494_query_cache_nondeterministic_functions.sql b/tests/queries/0_stateless/02494_query_cache_nondeterministic_functions.sql
new file mode 100644
index 00000000000..534d63aa427
--- /dev/null
+++ b/tests/queries/0_stateless/02494_query_cache_nondeterministic_functions.sql
@@ -0,0 +1,18 @@
+-- Tags: no-parallel
+-- Tag no-parallel: Messes with internal cache
+
+SET allow_experimental_query_cache = true;
+
+SYSTEM DROP QUERY CACHE;
+
+-- rand() is non-deterministic. With default settings, no entry should be created in the query cache.
+SELECT COUNT(rand(1)) SETTINGS use_query_cache = true;
+SELECT COUNT(*) FROM system.query_cache;
+
+SELECT '---';
+
+-- But an entry can be forced using a setting
+SELECT COUNT(RAND(1)) SETTINGS use_query_cache = true, query_cache_store_results_of_queries_with_nondeterministic_functions = true;
+SELECT COUNT(*) FROM system.query_cache;
+
+SYSTEM DROP QUERY CACHE;
diff --git a/tests/queries/0_stateless/02494_query_cache_normalize_ast.reference b/tests/queries/0_stateless/02494_query_cache_normalize_ast.reference
new file mode 100644
index 00000000000..bc32b2f2cf3
--- /dev/null
+++ b/tests/queries/0_stateless/02494_query_cache_normalize_ast.reference
@@ -0,0 +1,5 @@
+1
+1
+---
+1
+1 0
diff --git a/tests/queries/0_stateless/02494_query_cache_normalize_ast.sql b/tests/queries/0_stateless/02494_query_cache_normalize_ast.sql
new file mode 100644
index 00000000000..7e3cd273312
--- /dev/null
+++ b/tests/queries/0_stateless/02494_query_cache_normalize_ast.sql
@@ -0,0 +1,31 @@
+-- Tags: no-parallel
+-- Tag no-parallel: Messes with internal cache
+
+SET allow_experimental_query_cache = true;
+
+-- Start with empty query cache (QC) and query log.
+SYSTEM DROP QUERY CACHE;
+DROP TABLE system.query_log SYNC;
+
+-- Run a query whose result gets cached in the query cache.
+-- Besides "use_query_cache", pass two more knobs (one QC-specific knob and one non-QC-specific knob). We just care
+-- *that* they are passed and not about their effect.
+SELECT 1 SETTINGS use_query_cache = true, query_cache_store_results_of_queries_with_nondeterministic_functions = true, max_threads = 16;
+
+-- Check that an entry exists in the QC
+SELECT COUNT(*) FROM system.query_cache;
+
+-- Run the same SELECT but with different SETTINGS. We want its result to be served from the QC (i.e. passive mode,
+-- achieved by disabling the active mode).
+SELECT '---';
+SELECT 1 SETTINGS use_query_cache = true, enable_writes_to_query_cache = false, max_threads = 16;
+
+-- Technically, both SELECT queries have different ASTs, leading to different QC keys. The QC normalizes the AST (it erases all
+-- QC-related settings) so that the keys match regardless. Verify by checking that the second query caused a QC hit.
+SYSTEM FLUSH LOGS;
+SELECT ProfileEvents['QueryCacheHits'], ProfileEvents['QueryCacheMisses']
+FROM system.query_log
+WHERE type = 'QueryFinish'
+  AND query = 'SELECT 1 SETTINGS use_query_cache = true, enable_writes_to_query_cache = false, max_threads = 16;';
+
+SYSTEM DROP QUERY CACHE;
diff --git a/tests/queries/0_stateless/02494_query_cache_passive_usage.reference b/tests/queries/0_stateless/02494_query_cache_passive_usage.reference
new file mode 100644
index 00000000000..edff09773d1
--- /dev/null
+++ b/tests/queries/0_stateless/02494_query_cache_passive_usage.reference
@@ -0,0 +1,12 @@
+1
+0
+-----
+1
+0
+-----
+1
+1
+-----
+1
+1
+1 0
diff --git a/tests/queries/0_stateless/02494_query_cache_passive_usage.sql b/tests/queries/0_stateless/02494_query_cache_passive_usage.sql
new file mode 100644
index 00000000000..8f1e3972b6d
--- /dev/null
+++ b/tests/queries/0_stateless/02494_query_cache_passive_usage.sql
@@ -0,0 +1,42 @@
+-- Tags: no-parallel
+-- Tag no-parallel: Messes with internal cache
+
+SET allow_experimental_query_cache = true;
+
+-- Start with empty query cache (QC).
+SYSTEM DROP QUERY CACHE;
+
+-- By default, the query result is not written into the QC.
+SELECT 1;
+SELECT COUNT(*) FROM system.query_cache;
+
+SELECT '-----';
+
+-- Try to retrieve the query result from the empty QC using the passive mode. Do this by disabling the active mode. The cache should still be empty (no insert).
+SELECT 1 SETTINGS use_query_cache = true, enable_writes_to_query_cache = false;
+SELECT COUNT(*) FROM system.query_cache;
+
+SELECT '-----';
+
+-- Put the query result into the cache.
+SELECT 1 SETTINGS use_query_cache = true;
+SELECT COUNT(*) FROM system.query_cache;
+
+SELECT '-----';
+
+-- Run the same query in passive mode again. There must still be one entry in the QC, and we must get a QC hit.
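+-- (Expected ProfileEvents for this run, per the .reference file above: 1 hit, 0 misses.)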
+ +-- Get rid of log of previous SELECT +DROP TABLE system.query_log SYNC; + +SELECT 1 SETTINGS use_query_cache = true, enable_writes_to_query_cache = false; +SELECT COUNT(*) FROM system.query_cache; + +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['QueryCacheHits'], ProfileEvents['QueryCacheMisses'] +FROM system.query_log +WHERE type = 'QueryFinish' + AND query = 'SELECT 1 SETTINGS use_query_cache = true, enable_writes_to_query_cache = false;'; + +SYSTEM DROP QUERY CACHE; diff --git a/tests/queries/0_stateless/02494_query_cache_secrets.reference b/tests/queries/0_stateless/02494_query_cache_secrets.reference new file mode 100644 index 00000000000..dd6341262bc --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_secrets.reference @@ -0,0 +1,2 @@ +A2193552DCF8A9F99AC35F86BC4D2FFD +SELECT hex(encrypt(\'aes-128-ecb\', \'[HIDDEN]\')) SETTINGS diff --git a/tests/queries/0_stateless/02494_query_cache_secrets.sql b/tests/queries/0_stateless/02494_query_cache_secrets.sql new file mode 100644 index 00000000000..99a972b003c --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_secrets.sql @@ -0,0 +1,15 @@ +-- Tags: no-parallel, no-fasttest +-- Tag no-fasttest: Depends on OpenSSL +-- Tag no-parallel: Messes with internal cache + +SET allow_experimental_query_cache = true; + +SYSTEM DROP QUERY CACHE; + +-- Cache a result of a query with secret in the query cache +SELECT hex(encrypt('aes-128-ecb', 'plaintext', 'passwordpassword')) SETTINGS use_query_cache = true; + +-- The secret should not be revealed in system.query_cache +SELECT query FROM system.query_cache; + +SYSTEM DROP QUERY CACHE; diff --git a/tests/queries/0_stateless/02494_query_cache_ttl_long.reference b/tests/queries/0_stateless/02494_query_cache_ttl_long.reference new file mode 100644 index 00000000000..b8c79f4aee6 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_ttl_long.reference @@ -0,0 +1,10 @@ +1 +1 +0 +0 +0 +1 +--- +1 +1 +0 diff --git a/tests/queries/0_stateless/02494_query_cache_ttl_long.sql b/tests/queries/0_stateless/02494_query_cache_ttl_long.sql new file mode 100644 index 00000000000..135ddf2195c --- /dev/null +++ b/tests/queries/0_stateless/02494_query_cache_ttl_long.sql @@ -0,0 +1,31 @@ +-- Tags: no-fasttest, no-parallel, long +-- Tag no-fasttest: Test runtime is > 6 sec +-- Tag long: Test runtime is > 6 sec +-- Tag no-parallel: Messes with internal cache + +SET allow_experimental_query_cache = true; + +SYSTEM DROP QUERY CACHE; + +-- Cache query result into query cache with a TTL of 3 sec +SELECT 1 SETTINGS use_query_cache = true, query_cache_ttl = 3; + +-- Expect one non-stale cache entry +SELECT COUNT(*) FROM system.query_cache; +SELECT stale FROM system.query_cache; + +-- Wait until entry is expired +SELECT sleep(3); +SELECT sleep(3); +SELECT stale FROM system.query_cache; + +SELECT '---'; + +-- Run same query as before +SELECT 1 SETTINGS use_query_cache = true, query_cache_ttl = 3; + +-- The entry should have been refreshed (non-stale) +SELECT COUNT(*) FROM system.query_cache; +SELECT stale FROM system.query_cache; + +SYSTEM DROP QUERY CACHE; diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference new file mode 100644 index 00000000000..598ff1a490d --- /dev/null +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference @@ -0,0 +1,653 @@ +-- Disabled query_plan_remove_redundant_sorting +-- ORDER BY clauses in subqueries are untouched +Expression (Projection) +Header: number UInt64 + 
Sorting (Sorting for ORDER BY) + Header: number UInt64 + Expression ((Before ORDER BY + Projection)) + Header: number UInt64 + Sorting (Sorting for ORDER BY) + Header: number UInt64 + Expression ((Before ORDER BY + Projection)) + Header: number UInt64 + Sorting (Sorting for ORDER BY) + Header: number UInt64 + Expression (Before ORDER BY) + Header: number UInt64 + ReadFromStorage (SystemNumbers) + Header: number UInt64 +-- Enabled query_plan_remove_redundant_sorting +-- ORDER BY removes ORDER BY clauses in subqueries +-- query +SELECT * +FROM +( + SELECT * + FROM + ( + SELECT * + FROM numbers(3) + ORDER BY number ASC + ) + ORDER BY number DESC +) +ORDER BY number ASC +-- explain +Expression (Projection) +Header: number UInt64 + Sorting (Sorting for ORDER BY) + Header: number UInt64 + Expression ((Before ORDER BY + (Projection + (Before ORDER BY + (Projection + Before ORDER BY))))) + Header: number UInt64 + ReadFromStorage (SystemNumbers) + Header: number UInt64 +-- execute +0 +1 +2 +-- ORDER BY cannot remove ORDER BY in subquery WITH FILL +-- query +SELECT * +FROM +( + SELECT * + FROM + ( + SELECT * + FROM numbers(3) + ORDER BY number DESC + ) + ORDER BY number ASC WITH FILL STEP 1 +) +ORDER BY number ASC +-- explain +Expression (Projection) +Header: number UInt64 + Sorting (Sorting for ORDER BY) + Header: number UInt64 + Expression ((Before ORDER BY + Projection)) + Header: number UInt64 + Filling + Header: number UInt64 + Sorting (Sorting for ORDER BY) + Header: number UInt64 + Expression ((Before ORDER BY + (Projection + Before ORDER BY))) + Header: number UInt64 + ReadFromStorage (SystemNumbers) + Header: number UInt64 +-- execute +0 +1 +2 +-- ORDER BY cannot remove ORDER BY in subquery with LIMIT BY +-- query +SELECT * +FROM +( + SELECT * + FROM + ( + SELECT * + FROM numbers(3) + ORDER BY number DESC + ) + ORDER BY number ASC + LIMIT 1 BY number +) +ORDER BY number ASC +-- explain +Expression (Projection) +Header: number UInt64 + Sorting (Sorting for ORDER BY) + Header: number UInt64 + Expression ((Before ORDER BY + Projection)) + Header: number UInt64 + LimitBy + Header: number UInt64 + Expression (Before LIMIT BY) + Header: number UInt64 + Sorting (Sorting for ORDER BY) + Header: number UInt64 + Expression ((Before ORDER BY + (Projection + Before ORDER BY))) + Header: number UInt64 + ReadFromStorage (SystemNumbers) + Header: number UInt64 +-- execute +0 +1 +2 +-- CROSS JOIN with subqueries, nor ORDER BY nor GROUP BY in main query -> only ORDER BY clauses in most inner subqueries will be removed +-- query +SELECT * +FROM +( + SELECT number + FROM + ( + SELECT number + FROM numbers(3) + ORDER BY number DESC + ) + ORDER BY number ASC +) AS t1, +( + SELECT number + FROM + ( + SELECT number + FROM numbers(3) + ORDER BY number ASC + ) + ORDER BY number DESC +) AS t2 +-- explain +Expression ((Projection + Before ORDER BY)) +Header: number UInt64 + t2.number UInt64 + Join (JOIN FillRightFirst) + Header: number UInt64 + t2.number UInt64 + Expression ((Before JOIN + Projection)) + Header: number UInt64 + Sorting (Sorting for ORDER BY) + Header: number UInt64 + Expression ((Before ORDER BY + (Projection + Before ORDER BY))) + Header: number UInt64 + ReadFromStorage (SystemNumbers) + Header: number UInt64 + Expression ((Joined actions + (Rename joined columns + Projection))) + Header: t2.number UInt64 + Sorting (Sorting for ORDER BY) + Header: number UInt64 + Expression ((Before ORDER BY + (Projection + Before ORDER BY))) + Header: number UInt64 + ReadFromStorage (SystemNumbers) + Header: number 
UInt64 +-- execute +0 2 +0 1 +0 0 +1 2 +1 1 +1 0 +2 2 +2 1 +2 0 +-- CROSS JOIN with subqueries, ORDER BY in main query -> all ORDER BY clauses will be removed in subqueries +-- query +SELECT * +FROM +( + SELECT number + FROM + ( + SELECT number + FROM numbers(3) + ORDER BY number DESC + ) + ORDER BY number ASC +) AS t1, +( + SELECT number + FROM + ( + SELECT number + FROM numbers(3) + ORDER BY number ASC + ) + ORDER BY number DESC +) AS t2 +ORDER BY t1.number, t2.number +-- explain +Expression (Projection) +Header: number UInt64 + t2.number UInt64 + Sorting (Sorting for ORDER BY) + Header: number UInt64 + t2.number UInt64 + Expression (Before ORDER BY) + Header: number UInt64 + t2.number UInt64 + Join (JOIN FillRightFirst) + Header: number UInt64 + t2.number UInt64 + Expression ((Before JOIN + (Projection + (Before ORDER BY + (Projection + Before ORDER BY))))) + Header: number UInt64 + ReadFromStorage (SystemNumbers) + Header: number UInt64 + Expression ((Joined actions + (Rename joined columns + (Projection + (Before ORDER BY + (Projection + Before ORDER BY)))))) + Header: t2.number UInt64 + ReadFromStorage (SystemNumbers) + Header: number UInt64 +-- execute +0 0 +0 1 +0 2 +1 0 +1 1 +1 2 +2 0 +2 1 +2 2 +-- GROUP BY with aggregation function which does NOT depend on order -> eliminate ORDER BY(s) in _all_ subqueries +-- query +SELECT sum(number) +FROM +( + SELECT * + FROM + ( + SELECT * + FROM numbers(3) + ORDER BY number ASC + ) + ORDER BY number DESC +) +GROUP BY number +-- explain +Expression ((Projection + Before ORDER BY)) +Header: sum(number) UInt64 + Aggregating + Header: number UInt64 + sum(number) UInt64 + Expression ((Before GROUP BY + (Projection + (Before ORDER BY + (Projection + Before ORDER BY))))) + Header: number UInt64 + ReadFromStorage (SystemNumbers) + Header: number UInt64 +-- execute +0 +2 +1 +-- GROUP BY with aggregation function which depends on order -> keep ORDER BY in first subquery, and eliminate in second subquery +-- query +SELECT any(number) +FROM +( + SELECT * + FROM + ( + SELECT * + FROM numbers(3) + ORDER BY number ASC + ) + ORDER BY number DESC +) +GROUP BY number +-- explain +Expression ((Projection + Before ORDER BY)) +Header: number UInt64 + Aggregating + Header: number UInt64 + Expression ((Before GROUP BY + (Projection + (Before ORDER BY + (Projection + Before ORDER BY))))) + Header: number UInt64 + ReadFromStorage (SystemNumbers) + Header: number UInt64 +-- execute +0 +2 +1 +-- query with aggregation function but w/o GROUP BY -> remove sorting +-- query +SELECT sum(number) +FROM +( + SELECT * + FROM numbers(10) + ORDER BY number DESC +) +-- explain +Expression ((Projection + Before ORDER BY)) +Header: sum(number) UInt64 + Aggregating + Header: sum(number) UInt64 + Expression ((Before GROUP BY + (Projection + Before ORDER BY))) + Header: number UInt64 + ReadFromStorage (SystemNumbers) + Header: number UInt64 +-- execute +45 +-- check that optimization is applied recursively to subqueries as well +-- GROUP BY with aggregation function which does NOT depend on order -> eliminate ORDER BY in most inner subquery here +-- query +SELECT a +FROM +( + SELECT sum(number) AS a + FROM + ( + SELECT * + FROM numbers(3) + ORDER BY number ASC + ) + GROUP BY number +) +ORDER BY a ASC +-- explain +Expression (Projection) +Header: a UInt64 + Sorting (Sorting for ORDER BY) + Header: a UInt64 + Expression ((Before ORDER BY + (Projection + Before ORDER BY))) + Header: a UInt64 + Aggregating + Header: number UInt64 + sum(number) UInt64 + Expression ((Before GROUP BY + 
(Projection + Before ORDER BY))) + Header: number UInt64 + ReadFromStorage (SystemNumbers) + Header: number UInt64 +-- execute +0 +1 +2 +-- GROUP BY with aggregation function which depends on order -> ORDER BY in subquery is kept due to the aggregation function +-- query +SELECT a +FROM +( + SELECT any(number) AS a + FROM + ( + SELECT * + FROM numbers(3) + ORDER BY number ASC + ) + GROUP BY number +) +ORDER BY a ASC +-- explain +Expression (Projection) +Header: a UInt64 + Sorting (Sorting for ORDER BY) + Header: a UInt64 + Expression ((Before ORDER BY + (Projection + Before ORDER BY))) + Header: a UInt64 + Aggregating + Header: number UInt64 + Expression ((Before GROUP BY + (Projection + Before ORDER BY))) + Header: number UInt64 + ReadFromStorage (SystemNumbers) + Header: number UInt64 +-- execute +0 +1 +2 +-- Check that optimization works for subqueries as well, - main query have neither ORDER BY nor GROUP BY +-- query +SELECT a +FROM +( + SELECT any(number) AS a + FROM + ( + SELECT * + FROM + ( + SELECT * + FROM numbers(3) + ORDER BY number DESC + ) + ORDER BY number ASC + ) + GROUP BY number +) +WHERE a > 0 +-- explain +Expression ((Projection + (Before ORDER BY + ))) +Header: a UInt64 + Aggregating + Header: number UInt64 + Filter + Header: number UInt64 + Filter (( + (Before GROUP BY + (Projection + (Before ORDER BY + (Projection + Before ORDER BY)))))) + Header: number UInt64 + ReadFromStorage (SystemNumbers) + Header: number UInt64 +-- execute +2 +1 +-- GROUP BY in most inner query makes execution parallelized, and removing inner sorting steps will keep it that way. But need to correctly update data streams sorting properties after removing sorting steps +-- query +SELECT * +FROM +( + SELECT * + FROM + ( + SELECT * + FROM numbers(3) + GROUP BY number + ORDER BY number ASC + ) + ORDER BY number ASC +) +ORDER BY number ASC +-- explain +Expression (Projection) +Header: number UInt64 + Sorting (Sorting for ORDER BY) + Header: number UInt64 + Expression ((Before ORDER BY + (Projection + (Before ORDER BY + (Projection + Before ORDER BY))))) + Header: number UInt64 + Aggregating + Header: number UInt64 + Expression (Before GROUP BY) + Header: number UInt64 + ReadFromStorage (SystemNumbers) + Header: number UInt64 +-- execute +0 +1 +2 +-- sum() with Floats depends on order, -> sorting is not removed here +-- query +SELECT + toTypeName(sum(v)), + sum(v) +FROM +( + SELECT v + FROM + ( + SELECT CAST('9007199254740992', 'Float64') AS v + UNION ALL + SELECT CAST('1', 'Float64') AS v + UNION ALL + SELECT CAST('1', 'Float64') AS v + ) + ORDER BY v ASC +) +-- explain +Expression ((Projection + Before ORDER BY)) +Header: toTypeName(sum(v)) String + sum(v) Float64 + Aggregating + Header: sum(v) Float64 + Expression ((Before GROUP BY + Projection)) + Header: v Float64 + Sorting (Sorting for ORDER BY) + Header: v Float64 + Union + Header: v Float64 + Expression ((Before ORDER BY + (Conversion before UNION + (Projection + Before ORDER BY)))) + Header: v Float64 + ReadFromStorage (SystemOne) + Header: dummy UInt8 + Expression (( + (Conversion before UNION + (Projection + Before ORDER BY)))) + Header: v Float64 + ReadFromStorage (SystemOne) + Header: dummy UInt8 + Expression (( + (Conversion before UNION + (Projection + Before ORDER BY)))) + Header: v Float64 + ReadFromStorage (SystemOne) + Header: dummy UInt8 +-- execute +Float64 9007199254740994 +-- sum() with Nullable(Floats) depends on order, -> sorting is not removed here +-- query +SELECT + toTypeName(sum(v)), + sum(v) +FROM +( + SELECT v + FROM + 
( + SELECT '9007199254740992'::Nullable(Float64) AS v + UNION ALL + SELECT '1'::Nullable(Float64) AS v + UNION ALL + SELECT '1'::Nullable(Float64) AS v + ) + ORDER BY v ASC +) +-- explain +Expression ((Projection + Before ORDER BY)) +Header: toTypeName(sum(v)) String + sum(v) Nullable(Float64) + Aggregating + Header: sum(v) Nullable(Float64) + Expression ((Before GROUP BY + Projection)) + Header: v Nullable(Float64) + Sorting (Sorting for ORDER BY) + Header: v Nullable(Float64) + Union + Header: v Nullable(Float64) + Expression ((Before ORDER BY + (Conversion before UNION + (Projection + Before ORDER BY)))) + Header: v Nullable(Float64) + ReadFromStorage (SystemOne) + Header: dummy UInt8 + Expression (( + (Conversion before UNION + (Projection + Before ORDER BY)))) + Header: v Nullable(Float64) + ReadFromStorage (SystemOne) + Header: dummy UInt8 + Expression (( + (Conversion before UNION + (Projection + Before ORDER BY)))) + Header: v Nullable(Float64) + ReadFromStorage (SystemOne) + Header: dummy UInt8 +-- execute +Nullable(Float64) 9007199254740994 +-- sumIf() with Floats depends on order, -> sorting is not removed here +-- query +SELECT + toTypeName(sumIf(v, v > 0)), + sumIf(v, v > 0) +FROM +( + SELECT v + FROM + ( + SELECT CAST('9007199254740992', 'Float64') AS v + UNION ALL + SELECT CAST('1', 'Float64') AS v + UNION ALL + SELECT CAST('1', 'Float64') AS v + ) + ORDER BY v ASC +) +-- explain +Expression ((Projection + Before ORDER BY)) +Header: toTypeName(sumIf(v, greater(v, 0))) String + sumIf(v, greater(v, 0)) Float64 + Aggregating + Header: sumIf(v, greater(v, 0)) Float64 + Expression ((Before GROUP BY + Projection)) + Header: v Float64 + greater(v, 0) UInt8 + Sorting (Sorting for ORDER BY) + Header: v Float64 + Union + Header: v Float64 + Expression ((Before ORDER BY + (Conversion before UNION + (Projection + Before ORDER BY)))) + Header: v Float64 + ReadFromStorage (SystemOne) + Header: dummy UInt8 + Expression (( + (Conversion before UNION + (Projection + Before ORDER BY)))) + Header: v Float64 + ReadFromStorage (SystemOne) + Header: dummy UInt8 + Expression (( + (Conversion before UNION + (Projection + Before ORDER BY)))) + Header: v Float64 + ReadFromStorage (SystemOne) + Header: dummy UInt8 +-- execute +Float64 9007199254740994 +-- disable common optimization to avoid functions to be lifted up (liftUpFunctions optimization), needed for testing with stateful function +-- neighbor() as stateful function prevents removing inner ORDER BY since its result depends on order +-- query +SELECT + number, + neighbor(number, 2) +FROM +( + SELECT * + FROM numbers(10) + ORDER BY number DESC +) +ORDER BY number ASC +-- explain +Expression (Projection) +Header: number UInt64 + neighbor(number, 2) UInt64 + Sorting (Sorting for ORDER BY) + Header: number UInt64 + neighbor(number, 2) UInt64 + Expression (Before ORDER BY) + Header: number UInt64 + neighbor(number, 2) UInt64 + Expression (Projection) + Header: number UInt64 + Sorting (Sorting for ORDER BY) + Header: number UInt64 + Expression (Before ORDER BY) + Header: number UInt64 + ReadFromStorage (SystemNumbers) + Header: number UInt64 +-- execute +0 0 +1 0 +2 0 +3 1 +4 2 +5 3 +6 4 +7 5 +8 6 +9 7 +-- non-stateful function does _not_ prevent removing inner ORDER BY +-- query +SELECT + number, + plus(number, 2) +FROM +( + SELECT * + FROM numbers(10) + ORDER BY number DESC +) +-- explain +Expression (Projection) +Header: number UInt64 + plus(number, 2) UInt64 + Expression (Before ORDER BY) + Header: number UInt64 + plus(number, 2) UInt64 + 
Expression (Projection) + Header: number UInt64 + Sorting (Sorting for ORDER BY) + Header: number UInt64 + Expression (Before ORDER BY) + Header: number UInt64 + ReadFromStorage (SystemNumbers) + Header: number UInt64 +-- execute +9 11 +8 10 +7 9 +6 8 +5 7 +4 6 +3 5 +2 4 +1 3 +0 2 diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting.sh b/tests/queries/0_stateless/02496_remove_redundant_sorting.sh new file mode 100755 index 00000000000..45d8e188824 --- /dev/null +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting.sh @@ -0,0 +1,319 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +DISABLE_OPTIMIZATION="SET query_plan_remove_redundant_sorting=0;SET optimize_duplicate_order_by_and_distinct=0" +ENABLE_OPTIMIZATION="SET query_plan_remove_redundant_sorting=1;SET optimize_duplicate_order_by_and_distinct=0" + +echo "-- Disabled query_plan_remove_redundant_sorting" +echo "-- ORDER BY clauses in subqueries are untouched" +query="SELECT * +FROM +( + SELECT * + FROM + ( + SELECT * + FROM numbers(3) + ORDER BY number ASC + ) + ORDER BY number DESC +) +ORDER BY number ASC" +$CLICKHOUSE_CLIENT -nq "$DISABLE_OPTIMIZATION;EXPLAIN header=1 $query" + +function run_query { + echo "-- query" + echo "$1" + echo "-- explain" + $CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;EXPLAIN header=1 $1" + echo "-- execute" + $CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;$1" +} + +echo "-- Enabled query_plan_remove_redundant_sorting" +echo "-- ORDER BY removes ORDER BY clauses in subqueries" +run_query "$query" + +echo "-- ORDER BY cannot remove ORDER BY in subquery WITH FILL" +query="SELECT * +FROM +( + SELECT * + FROM + ( + SELECT * + FROM numbers(3) + ORDER BY number DESC + ) + ORDER BY number ASC WITH FILL STEP 1 +) +ORDER BY number ASC" +run_query "$query" + +echo "-- ORDER BY cannot remove ORDER BY in subquery with LIMIT BY" +query="SELECT * +FROM +( + SELECT * + FROM + ( + SELECT * + FROM numbers(3) + ORDER BY number DESC + ) + ORDER BY number ASC + LIMIT 1 BY number +) +ORDER BY number ASC" +run_query "$query" + +echo "-- CROSS JOIN with subqueries, nor ORDER BY nor GROUP BY in main query -> only ORDER BY clauses in most inner subqueries will be removed" +query="SELECT * +FROM +( + SELECT number + FROM + ( + SELECT number + FROM numbers(3) + ORDER BY number DESC + ) + ORDER BY number ASC +) AS t1, +( + SELECT number + FROM + ( + SELECT number + FROM numbers(3) + ORDER BY number ASC + ) + ORDER BY number DESC +) AS t2" +run_query "$query" + +echo "-- CROSS JOIN with subqueries, ORDER BY in main query -> all ORDER BY clauses will be removed in subqueries" +query="SELECT * +FROM +( + SELECT number + FROM + ( + SELECT number + FROM numbers(3) + ORDER BY number DESC + ) + ORDER BY number ASC +) AS t1, +( + SELECT number + FROM + ( + SELECT number + FROM numbers(3) + ORDER BY number ASC + ) + ORDER BY number DESC +) AS t2 +ORDER BY t1.number, t2.number" +run_query "$query" + +echo "-- GROUP BY with aggregation function which does NOT depend on order -> eliminate ORDER BY(s) in _all_ subqueries" +query="SELECT sum(number) +FROM +( + SELECT * + FROM + ( + SELECT * + FROM numbers(3) + ORDER BY number ASC + ) + ORDER BY number DESC +) +GROUP BY number" +run_query "$query" + +echo "-- GROUP BY with aggregation function which depends on order -> keep ORDER BY in first subquery, and eliminate in second subquery" +query="SELECT any(number) +FROM +( + SELECT * + FROM + ( + SELECT * + FROM numbers(3) + 
ORDER BY number ASC + ) + ORDER BY number DESC +) +GROUP BY number" +run_query "$query" + +echo "-- query with aggregation function but w/o GROUP BY -> remove sorting" +query="SELECT sum(number) +FROM +( + SELECT * + FROM numbers(10) + ORDER BY number DESC +)" +run_query "$query" + +echo "-- check that optimization is applied recursively to subqueries as well" +echo "-- GROUP BY with aggregation function which does NOT depend on order -> eliminate ORDER BY in most inner subquery here" +query="SELECT a +FROM +( + SELECT sum(number) AS a + FROM + ( + SELECT * + FROM numbers(3) + ORDER BY number ASC + ) + GROUP BY number +) +ORDER BY a ASC" +run_query "$query" + +echo "-- GROUP BY with aggregation function which depends on order -> ORDER BY in subquery is kept due to the aggregation function" +query="SELECT a +FROM +( + SELECT any(number) AS a + FROM + ( + SELECT * + FROM numbers(3) + ORDER BY number ASC + ) + GROUP BY number +) +ORDER BY a ASC" +run_query "$query" + +echo "-- Check that optimization works for subqueries as well, - main query have neither ORDER BY nor GROUP BY" +query="SELECT a +FROM +( + SELECT any(number) AS a + FROM + ( + SELECT * + FROM + ( + SELECT * + FROM numbers(3) + ORDER BY number DESC + ) + ORDER BY number ASC + ) + GROUP BY number +) +WHERE a > 0" +run_query "$query" + +echo "-- GROUP BY in most inner query makes execution parallelized, and removing inner sorting steps will keep it that way. But need to correctly update data streams sorting properties after removing sorting steps" +query="SELECT * +FROM +( + SELECT * + FROM + ( + SELECT * + FROM numbers(3) + GROUP BY number + ORDER BY number ASC + ) + ORDER BY number ASC +) +ORDER BY number ASC" +run_query "$query" + +echo "-- sum() with Floats depends on order, -> sorting is not removed here" +query="SELECT + toTypeName(sum(v)), + sum(v) +FROM +( + SELECT v + FROM + ( + SELECT CAST('9007199254740992', 'Float64') AS v + UNION ALL + SELECT CAST('1', 'Float64') AS v + UNION ALL + SELECT CAST('1', 'Float64') AS v + ) + ORDER BY v ASC +)" +run_query "$query" + +echo "-- sum() with Nullable(Floats) depends on order, -> sorting is not removed here" +query="SELECT + toTypeName(sum(v)), + sum(v) +FROM +( + SELECT v + FROM + ( + SELECT '9007199254740992'::Nullable(Float64) AS v + UNION ALL + SELECT '1'::Nullable(Float64) AS v + UNION ALL + SELECT '1'::Nullable(Float64) AS v + ) + ORDER BY v ASC +)" +run_query "$query" + +echo "-- sumIf() with Floats depends on order, -> sorting is not removed here" +query="SELECT + toTypeName(sumIf(v, v > 0)), + sumIf(v, v > 0) +FROM +( + SELECT v + FROM + ( + SELECT CAST('9007199254740992', 'Float64') AS v + UNION ALL + SELECT CAST('1', 'Float64') AS v + UNION ALL + SELECT CAST('1', 'Float64') AS v + ) + ORDER BY v ASC +)" +run_query "$query" + +echo "-- disable common optimization to avoid functions to be lifted up (liftUpFunctions optimization), needed for testing with stateful function" +ENABLE_OPTIMIZATION="SET query_plan_enable_optimizations=0;$ENABLE_OPTIMIZATION" +echo "-- neighbor() as stateful function prevents removing inner ORDER BY since its result depends on order" +query="SELECT + number, + neighbor(number, 2) +FROM +( + SELECT * + FROM numbers(10) + ORDER BY number DESC +) +ORDER BY number ASC" +run_query "$query" + +echo "-- non-stateful function does _not_ prevent removing inner ORDER BY" +query="SELECT + number, + plus(number, 2) +FROM +( + SELECT * + FROM numbers(10) + ORDER BY number DESC +)" +run_query "$query" diff --git 
a/tests/queries/0_stateless/02498_analyzer_settings_push_down.reference b/tests/queries/0_stateless/02498_analyzer_settings_push_down.reference new file mode 100644 index 00000000000..583da07380e --- /dev/null +++ b/tests/queries/0_stateless/02498_analyzer_settings_push_down.reference @@ -0,0 +1,120 @@ +-- { echoOn } + +SELECT value FROM (SELECT tupleElement(value, 'a') AS value FROM test_table); +0 +EXPLAIN QUERY TREE SELECT value FROM ( + SELECT tupleElement(value, 'a') AS value FROM test_table +); +QUERY id: 0 + PROJECTION COLUMNS + value UInt64 + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value, result_type: UInt64, source_id: 3 + JOIN TREE + QUERY id: 3, is_subquery: 1 + PROJECTION COLUMNS + value UInt64 + PROJECTION + LIST id: 4, nodes: 1 + FUNCTION id: 5, function_name: tupleElement, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 6, nodes: 2 + COLUMN id: 7, column_name: value, result_type: Tuple(a UInt64), source_id: 8 + CONSTANT id: 9, constant_value: \'a\', constant_value_type: String + JOIN TREE + TABLE id: 8, table_name: default.test_table +SELECT '--'; +-- +EXPLAIN QUERY TREE SELECT value FROM ( + SELECT tupleElement(value, 'a') AS value FROM test_table +) SETTINGS optimize_functions_to_subcolumns = 1; +QUERY id: 0 + PROJECTION COLUMNS + value UInt64 + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value, result_type: UInt64, source_id: 3 + JOIN TREE + QUERY id: 3, is_subquery: 1 + PROJECTION COLUMNS + value UInt64 + PROJECTION + LIST id: 4, nodes: 1 + COLUMN id: 5, column_name: value.a, result_type: UInt64, source_id: 6 + JOIN TREE + TABLE id: 6, table_name: default.test_table + SETTINGS optimize_functions_to_subcolumns=1 +SELECT '--'; +-- +EXPLAIN QUERY TREE SELECT value FROM ( + SELECT tupleElement(value, 'a') AS value FROM test_table SETTINGS optimize_functions_to_subcolumns = 0 +) SETTINGS optimize_functions_to_subcolumns = 1; +QUERY id: 0 + PROJECTION COLUMNS + value UInt64 + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value, result_type: UInt64, source_id: 3 + JOIN TREE + QUERY id: 3, is_subquery: 1 + PROJECTION COLUMNS + value UInt64 + PROJECTION + LIST id: 4, nodes: 1 + FUNCTION id: 5, function_name: tupleElement, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 6, nodes: 2 + COLUMN id: 7, column_name: value, result_type: Tuple(a UInt64), source_id: 8 + CONSTANT id: 9, constant_value: \'a\', constant_value_type: String + JOIN TREE + TABLE id: 8, table_name: default.test_table + SETTINGS optimize_functions_to_subcolumns=0 + SETTINGS optimize_functions_to_subcolumns=1 +SELECT '--'; +-- +EXPLAIN QUERY TREE SELECT value FROM ( + SELECT tupleElement(value, 'a') AS value FROM test_table +) SETTINGS optimize_functions_to_subcolumns = 0; +QUERY id: 0 + PROJECTION COLUMNS + value UInt64 + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value, result_type: UInt64, source_id: 3 + JOIN TREE + QUERY id: 3, is_subquery: 1 + PROJECTION COLUMNS + value UInt64 + PROJECTION + LIST id: 4, nodes: 1 + FUNCTION id: 5, function_name: tupleElement, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 6, nodes: 2 + COLUMN id: 7, column_name: value, result_type: Tuple(a UInt64), source_id: 8 + CONSTANT id: 9, constant_value: \'a\', constant_value_type: String + JOIN TREE + TABLE id: 8, table_name: default.test_table + SETTINGS optimize_functions_to_subcolumns=0 +SELECT '--'; +-- +EXPLAIN QUERY TREE SELECT value FROM ( + SELECT tupleElement(value, 'a') AS value FROM 
test_table SETTINGS optimize_functions_to_subcolumns = 1 +) SETTINGS optimize_functions_to_subcolumns = 0; +QUERY id: 0 + PROJECTION COLUMNS + value UInt64 + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value, result_type: UInt64, source_id: 3 + JOIN TREE + QUERY id: 3, is_subquery: 1 + PROJECTION COLUMNS + value UInt64 + PROJECTION + LIST id: 4, nodes: 1 + COLUMN id: 5, column_name: value.a, result_type: UInt64, source_id: 6 + JOIN TREE + TABLE id: 6, table_name: default.test_table + SETTINGS optimize_functions_to_subcolumns=1 + SETTINGS optimize_functions_to_subcolumns=0 diff --git a/tests/queries/0_stateless/02498_analyzer_settings_push_down.sql b/tests/queries/0_stateless/02498_analyzer_settings_push_down.sql new file mode 100644 index 00000000000..91bdce2cca9 --- /dev/null +++ b/tests/queries/0_stateless/02498_analyzer_settings_push_down.sql @@ -0,0 +1,42 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table (id UInt64, value Tuple(a UInt64)) ENGINE=MergeTree ORDER BY id; + +INSERT INTO test_table VALUES (0, tuple(0)); + +-- { echoOn } + +SELECT value FROM (SELECT tupleElement(value, 'a') AS value FROM test_table); + +EXPLAIN QUERY TREE SELECT value FROM ( + SELECT tupleElement(value, 'a') AS value FROM test_table +); + +SELECT '--'; + +EXPLAIN QUERY TREE SELECT value FROM ( + SELECT tupleElement(value, 'a') AS value FROM test_table +) SETTINGS optimize_functions_to_subcolumns = 1; + +SELECT '--'; + +EXPLAIN QUERY TREE SELECT value FROM ( + SELECT tupleElement(value, 'a') AS value FROM test_table SETTINGS optimize_functions_to_subcolumns = 0 +) SETTINGS optimize_functions_to_subcolumns = 1; + +SELECT '--'; + +EXPLAIN QUERY TREE SELECT value FROM ( + SELECT tupleElement(value, 'a') AS value FROM test_table +) SETTINGS optimize_functions_to_subcolumns = 0; + +SELECT '--'; + +EXPLAIN QUERY TREE SELECT value FROM ( + SELECT tupleElement(value, 'a') AS value FROM test_table SETTINGS optimize_functions_to_subcolumns = 1 +) SETTINGS optimize_functions_to_subcolumns = 0; + +-- { echoOff } + +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/02517_fuse_bug_44712.sql b/tests/queries/0_stateless/02517_fuse_bug_44712.sql deleted file mode 100644 index 894bf9e06d5..00000000000 --- a/tests/queries/0_stateless/02517_fuse_bug_44712.sql +++ /dev/null @@ -1,10 +0,0 @@ -DROP TABLE IF EXISTS fuse_tbl__fuzz_35; - -CREATE TABLE fuse_tbl__fuzz_35 (`a` UInt8, `b` Nullable(Int16)) ENGINE = Log; -INSERT INTO fuse_tbl__fuzz_35 SELECT number, number + 1 FROM numbers(1000); - -set allow_experimental_analyzer = 0, optimize_syntax_fuse_functions = 1, optimize_fuse_sum_count_avg = 1; - -SELECT quantile(0.5)(b), quantile(0.9)(b) FROM (SELECT x + 2147483648 AS b FROM (SELECT quantile(0.5)(b) AS x FROM fuse_tbl__fuzz_35) GROUP BY x) FORMAT Null; - -DROP TABLE IF EXISTS fuse_tbl__fuzz_35; diff --git a/tests/queries/0_stateless/02521_merge_over_gap.sh b/tests/queries/0_stateless/02521_merge_over_gap.sh index 8d6ed777ba9..4f73b5bceee 100755 --- a/tests/queries/0_stateless/02521_merge_over_gap.sh +++ b/tests/queries/0_stateless/02521_merge_over_gap.sh @@ -45,4 +45,5 @@ $CLICKHOUSE_CLIENT --query "DETACH TABLE table_with_gap;" $CLICKHOUSE_CLIENT --query "ATTACH TABLE table_with_gap;" $CLICKHOUSE_CLIENT --query "SELECT 'parts after detach/attach';" +$CLICKHOUSE_CLIENT --query "SYSTEM WAIT LOADING PARTS table_with_gap;" $CLICKHOUSE_CLIENT --query "SELECT name, rows, active FROM system.parts WHERE table = 'table_with_gap' AND database = 
currentDatabase();" diff --git a/tests/queries/0_stateless/02521_to_custom_day_of_week.reference b/tests/queries/0_stateless/02521_to_custom_day_of_week.reference new file mode 100644 index 00000000000..660dff37b72 --- /dev/null +++ b/tests/queries/0_stateless/02521_to_custom_day_of_week.reference @@ -0,0 +1,7 @@ +1 7 +1 7 +0 6 +1 0 +2 1 +1 7 +0 6 diff --git a/tests/queries/0_stateless/02521_to_custom_day_of_week.sql b/tests/queries/0_stateless/02521_to_custom_day_of_week.sql new file mode 100644 index 00000000000..5475e15a984 --- /dev/null +++ b/tests/queries/0_stateless/02521_to_custom_day_of_week.sql @@ -0,0 +1,10 @@ + +with toDate('2023-01-09') as date_mon, date_mon - 1 as date_sun select toDayOfWeek(date_mon), toDayOfWeek(date_sun); +with toDate('2023-01-09') as date_mon, date_mon - 1 as date_sun select toDayOfWeek(date_mon, 0), toDayOfWeek(date_sun, 0); +with toDate('2023-01-09') as date_mon, date_mon - 1 as date_sun select toDayOfWeek(date_mon, 1), toDayOfWeek(date_sun, 1); +with toDate('2023-01-09') as date_mon, date_mon - 1 as date_sun select toDayOfWeek(date_mon, 2), toDayOfWeek(date_sun, 2); +with toDate('2023-01-09') as date_mon, date_mon - 1 as date_sun select toDayOfWeek(date_mon, 3), toDayOfWeek(date_sun, 3); +with toDate('2023-01-09') as date_mon, date_mon - 1 as date_sun select toDayOfWeek(date_mon, 4), toDayOfWeek(date_sun, 4); +with toDate('2023-01-09') as date_mon, date_mon - 1 as date_sun select toDayOfWeek(date_mon, 5), toDayOfWeek(date_sun, 5); + +select toDayOfWeek(today(), -1); -- { serverError 43 } diff --git a/tests/queries/0_stateless/02521_tsv_csv_custom_header_detection.reference b/tests/queries/0_stateless/02521_tsv_csv_custom_header_detection.reference new file mode 100644 index 00000000000..4279025be39 --- /dev/null +++ b/tests/queries/0_stateless/02521_tsv_csv_custom_header_detection.reference @@ -0,0 +1,173 @@ +CSV +1 +x Nullable(Int64) +y Nullable(String) +z Array(Nullable(Int64)) +123 Hello [1,2,3] +456 World [4,5,6] +2 +x UInt32 +y String +z Array(UInt32) +123 Hello [1,2,3] +456 World [4,5,6] +3 +c1 Nullable(String) +c2 Nullable(String) +c3 Nullable(String) +x y z +123 Hello World +456 World Hello +4 +c1 Nullable(String) +c2 Nullable(String) +c3 Nullable(String) +x y z +UInt32 String Array(UInt32) +123 Hello World +456 World Hello +5 +c1 Nullable(String) +c2 Nullable(String) +c3 Nullable(String) +x y z +6 +c1 Nullable(String) +c2 Nullable(String) +c3 Nullable(String) +x y z +UInt32 String Array(UInt32) +7 +c1 Nullable(String) +c2 Nullable(String) +c3 Nullable(String) +x y z +UInt32 String Array +8 +c1 Nullable(String) +c2 Nullable(String) +c3 Nullable(String) +x y z +UInt32 String Array +123 Hello [1,2,3] +9 +x Nullable(Int64) +y Nullable(String) +z Array(Nullable(Int64)) +123 Hello [1,2,3] +10 +x Nullable(Int64) +y Nullable(String) +z Array(Nullable(Int64)) +123 "Hello" [1,2,3] +11 +c1 Nullable(String) +c2 Nullable(String) +c3 Nullable(String) +x y z +Hello \N World +12 +x Nullable(String) +y Nullable(String) +z Array(Nullable(Int64)) +Hello \N [1,2,3] +13 +c1 Nullable(String) +c2 Nullable(String) +c3 Nullable(String) +x y z +Hello \N World +\N Hello \N +14 +x Nullable(String) +y Nullable(String) +z Array(Nullable(Int64)) +Hello \N [] +\N \N [1,2,3] +15 +c1 Nullable(String) +c2 Nullable(String) +c3 Nullable(String) +x y z +Hello \N \N +\N World \N +16 +a"b Nullable(Int64) +c Nullable(Int64) +1 2 +17 +1 2 +18 +1 0 +TSV +1 +x Nullable(Int64) +y Nullable(String) +z Array(Nullable(Int64)) +123 Hello [1,2,3] +456 World [4,5,6] +2 +x UInt32 +y String +z 
Array(UInt32) +123 Hello [1,2,3] +456 World [4,5,6] +3 +c1 Nullable(String) +c2 Nullable(String) +c3 Nullable(String) +x y z +Foo Hello World +Bar World Hello +4 +c1 Nullable(String) +c2 Nullable(String) +c3 Nullable(String) +x y z +UInt32 String Array(UInt32) +Foo Hello World +Bar World Hello +CustomSeparated +1 +x Nullable(Int64) +y Nullable(String) +z Array(Nullable(Int64)) +123 Hello [1,2,3] +456 World [4,5,6] +2 +x UInt32 +y String +z Array(UInt32) +123 Hello [1,2,3] +456 World [4,5,6] +3 +c1 Nullable(String) +c2 Nullable(String) +c3 Nullable(String) +x y z +Foo Hello World +Bar World Hello +4 +c1 Nullable(String) +c2 Nullable(String) +c3 Nullable(String) +x y z +UInt32 String Array(UInt32) +Foo Hello World +Bar World Hello +5 +c1 Nullable(String) +c2 Nullable(String) +c3 Nullable(String) +x y z +6 +c1 Nullable(String) +c2 Nullable(String) +c3 Nullable(String) +x y z +UInt32 String Array(UInt32) +7 +x UInt32 +y String +z Array(UInt32) +42 Hello [1,2,3] diff --git a/tests/queries/0_stateless/02521_tsv_csv_custom_header_detection.sh b/tests/queries/0_stateless/02521_tsv_csv_custom_header_detection.sh new file mode 100755 index 00000000000..a20afef875e --- /dev/null +++ b/tests/queries/0_stateless/02521_tsv_csv_custom_header_detection.sh @@ -0,0 +1,313 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +echo "CSV" +echo 1 +echo '"x","y","z" +123,"Hello","[1,2,3]" +456,"World","[4,5,6]"' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "desc test"; + +echo '"x","y","z" +123,"Hello","[1,2,3]" +456,"World","[4,5,6]"' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "select * from test"; + +echo 2 +echo '"x","y","z" +"UInt32","String","Array(UInt32)" +123,"Hello","[1,2,3]" +456,"World","[4,5,6]"' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "desc test"; + +echo '"x","y","z" +"UInt32","String","Array(UInt32)" +123,"Hello","[1,2,3]" +456,"World","[4,5,6]"' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "select * from test"; + +echo 3 +echo '"x","y","z" +"123","Hello","World" +"456","World","Hello"' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "desc test"; + +echo '"x","y","z" +"123","Hello","World" +"456","World","Hello"' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "select * from test"; + +echo 4 +echo '"x","y","z" +"UInt32","String","Array(UInt32)" +"123","Hello","World" +"456","World","Hello"' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "desc test"; + +echo '"x","y","z" +"UInt32","String","Array(UInt32)" +"123","Hello","World" +"456","World","Hello"' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "select * from test"; + +echo 5 +echo '"x","y","z"' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "desc test"; +echo '"x","y","z"'| $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "select * from test"; + +echo 6 +echo '"x","y","z" +"UInt32","String","Array(UInt32)"' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "desc test"; + +echo '"x","y","z" +"UInt32","String","Array(UInt32)"'| $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "select * from test"; + +echo 7 +echo '"x","y","z" +"UInt32","String","Array"' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "desc test"; + +echo '"x","y","z" +"UInt32","String","Array"'| $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "select * from test"; + +echo 8 +echo '"x","y","z" 
+"UInt32","String","Array" +"123","Hello","[1,2,3]"' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "desc test"; + +echo '"x","y","z" +"UInt32","String","Array" +"123","Hello","[1,2,3]"' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "select * from test"; + +echo 9 +echo '"x","y","z" +123,"Hello","[1,2,3]"' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "desc test"; + +echo '"x","y","z" +123,"Hello","[1,2,3]"'| $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "select * from test"; + +echo 10 +echo '"x","y","z" +123,"""Hello""","[1,2,3]"' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "desc test"; + +echo '"x","y","z" +123,"""Hello""","[1,2,3]"'| $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "select * from test"; + +echo 11 +echo '"x","y","z" +"Hello",\N,"World"' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "desc test"; + +echo '"x","y","z" +"Hello",\N,"World"'| $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "select * from test"; + +echo 12 +echo '"x","y","z" +"Hello",\N,"[1,2,3]"' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "desc test"; + +echo '"x","y","z" +"Hello",\N,"[1,2,3]"'| $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "select * from test"; + +echo 13 +echo '"x","y","z" +"Hello",\N,"World" +\N,"Hello",\N' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "desc test"; + +echo '"x","y","z" +"Hello",\N,"World" +\N,"Hello",\N'| $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "select * from test"; + +echo 14 +echo '"x","y","z" +"Hello",\N,\N +\N,\N,"[1,2,3]"' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "desc test"; + +echo '"x","y","z" +"Hello",\N,\N +\N,\N,"[1,2,3]"' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "select * from test"; + +echo 15 +echo '"x","y","z" +"Hello",\N,\N +\N,"World",\N' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "desc test"; + +echo '"x","y","z" +"Hello",\N,\N +\N,"World",\N' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "select * from test"; + +echo 16 +echo '"a""b","c" +1,2' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "desc test"; + +echo '"a""b","c" +1,2' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "select * from test"; + +echo 17 +echo '"a","b","c" +1,2,3' | $CLICKHOUSE_LOCAL --input-format='CSV' --structure='a UInt32, b UInt32' --table='test' -q "select * from test"; + +echo 18 +echo '"a" +1' | $CLICKHOUSE_LOCAL --input-format='CSV' --structure='a UInt32, b UInt32' --table='test' -q "select * from test"; + +echo "TSV" +echo 1 +echo -e 'x\ty\tz +123\tHello\t[1,2,3] +456\tWorld\t[4,5,6]' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "desc test"; + +echo -e 'x\ty\tz +123\tHello\t[1,2,3] +456\tWorld\t[4,5,6]' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "select * from test"; + +echo 2 +echo -e 'x\ty\tz +UInt32\tString\tArray(UInt32) +123\tHello\t[1,2,3] +456\tWorld\t[4,5,6]' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "desc test"; + +echo -e 'x\ty\tz +UInt32\tString\tArray(UInt32) +123\tHello\t[1,2,3] +456\tWorld\t[4,5,6]' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "select * from test"; + +echo 3 +echo -e 'x\ty\tz +Foo\tHello\tWorld +Bar\tWorld\tHello' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "desc test"; + +echo -e 'x\ty\tz +Foo\tHello\tWorld +Bar\tWorld\tHello' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "select * from test"; + +echo 4 +echo -e 'x\ty\tz 
+UInt32\tString\tArray(UInt32) +Foo\tHello\tWorld +Bar\tWorld\tHello' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "desc test"; + +echo -e 'x\ty\tz +UInt32\tString\tArray(UInt32) +Foo\tHello\tWorld +Bar\tWorld\tHello' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "select * from test"; + +echo "CustomSeparated" + +echo 1 +echo ' +"x""y""z" + +123"Hello""[1,2,3]" + +456"World""[4,5,6]" +' | $CLICKHOUSE_LOCAL --input-format='CustomSeparated' --table='test' -q "desc test" --format_custom_row_before_delimiter='' --format_custom_row_after_delimiter=$'\n' --format_custom_row_between_delimiter=$'\n' --format_custom_result_before_delimiter=$'\n' --format_custom_result_after_delimiter=$'\n' --format_custom_field_delimiter='' --format_custom_escaping_rule='CSV' + + +echo ' +"x""y""z" + +123"Hello""[1,2,3]" + +456"World""[4,5,6]" +' | $CLICKHOUSE_LOCAL --input-format='CustomSeparated' --table='test' -q "select * from test" --format_custom_row_before_delimiter='' --format_custom_row_after_delimiter=$'\n' --format_custom_row_between_delimiter=$'\n' --format_custom_result_before_delimiter=$'\n' --format_custom_result_after_delimiter=$'\n' --format_custom_field_delimiter='' --format_custom_escaping_rule='CSV' + +echo 2 +echo ' +"x""y""z" + +"UInt32""String""Array(UInt32)" + +123"Hello""[1,2,3]" + +456"World""[4,5,6]" +' | $CLICKHOUSE_LOCAL --input-format='CustomSeparated' --table='test' -q "desc test" --format_custom_row_before_delimiter='' --format_custom_row_after_delimiter=$'\n' --format_custom_row_between_delimiter=$'\n' --format_custom_result_before_delimiter=$'\n' --format_custom_result_after_delimiter=$'\n' --format_custom_field_delimiter='' --format_custom_escaping_rule='CSV' + +echo ' +"x""y""z" + +"UInt32""String""Array(UInt32)" + +123"Hello""[1,2,3]" + +456"World""[4,5,6]" +' | $CLICKHOUSE_LOCAL --input-format='CustomSeparated' --table='test' -q "select * from test" --format_custom_row_before_delimiter='' --format_custom_row_after_delimiter=$'\n' --format_custom_row_between_delimiter=$'\n' --format_custom_result_before_delimiter=$'\n' --format_custom_result_after_delimiter=$'\n' --format_custom_field_delimiter='' --format_custom_escaping_rule='CSV' + + +echo 3 +echo ' +"x""y""z" + +"Foo""Hello""World" + +"Bar""World""Hello" +' | $CLICKHOUSE_LOCAL --input-format='CustomSeparated' --table='test' -q "desc test" --format_custom_row_before_delimiter='' --format_custom_row_after_delimiter=$'\n' --format_custom_row_between_delimiter=$'\n' --format_custom_result_before_delimiter=$'\n' --format_custom_result_after_delimiter=$'\n' --format_custom_field_delimiter='' --format_custom_escaping_rule='CSV' + + +echo ' +"x""y""z" + +"Foo""Hello""World" + +"Bar""World""Hello" +' | $CLICKHOUSE_LOCAL --input-format='CustomSeparated' --table='test' -q "select * from test" --format_custom_row_before_delimiter='' --format_custom_row_after_delimiter=$'\n' --format_custom_row_between_delimiter=$'\n' --format_custom_result_before_delimiter=$'\n' --format_custom_result_after_delimiter=$'\n' --format_custom_field_delimiter='' --format_custom_escaping_rule='CSV' + + +echo 4 +echo ' +"x""y""z" + +"UInt32""String""Array(UInt32)" + +"Foo""Hello""World" + +"Bar""World""Hello" +' | $CLICKHOUSE_LOCAL --input-format='CustomSeparated' --table='test' -q "desc test" --format_custom_row_before_delimiter='' --format_custom_row_after_delimiter=$'\n' --format_custom_row_between_delimiter=$'\n' --format_custom_result_before_delimiter=$'\n' --format_custom_result_after_delimiter=$'\n' 
--format_custom_field_delimiter='' --format_custom_escaping_rule='CSV' + + +echo ' +"x""y""z" + +"UInt32""String""Array(UInt32)" + +"Foo""Hello""World" + +"Bar""World""Hello" +' | $CLICKHOUSE_LOCAL --input-format='CustomSeparated' --table='test' -q "select * from test" --format_custom_row_before_delimiter='' --format_custom_row_after_delimiter=$'\n' --format_custom_row_between_delimiter=$'\n' --format_custom_result_before_delimiter=$'\n' --format_custom_result_after_delimiter=$'\n' --format_custom_field_delimiter='' --format_custom_escaping_rule='CSV' + + +echo 5 +echo ' +"x""y""z" +' | $CLICKHOUSE_LOCAL --input-format='CustomSeparated' --table='test' -q "desc test" --format_custom_row_before_delimiter='' --format_custom_row_after_delimiter=$'\n' --format_custom_row_between_delimiter=$'\n' --format_custom_result_before_delimiter=$'\n' --format_custom_result_after_delimiter=$'\n' --format_custom_field_delimiter='' --format_custom_escaping_rule='CSV' + + +echo ' +"x""y""z" +' | $CLICKHOUSE_LOCAL --input-format='CustomSeparated' --table='test' -q "select * from test" --format_custom_row_before_delimiter='' --format_custom_row_after_delimiter=$'\n' --format_custom_row_between_delimiter=$'\n' --format_custom_result_before_delimiter=$'\n' --format_custom_result_after_delimiter=$'\n' --format_custom_field_delimiter='' --format_custom_escaping_rule='CSV' + + +echo 6 +echo ' +"x""y""z" + +"UInt32""String""Array(UInt32)" +' | $CLICKHOUSE_LOCAL --input-format='CustomSeparated' --table='test' -q "desc test" --format_custom_row_before_delimiter='' --format_custom_row_after_delimiter=$'\n' --format_custom_row_between_delimiter=$'\n' --format_custom_result_before_delimiter=$'\n' --format_custom_result_after_delimiter=$'\n' --format_custom_field_delimiter='' --format_custom_escaping_rule='CSV' + + +echo ' +"x""y""z" + +"UInt32""String""Array(UInt32)" +' | $CLICKHOUSE_LOCAL --input-format='CustomSeparated' --table='test' -q "select * from test" --format_custom_row_before_delimiter='' --format_custom_row_after_delimiter=$'\n' --format_custom_row_between_delimiter=$'\n' --format_custom_result_before_delimiter=$'\n' --format_custom_result_after_delimiter=$'\n' --format_custom_field_delimiter='' --format_custom_escaping_rule='CSV' + +echo 7 +echo ' +"x""y""z" + +"UInt32""String""Array(UInt32)" + +42"Hello"[1,2,3] +' | $CLICKHOUSE_LOCAL --input-format='CustomSeparated' --table='test' -q "desc test" --format_custom_row_before_delimiter='' --format_custom_row_after_delimiter=$'\n' --format_custom_row_between_delimiter=$'\n' --format_custom_result_before_delimiter=$'\n' --format_custom_result_after_delimiter=$'\n' --format_custom_field_delimiter='' --format_custom_escaping_rule='JSON' + + +echo ' +"x""y""z" + +"UInt32""String""Array(UInt32)" + +42"Hello"[1,2,3] +' | $CLICKHOUSE_LOCAL --input-format='CustomSeparated' --table='test' -q "select * from test" --format_custom_row_before_delimiter='' --format_custom_row_after_delimiter=$'\n' --format_custom_row_between_delimiter=$'\n' --format_custom_result_before_delimiter=$'\n' --format_custom_result_after_delimiter=$'\n' --format_custom_field_delimiter='' --format_custom_escaping_rule='JSON' + + diff --git a/tests/queries/0_stateless/02523_array_shuffle.reference b/tests/queries/0_stateless/02523_array_shuffle.reference new file mode 100644 index 00000000000..0504da61f9d --- /dev/null +++ b/tests/queries/0_stateless/02523_array_shuffle.reference @@ -0,0 +1,85 @@ +[] +[] +[9223372036854775808] +[9223372036854775808] +[10,9,4,2,5,6,7,1,8,3] +[10,9,4,2,5,6,7,1,8,3] 
+[10.1,9,4,2,5,6,7,1,8,3] +[9223372036854775808,9,4,2,5,6,7,1,8,3] +[NULL,9,4,2,5,6,7,1,8,3] +['789','123','ABC','000','456'] +['789','123','ABC',NULL,'456'] +['imposter','storage','sensation','uniform','tiger','terminal'] +[NULL,'storage','sensation','uniform','tiger','terminal'] +[NULL] +[NULL,NULL] +[[10,20,30,40],[1,2,3,4],[100,200,300,400,500,600,700,800,900],[2,4,8,16,32,64],[-1,-2,-3,-4]] +[[10,20,30,40],[1,2,3,4],[100,200,300,400,500,600,700,800,900],[2,4,8,16,32,64],[-1,-2,-3,-4]] +[[10,20,30,40],[1,2,3,4],[100,200,300,400,500,600,700,800,900],[2,4,8,16,32,64],[NULL,-2,-3,-4]] +[10,72,11,18,73,76,46,71,44,35,9,0,97,53,13,32,51,30,3,68,5,48,67,90,20,27,38,19,54,21,83,84,1,22,56,81,91,77,36,63,33,39,24,40,4,99,14,23,94,29,26,96,2,28,31,57,42,88,12,47,58,8,37,82,92,34,6,60,25,43,50,74,70,52,55,62,17,79,65,93,86,7,16,41,59,75,80,45,69,89,85,87,95,64,61,98,49,78,66,15] +[10,72,11,18,73,76,46,71,44,35,9,0,97,53,13,32,51,30,3,68,5,48,67,90,20,27,38,19,54,21,83,84,1,22,56,81,91,77,36,63,33,39,24,40,4,99,14,23,94,29,26,96,2,28,31,57,42,88,12,47,58,8,37,82,92,34,6,60,25,43,50,74,70,52,55,62,17,79,65,93,86,7,16,41,59,75,80,45,69,89,85,87,95,64,61,98,49,78,66,15] +[(3,-3),(1,-1),(99999999,-99999999)] +[(3,'A'),(1,NULL),(2,'a')] +[] +[] +[] +[9223372036854775808] +[9223372036854775808] +[9223372036854775808] +[10,9,4,2,5,6,7,1,8,3] +[10.1,9,4,2,5,6,7,1,8,3] +[9223372036854775808,9,4,2,5,6,7,1,8,3] +[NULL,9,4,2,5,6,7,1,8,3] +['789','123','ABC','000','456'] +['789','123','ABC',NULL,'456'] +['imposter','storage','sensation','uniform','tiger','terminal'] +[NULL,'storage','sensation','uniform','tiger','terminal'] +[NULL] +[NULL,NULL] +[[10,20,30,40],[1,2,3,4],[100,200,300,400,500,600,700,800,900],[2,4,8,16,32,64],[-1,-2,-3,-4]] +[[10,20,30,40],[1,2,3,4],[100,200,300,400,500,600,700,800,900],[2,4,8,16,32,64],[NULL,-2,-3,-4]] +[10,72,11,18,73,76,46,71,44,35,9,0,97,53,13,32,51,30,3,68,5,48,67,90,20,27,38,19,54,21,83,84,1,22,56,81,91,77,36,63,33,39,24,40,4,99,14,23,94,29,26,96,2,28,31,57,42,88,12,47,58,8,37,82,92,34,6,60,25,43,50,74,70,52,55,62,17,79,65,93,86,7,16,41,59,75,80,45,69,89,85,87,95,64,61,98,49,78,66,15] +[10,72,11,18,73,76,46,71,44,35,9,0,97,53,13,32,51,30,3,68,5,48,67,90,20,27,38,19,54,21,83,84,1,22,56,81,91,77,36,63,33,39,24,40,4,99,14,23,94,29,26,96,2,28,31,57,42,88,12,47,58,8,37,82,92,34,6,60,25,43,50,74,70,52,55,62,17,79,65,93,86,7,16,41,59,75,80,45,69,89,85,87,95,64,61,98,49,78,66,15] +[(3,-3),(1,-1),(99999999,-99999999)] +[(3,'A'),(1,NULL),(2,'a')] +[NULL,NULL,NULL] +[10,2,3,4,5,6,7,8,9,1] +[10,9,3,4,5,6,7,8,2,1] +[10,9,4,2,5,6,7,8,3,1] +[10,9,4,2,5,6,7,1,3,8] +[10,9,4,2,5,6,7,1,8,3] +[10,9,4,2,5,6,7,1,8,3] +[10,9,4,2,5,6,7,1,8,3] +[10.1,9,4,2,5,6,7,8,3,1] +[9223372036854775808,9,4,2,5,6,7,8,3,1] +[NULL,9,4,2,5,6,7,8,3,1] +['789','123','ABC','456','000'] +['789','123','ABC','456',NULL] +['imposter','storage','sensation','terminal','uniform','tiger'] +[NULL,'storage','sensation','terminal','uniform','tiger'] +[[10,20,30,40],[1,2,3,4],[-1,-2,-3,-4],[100,200,300,400,500,600,700,800,900],[2,4,8,16,32,64]] +[[10,20,30,40],[1,2,3,4],[NULL,-2,-3,-4],[100,200,300,400,500,600,700,800,900],[2,4,8,16,32,64]] +[10,72,11,18,73,76,46,71,44,35,9,0,97,53,13,32,51,30,3,68,20,21,22,23,24,25,26,27,28,29,17,31,15,33,34,2,36,37,38,39,40,41,42,43,8,45,6,47,48,49,50,16,52,14,54,55,56,57,58,59,60,61,62,63,64,65,66,67,19,69,70,7,1,4,74,75,5,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,12,98,99] 
+[10,72,11,18,73,76,46,71,44,35,9,0,97,53,13,32,51,30,3,68,20,21,22,23,24,25,26,27,28,29,17,31,15,33,34,2,36,37,38,39,40,41,42,43,8,45,6,47,48,49,50,16,52,14,54,55,56,57,58,59,60,61,62,63,64,65,66,67,19,69,70,7,1,4,74,75,5,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,12,98,99] +[(3,-3),(1,-1),(99999999,-99999999)] +[(3,'A'),(1,NULL),(2,'a')] +[3,2,1] +[3,2,1] +[3,2,1] +[3,2,1] +[3,2,1] +[3,2,1] +[3,2,1] +[3,2,1] +[3,2,1] +[3,2,1] +[3,2,1] +[3,1,2] +[1,3,2] +[2,1,3] +[3,2,1] +[3,2,1] +[1,2,3] +[3,2,1] +[3,2,1] +[2,1,3] diff --git a/tests/queries/0_stateless/02523_array_shuffle.sql b/tests/queries/0_stateless/02523_array_shuffle.sql new file mode 100644 index 00000000000..9138657c842 --- /dev/null +++ b/tests/queries/0_stateless/02523_array_shuffle.sql @@ -0,0 +1,71 @@ +SELECT arrayShuffle([]); +SELECT arrayShuffle([], 0xbad_cafe); +SELECT arrayShuffle([9223372036854775808]); +SELECT arrayShuffle([9223372036854775808], 0xbad_cafe); +SELECT arrayShuffle([1,2,3,4,5,6,7,8,9,10], 0xbad_cafe); +SELECT arrayShuffle(materialize([1,2,3,4,5,6,7,8,9,10]), 0xbad_cafe); +SELECT arrayShuffle([1,2,3,4,5,6,7,8,9,10.1], 0xbad_cafe); +SELECT arrayShuffle([1,2,3,4,5,6,7,8,9,9223372036854775808], 0xbad_cafe); +SELECT arrayShuffle([1,2,3,4,5,6,7,8,9,NULL], 0xbad_cafe); +SELECT arrayShuffle([toFixedString('123', 3), toFixedString('456', 3), toFixedString('789', 3), toFixedString('ABC', 3), toFixedString('000', 3)], 0xbad_cafe); +SELECT arrayShuffle([toFixedString('123', 3), toFixedString('456', 3), toFixedString('789', 3), toFixedString('ABC', 3), NULL], 0xbad_cafe); +SELECT arrayShuffle(['storage','tiger','imposter','terminal','uniform','sensation'], 0xbad_cafe); +SELECT arrayShuffle(['storage','tiger',NULL,'terminal','uniform','sensation'], 0xbad_cafe); +SELECT arrayShuffle([NULL]); +SELECT arrayShuffle([NULL,NULL]); +SELECT arrayShuffle([[1,2,3,4],[-1,-2,-3,-4],[10,20,30,40],[100,200,300,400,500,600,700,800,900],[2,4,8,16,32,64]], 0xbad_cafe); +SELECT arrayShuffle(materialize([[1,2,3,4],[-1,-2,-3,-4],[10,20,30,40],[100,200,300,400,500,600,700,800,900],[2,4,8,16,32,64]]), 0xbad_cafe); +SELECT arrayShuffle([[1,2,3,4],[NULL,-2,-3,-4],[10,20,30,40],[100,200,300,400,500,600,700,800,900],[2,4,8,16,32,64]], 0xbad_cafe); +SELECT arrayShuffle(groupArray(x),0xbad_cafe) FROM (SELECT number as x from system.numbers LIMIT 100); +SELECT arrayShuffle(groupArray(toUInt64(x)),0xbad_cafe) FROM (SELECT number as x from system.numbers LIMIT 100); +SELECT arrayShuffle([tuple(1, -1), tuple(99999999, -99999999), tuple(3, -3)], 0xbad_cafe); +SELECT arrayShuffle([tuple(1, NULL), tuple(2, 'a'), tuple(3, 'A')], 0xbad_cafe); +SELECT arrayPartialShuffle([]); -- trivial cases (equivalent to arrayShuffle) +SELECT arrayPartialShuffle([], 0); +SELECT arrayPartialShuffle([], 0, 0xbad_cafe); +SELECT arrayPartialShuffle([9223372036854775808]); +SELECT arrayPartialShuffle([9223372036854775808], 0); +SELECT arrayPartialShuffle([9223372036854775808], 0, 0xbad_cafe); +SELECT arrayPartialShuffle([1,2,3,4,5,6,7,8,9,10], 0, 0xbad_cafe); +SELECT arrayPartialShuffle([1,2,3,4,5,6,7,8,9,10.1], 0, 0xbad_cafe); +SELECT arrayPartialShuffle([1,2,3,4,5,6,7,8,9,9223372036854775808], 0, 0xbad_cafe); +SELECT arrayPartialShuffle([1,2,3,4,5,6,7,8,9,NULL], 0, 0xbad_cafe); +SELECT arrayPartialShuffle([toFixedString('123', 3), toFixedString('456', 3), toFixedString('789', 3), toFixedString('ABC', 3), toFixedString('000', 3)], 0, 0xbad_cafe); +SELECT arrayPartialShuffle([toFixedString('123', 3), toFixedString('456', 3), toFixedString('789', 3), 
toFixedString('ABC', 3), NULL], 0, 0xbad_cafe); +SELECT arrayPartialShuffle(['storage','tiger','imposter','terminal','uniform','sensation'], 0, 0xbad_cafe); +SELECT arrayPartialShuffle(['storage','tiger',NULL,'terminal','uniform','sensation'], 0, 0xbad_cafe); +SELECT arrayPartialShuffle([NULL]); +SELECT arrayPartialShuffle([NULL,NULL]); +SELECT arrayPartialShuffle([[1,2,3,4],[-1,-2,-3,-4],[10,20,30,40],[100,200,300,400,500,600,700,800,900],[2,4,8,16,32,64]], 0, 0xbad_cafe); +SELECT arrayPartialShuffle([[1,2,3,4],[NULL,-2,-3,-4],[10,20,30,40],[100,200,300,400,500,600,700,800,900],[2,4,8,16,32,64]], 0, 0xbad_cafe); +SELECT arrayPartialShuffle(groupArray(x),0,0xbad_cafe) FROM (SELECT number as x from system.numbers LIMIT 100); +SELECT arrayPartialShuffle(groupArray(toUInt64(x)),0,0xbad_cafe) FROM (SELECT number as x from system.numbers LIMIT 100); +SELECT arrayPartialShuffle([tuple(1, -1), tuple(99999999, -99999999), tuple(3, -3)], 0, 0xbad_cafe); +SELECT arrayPartialShuffle([tuple(1, NULL), tuple(2, 'a'), tuple(3, 'A')], 0, 0xbad_cafe); +SELECT arrayPartialShuffle([NULL,NULL,NULL], 2); -- other, mostly non-trivial cases +SELECT arrayPartialShuffle([1,2,3,4,5,6,7,8,9,10], 1, 0xbad_cafe); +SELECT arrayPartialShuffle([1,2,3,4,5,6,7,8,9,10], 2, 0xbad_cafe); +SELECT arrayPartialShuffle([1,2,3,4,5,6,7,8,9,10], 4, 0xbad_cafe); +SELECT arrayPartialShuffle([1,2,3,4,5,6,7,8,9,10], 8, 0xbad_cafe); +SELECT arrayPartialShuffle([1,2,3,4,5,6,7,8,9,10], 9, 0xbad_cafe); +SELECT arrayPartialShuffle([1,2,3,4,5,6,7,8,9,10], 10, 0xbad_cafe); +SELECT arrayPartialShuffle([1,2,3,4,5,6,7,8,9,10], 100, 0xbad_cafe); +SELECT arrayPartialShuffle([1,2,3,4,5,6,7,8,9,10.1], 4, 0xbad_cafe); +SELECT arrayPartialShuffle([1,2,3,4,5,6,7,8,9,9223372036854775808], 4, 0xbad_cafe); +SELECT arrayPartialShuffle([1,2,3,4,5,6,7,8,9,NULL], 4, 0xbad_cafe); +SELECT arrayPartialShuffle([toFixedString('123', 3), toFixedString('456', 3), toFixedString('789', 3), toFixedString('ABC', 3), toFixedString('000', 3)], 3, 0xbad_cafe); +SELECT arrayPartialShuffle([toFixedString('123', 3), toFixedString('456', 3), toFixedString('789', 3), toFixedString('ABC', 3), NULL], 3, 0xbad_cafe); +SELECT arrayPartialShuffle(['storage','tiger','imposter','terminal','uniform','sensation'], 3, 0xbad_cafe); +SELECT arrayPartialShuffle(['storage','tiger',NULL,'terminal','uniform','sensation'], 3, 0xbad_cafe); +SELECT arrayPartialShuffle([[1,2,3,4],[-1,-2,-3,-4],[10,20,30,40],[100,200,300,400,500,600,700,800,900],[2,4,8,16,32,64]], 2, 0xbad_cafe); +SELECT arrayPartialShuffle([[1,2,3,4],[NULL,-2,-3,-4],[10,20,30,40],[100,200,300,400,500,600,700,800,900],[2,4,8,16,32,64]], 2, 0xbad_cafe); +SELECT arrayPartialShuffle(groupArray(x),20,0xbad_cafe) FROM (SELECT number as x from system.numbers LIMIT 100); +SELECT arrayPartialShuffle(groupArray(toUInt64(x)),20,0xbad_cafe) FROM (SELECT number as x from system.numbers LIMIT 100); +SELECT arrayPartialShuffle([tuple(1, -1), tuple(99999999, -99999999), tuple(3, -3)], 2, 0xbad_cafe); +SELECT arrayPartialShuffle([tuple(1, NULL), tuple(2, 'a'), tuple(3, 'A')], 2, 0xbad_cafe); +SELECT arrayShuffle([1, 2, 3], 42) FROM numbers(10); -- for a constant array we do not materialize it and each row gets the same permutation +SELECT arrayShuffle(materialize([1, 2, 3]), 42) FROM numbers(10); +SELECT arrayShuffle(1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT arrayShuffle([1], 'a'); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT arrayShuffle([1], 1.1); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT arrayShuffle([1], 0xcafe, 
1); -- { serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH } \ No newline at end of file diff --git a/tests/queries/0_stateless/02531_ipv4_arithmetic.reference b/tests/queries/0_stateless/02531_ipv4_arithmetic.reference new file mode 100644 index 00000000000..6f03e4e6903 --- /dev/null +++ b/tests/queries/0_stateless/02531_ipv4_arithmetic.reference @@ -0,0 +1,20 @@ +10 1.2.3.4 0 +11 1.2.3.4 3 +12 1.2.3.4 4 +13 1.2.3.4 12 +14 1.2.3.4 0 +15 1.2.3.4 10 +16 1.2.3.4 4 +17 1.2.3.4 10 +18 1.2.3.4 4 +19 1.2.3.4 10 +20 1.2.3.4 0 +21 1.2.3.4 7 +22 1.2.3.4 14 +23 1.2.3.4 12 +24 1.2.3.4 4 +25 1.2.3.4 10 +26 1.2.3.4 12 +27 1.2.3.4 13 +28 1.2.3.4 0 +29 1.2.3.4 1 diff --git a/tests/queries/0_stateless/02531_ipv4_arithmetic.sql b/tests/queries/0_stateless/02531_ipv4_arithmetic.sql new file mode 100644 index 00000000000..59a99842d61 --- /dev/null +++ b/tests/queries/0_stateless/02531_ipv4_arithmetic.sql @@ -0,0 +1 @@ +SELECT number, ip, ip % number FROM (SELECT number, toIPv4('1.2.3.4') as ip FROM numbers(10, 20)); \ No newline at end of file diff --git a/tests/queries/0_stateless/02531_two_level_aggregation_bug.sh b/tests/queries/0_stateless/02531_two_level_aggregation_bug.sh index 8c47471ad3c..d93fe59134f 100755 --- a/tests/queries/0_stateless/02531_two_level_aggregation_bug.sh +++ b/tests/queries/0_stateless/02531_two_level_aggregation_bug.sh @@ -4,7 +4,13 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CUR_DIR"/../shell_config.sh -query_id=$(echo "select queryID() from (select sum(s), k from remote('127.0.0.{1,2}', view(select sum(number) s, bitAnd(number, 3) k from numbers_mt(1000000) group by k)) group by k) limit 1 settings group_by_two_level_threshold=1, max_threads=3, prefer_localhost_replica=1" | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" --data-binary @- 2>&1) +# It is totally ok if sometimes some of the query processing threads did not process any data, as all the data was processed by the other threads. +# Check that at least once all 6 threads converted their aggregation data into a two-level hash table. 
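+# (The expected count of 6 below presumably comes from max_threads=3 over the two shards 127.0.0.{1,2}: with
+# prefer_localhost_replica=1 both shards are processed locally, with up to 3 aggregation threads each, so a fully
+# parallel run logs the "Converting aggregation data to two-level" message 6 times; the retry loop waits for such a run.)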
-${CLICKHOUSE_CLIENT} --query="system flush logs" -${CLICKHOUSE_CLIENT} --query="select count() from system.text_log where event_date >= today() - 1 and query_id = '${query_id}' and message like '%Converting aggregation data to two-level%'" +while true +do + query_id=$(echo "select queryID() from (select sum(s), k from remote('127.0.0.{1,2}', view(select sum(number) s, bitAnd(number, 3) k from numbers_mt(1000000) group by k)) group by k) limit 1 settings group_by_two_level_threshold=1, max_threads=3, prefer_localhost_replica=1" | ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}" --data-binary @- 2>&1) + + ${CLICKHOUSE_CLIENT} --query="system flush logs" + ${CLICKHOUSE_CLIENT} --query="select count() from system.text_log where event_date >= yesterday() and query_id = '${query_id}' and message like '%Converting aggregation data to two-level%'" | grep -P '^6$' && break; +done diff --git a/tests/queries/0_stateless/025335_analyzer_limit.reference b/tests/queries/0_stateless/025335_analyzer_limit.reference new file mode 100644 index 00000000000..ea7e98aa8ab --- /dev/null +++ b/tests/queries/0_stateless/025335_analyzer_limit.reference @@ -0,0 +1,10 @@ +10 +11 +12 +13 +14 +15 +16 +17 +18 +19 diff --git a/tests/queries/0_stateless/025335_analyzer_limit.sql b/tests/queries/0_stateless/025335_analyzer_limit.sql new file mode 100644 index 00000000000..8f98d823e5c --- /dev/null +++ b/tests/queries/0_stateless/025335_analyzer_limit.sql @@ -0,0 +1,3 @@ +SET allow_experimental_analyzer = 1; + +SELECT number FROM numbers(100) LIMIT 10 OFFSET 10; diff --git a/tests/queries/0_stateless/02534_join_prewhere_bug_44062.reference b/tests/queries/0_stateless/02534_join_prewhere_bug_44062.reference new file mode 100644 index 00000000000..aaef17371d8 --- /dev/null +++ b/tests/queries/0_stateless/02534_join_prewhere_bug_44062.reference @@ -0,0 +1,38 @@ +-- { echoOn } + +SELECT * FROM test1 LEFT JOIN test2 ON test1.col1 = test2.col1 +WHERE test2.col1 IS NULL +ORDER BY test2.col1 +; +12321 -30 \N \N +SELECT * FROM test2 RIGHT JOIN test1 ON test2.col1 = test1.col1 +WHERE test2.col1 IS NULL +ORDER BY test2.col1 +; +\N \N 12321 -30 +SELECT * FROM test1 LEFT JOIN test2 ON test1.col1 = test2.col1 +WHERE test2.col1 IS NOT NULL +ORDER BY test2.col1 +; +123 123 123 5600 +321 -32 321 5601 +SELECT * FROM test2 RIGHT JOIN test1 ON test2.col1 = test1.col1 +WHERE test2.col1 IS NOT NULL +ORDER BY test2.col1 +; +123 5600 123 123 +321 5601 321 -32 +SELECT test2.col1, test1.* FROM test2 RIGHT JOIN test1 ON test2.col1 = test1.col1 +WHERE test2.col1 IS NOT NULL +ORDER BY test2.col1 +; +123 123 123 +321 321 -32 +SELECT test2.col3, test1.* FROM test2 RIGHT JOIN test1 ON test2.col1 = test1.col1 +WHERE test2.col1 IS NOT NULL +ORDER BY test2.col1 +; +5600 123 123 +5601 321 -32 +DROP TABLE IF EXISTS test1; +DROP TABLE IF EXISTS test2; diff --git a/tests/queries/0_stateless/02534_join_prewhere_bug_44062.sql b/tests/queries/0_stateless/02534_join_prewhere_bug_44062.sql new file mode 100644 index 00000000000..073f81e4ff3 --- /dev/null +++ b/tests/queries/0_stateless/02534_join_prewhere_bug_44062.sql @@ -0,0 +1,46 @@ + +DROP TABLE IF EXISTS test1; +DROP TABLE IF EXISTS test2; + +CREATE TABLE test1 ( `col1` UInt64, `col2` Int8 ) ENGINE = MergeTree ORDER BY col1; +CREATE TABLE test2 ( `col1` UInt64, `col3` Int16 ) ENGINE = MergeTree ORDER BY col1; + +INSERT INTO test1 VALUES (123, 123), (12321, -30), (321, -32); +INSERT INTO test2 VALUES (123, 5600), (321, 5601); + +SET join_use_nulls = 1; + +-- { echoOn } + +SELECT * FROM test1 LEFT JOIN test2 ON 
test1.col1 = test2.col1 +WHERE test2.col1 IS NULL +ORDER BY test2.col1 +; + +SELECT * FROM test2 RIGHT JOIN test1 ON test2.col1 = test1.col1 +WHERE test2.col1 IS NULL +ORDER BY test2.col1 +; + +SELECT * FROM test1 LEFT JOIN test2 ON test1.col1 = test2.col1 +WHERE test2.col1 IS NOT NULL +ORDER BY test2.col1 +; + +SELECT * FROM test2 RIGHT JOIN test1 ON test2.col1 = test1.col1 +WHERE test2.col1 IS NOT NULL +ORDER BY test2.col1 +; + +SELECT test2.col1, test1.* FROM test2 RIGHT JOIN test1 ON test2.col1 = test1.col1 +WHERE test2.col1 IS NOT NULL +ORDER BY test2.col1 +; + +SELECT test2.col3, test1.* FROM test2 RIGHT JOIN test1 ON test2.col1 = test1.col1 +WHERE test2.col1 IS NOT NULL +ORDER BY test2.col1 +; + +DROP TABLE IF EXISTS test1; +DROP TABLE IF EXISTS test2; diff --git a/tests/queries/0_stateless/02534_keyed_siphash.reference b/tests/queries/0_stateless/02534_keyed_siphash.reference new file mode 100644 index 00000000000..52e92f37720 --- /dev/null +++ b/tests/queries/0_stateless/02534_keyed_siphash.reference @@ -0,0 +1,196 @@ +726FDB47DD0E0E31 +74F839C593DC67FD +0D6C8009D9A94F5A +85676696D7FB7E2D +CF2794E0277187B7 +18765564CD99A68D +CBC9466E58FEE3CE +AB0200F58B01D137 +93F5F5799A932462 +9E0082DF0BA9E4B0 +7A5DBBC594DDB9F3 +F4B32F46226BADA7 +751E8FBC860EE5FB +14EA5627C0843D90 +F723CA908E7AF2EE +A129CA6149BE45E5 +3F2ACC7F57C29BDB +699AE9F52CBE4794 +4BC1B3F0968DD39C +BB6DC91DA77961BD +BED65CF21AA2EE98 +D0F2CBB02E3B67C7 +93536795E3A33E88 +A80C038CCD5CCEC8 +B8AD50C6F649AF94 +BCE192DE8A85B8EA +17D835B85BBB15F3 +2F2E6163076BCFAD +DE4DAAACA71DC9A5 +A6A2506687956571 +AD87A3535C49EF28 +32D892FAD841C342 +7127512F72F27CCE +A7F32346F95978E3 +12E0B01ABB051238 +15E034D40FA197AE +314DFFBE0815A3B4 +027990F029623981 +CADCD4E59EF40C4D +9ABFD8766A33735C +0E3EA96B5304A7D0 +AD0C42D6FC585992 +187306C89BC215A9 +D4A60ABCF3792B95 +F935451DE4F21DF2 +A9538F0419755787 +DB9ACDDFF56CA510 +D06C98CD5C0975EB +E612A3CB9ECBA951 +C766E62CFCADAF96 +EE64435A9752FE72 +A192D576B245165A +0A8787BF8ECB74B2 +81B3E73D20B49B6F +7FA8220BA3B2ECEA +245731C13CA42499 +B78DBFAF3A8D83BD +EA1AD565322A1A0B +60E61C23A3795013 +6606D7E446282B93 +6CA4ECB15C5F91E1 +9F626DA15C9625F3 +E51B38608EF25F57 +958A324CEB064572 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +E28DBDE7FE22E41C +1CE422FEE7BD8DE20000000000000000 +E28DBDE7FE22E41C +1CE422FEE7BD8DE20000000000000000 diff --git a/tests/queries/0_stateless/02534_keyed_siphash.sql b/tests/queries/0_stateless/02534_keyed_siphash.sql new file mode 100644 index 00000000000..3c41efd7d58 --- /dev/null +++ b/tests/queries/0_stateless/02534_keyed_siphash.sql @@ -0,0 +1,274 @@ +-- Test Vectors from the SipHash reference C implementation: +-- Written in 2012 by +-- Jean-Philippe Aumasson +-- Daniel J. 
Bernstein +-- Released under CC0 + +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + '')); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23))); +select 
hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39))); +select 
hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61))); +select hex(sipHash64Keyed((toUInt64(506097522914230528), toUInt64(1084818905618843912)), + char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62))); + +-- CH tests +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0)) == sipHash64(char(0)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1)) == sipHash64(char(0, 1)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2)) == 
sipHash64(char(0, 1, 2)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3)) == sipHash64(char(0, 1, 2, 3)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4)) == sipHash64(char(0, 1, 2, 3, 4)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5)) == sipHash64(char(0, 1, 2, 3, 4, 5)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 
19, 20, 21, 22, 23)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 
22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44)); +select 
sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 
42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 
43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62)); +select sipHash64Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)) == sipHash64(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0)) == sipHash128(char(0)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1)) == sipHash128(char(0, 1)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2)) == sipHash128(char(0, 1, 2)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3)) == sipHash128(char(0, 1, 2, 3)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4)) == sipHash128(char(0, 1, 2, 3, 4)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5)) == sipHash128(char(0, 1, 2, 3, 4, 5)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10)); +select 
sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 
27)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 
33, 34, 35, 36, 37, 38)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 
36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61)) == sipHash128(char(0, 1, 2, 3, 
4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62)); +select sipHash128Keyed((toUInt64(0),toUInt64(0)),char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)) == sipHash128(char(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63)); + +select sipHash64Keyed((0, 0), '1'); -- { serverError 48 } +select sipHash128Keyed((0, 0), '1'); -- { serverError 48 } +select sipHash64Keyed(toUInt64(0), '1'); -- { serverError 48 } +select sipHash128Keyed(toUInt64(0), '1'); -- { serverError 48 } + +select hex(sipHash64()); +select hex(sipHash128()); +select hex(sipHash64Keyed()); +select hex(sipHash128Keyed()); diff --git a/tests/queries/0_stateless/02535_ip_parser_not_whole.reference b/tests/queries/0_stateless/02535_ip_parser_not_whole.reference new file mode 100644 index 00000000000..31502960af3 --- /dev/null +++ b/tests/queries/0_stateless/02535_ip_parser_not_whole.reference @@ -0,0 +1,3 @@ +::1 42 +::1 42 +::1 42 diff --git a/tests/queries/0_stateless/02535_ip_parser_not_whole.sql b/tests/queries/0_stateless/02535_ip_parser_not_whole.sql new file mode 100644 index 00000000000..675707d197b --- /dev/null +++ b/tests/queries/0_stateless/02535_ip_parser_not_whole.sql @@ -0,0 +1,3 @@ +SELECT * FROM format(CSVWithNamesAndTypes, 'ip,port\nIPv6,UInt16\n::1,42\n'); +SELECT * FROM format(TSVWithNamesAndTypes, 'ip\tport\nIPv6\tUInt16\n::1\t42\n'); +SELECT * FROM format(JSONCompactEachRowWithNamesAndTypes, '["ip","port"]\n["IPv6","UInt16"]\n["::1",42]\n'); diff --git a/tests/queries/0_stateless/02536_date_from_number_inference_fix.reference b/tests/queries/0_stateless/02536_date_from_number_inference_fix.reference new file mode 100644 index 00000000000..3fb7eba4357 --- /dev/null +++ b/tests/queries/0_stateless/02536_date_from_number_inference_fix.reference @@ -0,0 +1,3 @@ +x Nullable(Int64) +20000101 +19000101 diff --git a/tests/queries/0_stateless/02536_date_from_number_inference_fix.sql b/tests/queries/0_stateless/02536_date_from_number_inference_fix.sql new file mode 100644 index 00000000000..912057265e7 --- /dev/null +++ b/tests/queries/0_stateless/02536_date_from_number_inference_fix.sql @@ -0,0 +1,4 @@ +desc format(JSONEachRow, '{"x" : "20000101"}'); +select * from format(JSONEachRow, '{"x" : "20000101"}'); +select * from format(JSONEachRow, '{"x" : "19000101"}'); + diff --git a/tests/queries/0_stateless/02536_delta_gorilla_corruption.reference 
b/tests/queries/0_stateless/02536_delta_gorilla_corruption.reference new file mode 100644 index 00000000000..82220c1598a --- /dev/null +++ b/tests/queries/0_stateless/02536_delta_gorilla_corruption.reference @@ -0,0 +1,9 @@ +Original bug: the same query executed multiple times yielded different results. +For unclear reasons this happened only in Release builds, not in Debug builds. +0 +0 +0 +The same issue in a much smaller repro happens also in Debug builds +0 +1 +3 diff --git a/tests/queries/0_stateless/02536_delta_gorilla_corruption.sql b/tests/queries/0_stateless/02536_delta_gorilla_corruption.sql new file mode 100644 index 00000000000..197a8ad7221 --- /dev/null +++ b/tests/queries/0_stateless/02536_delta_gorilla_corruption.sql @@ -0,0 +1,39 @@ +-- Tags: no-asan +-- no-asan: the flaky check complains that the test sometimes runs > 60 sec on asan builds + +set allow_suspicious_codecs=1; + +select 'Original bug: the same query executed multiple times yielded different results.'; +select 'For unclear reasons this happened only in Release builds, not in Debug builds.'; + +drop table if exists bug_delta_gorilla; + +create table bug_delta_gorilla +(value_bug UInt64 codec (Delta, Gorilla)) +engine = MergeTree +order by tuple() +as (select 0 from numbers(30000000)); + +select count(*) +from bug_delta_gorilla +where 0 <> value_bug; + +select count(*) +from bug_delta_gorilla +where 0 <> value_bug; + +select count(*) +from bug_delta_gorilla +where 0 <> value_bug; + +drop table if exists bug_delta_gorilla; + +select 'The same issue in a much smaller repro happens also in Debug builds'; + +create table bug_delta_gorilla (val UInt64 codec (Delta, Gorilla)) +engine = MergeTree +order by val; +insert into bug_delta_gorilla values (0)(1)(3); +select * from bug_delta_gorilla; + +drop table if exists bug_delta_gorilla; diff --git a/tests/queries/0_stateless/02536_hdfs_cluster_use_structure_from_table.reference b/tests/queries/0_stateless/02536_hdfs_cluster_use_structure_from_table.reference new file mode 100644 index 00000000000..7a930fd0fb3 --- /dev/null +++ b/tests/queries/0_stateless/02536_hdfs_cluster_use_structure_from_table.reference @@ -0,0 +1 @@ +(1,2) diff --git a/tests/queries/0_stateless/02536_hdfs_cluster_use_structure_from_table.sql b/tests/queries/0_stateless/02536_hdfs_cluster_use_structure_from_table.sql new file mode 100644 index 00000000000..6ca79ba30ba --- /dev/null +++ b/tests/queries/0_stateless/02536_hdfs_cluster_use_structure_from_table.sql @@ -0,0 +1,11 @@ +-- Tags: no-fasttest, no-parallel, no-cpu-aarch64 +-- Tag no-fasttest: Depends on Java + +insert into table function hdfs('hdfs://localhost:12222/test_02536.jsonl', 'TSV') select '{"x" : {"a" : 1, "b" : 2}}' settings hdfs_truncate_on_insert=1; +drop table if exists test; +create table test (x Tuple(a UInt32, b UInt32)) engine=Memory(); +insert into test select * from hdfsCluster('test_cluster_two_shards_localhost', 'hdfs://localhost:12222/test_02536.jsonl') settings use_structure_from_insertion_table_in_table_functions=0; -- {serverError TYPE_MISMATCH} +insert into test select * from hdfsCluster('test_cluster_two_shards_localhost', 'hdfs://localhost:12222/test_02536.jsonl') settings use_structure_from_insertion_table_in_table_functions=1; +select * from test; +drop table test; + diff --git a/tests/queries/0_stateless/02517_fuse_bug_44712.reference b/tests/queries/0_stateless/02536_system_sync_file_cache.reference similarity index 100% rename from tests/queries/0_stateless/02517_fuse_bug_44712.reference rename to 
tests/queries/0_stateless/02536_system_sync_file_cache.reference diff --git a/tests/queries/0_stateless/02536_system_sync_file_cache.sql b/tests/queries/0_stateless/02536_system_sync_file_cache.sql new file mode 100644 index 00000000000..50aebea59cb --- /dev/null +++ b/tests/queries/0_stateless/02536_system_sync_file_cache.sql @@ -0,0 +1 @@ +system sync file cache; diff --git a/tests/queries/0_stateless/02537_system_formats.reference b/tests/queries/0_stateless/02537_system_formats.reference new file mode 100644 index 00000000000..5987834d9b9 --- /dev/null +++ b/tests/queries/0_stateless/02537_system_formats.reference @@ -0,0 +1,2 @@ +CSV 1 1 1 1 +Native 1 1 0 0 diff --git a/tests/queries/0_stateless/02537_system_formats.sql b/tests/queries/0_stateless/02537_system_formats.sql new file mode 100644 index 00000000000..7a09daf325c --- /dev/null +++ b/tests/queries/0_stateless/02537_system_formats.sql @@ -0,0 +1 @@ +SELECT * FROM system.formats WHERE name IN ('CSV', 'Native') ORDER BY name; diff --git a/tests/queries/0_stateless/02538_analyzer_create_table_as_select.reference b/tests/queries/0_stateless/02538_analyzer_create_table_as_select.reference new file mode 100644 index 00000000000..f6ac79e2047 --- /dev/null +++ b/tests/queries/0_stateless/02538_analyzer_create_table_as_select.reference @@ -0,0 +1 @@ +0 Value diff --git a/tests/queries/0_stateless/02538_analyzer_create_table_as_select.sql b/tests/queries/0_stateless/02538_analyzer_create_table_as_select.sql new file mode 100644 index 00000000000..168066ce2f9 --- /dev/null +++ b/tests/queries/0_stateless/02538_analyzer_create_table_as_select.sql @@ -0,0 +1,18 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS test_table_data; +CREATE TABLE test_table_data +( + id UInt64, + value String +) ENGINE=MergeTree() ORDER BY id; + +INSERT INTO test_table_data VALUES (0, 'Value'); + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table ENGINE=MergeTree() ORDER BY tuple() AS SELECT * FROM test_table_data; + +SELECT * FROM test_table; + +DROP TABLE test_table_data; +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/02538_ngram_bf_index_with_null.reference b/tests/queries/0_stateless/02538_ngram_bf_index_with_null.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02538_ngram_bf_index_with_null.sql b/tests/queries/0_stateless/02538_ngram_bf_index_with_null.sql new file mode 100644 index 00000000000..b53c219ff03 --- /dev/null +++ b/tests/queries/0_stateless/02538_ngram_bf_index_with_null.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS 02538_bf_ngrambf_map_values_test; + +CREATE TABLE 02538_bf_ngrambf_map_values_test (`row_id` Int128, `map` Map(String, String), `map_fixed` Map(FixedString(2), String), +INDEX map_values_ngrambf mapKeys(map) TYPE ngrambf_v1(4, 256, 2, 0) GRANULARITY 1, +INDEX map_fixed_values_ngrambf mapKeys(map_fixed) TYPE ngrambf_v1(4, 256, 2, 0) GRANULARITY 1) +ENGINE = MergeTree +ORDER BY row_id +SETTINGS index_granularity = 1; + +INSERT INTO 02538_bf_ngrambf_map_values_test VALUES (1, {'a': 'a'}, {'b': 'b'}); + +SELECT * FROM 02538_bf_ngrambf_map_values_test PREWHERE (map['']) = 'V2V\0V2V2V2V2V2V2' WHERE (map[NULL]) = 'V2V\0V2V2V2V2V2V2V2V\0V2V2V2V2V2V2V2V\0V2V2V2V2V2V2V2V\0V2V2V2V2V2V2' SETTINGS force_data_skipping_indices = 'map_values_ngrambf'; + +DROP TABLE 02538_bf_ngrambf_map_values_test; diff --git a/tests/queries/0_stateless/02538_nullable_array_tuple_timeseries.reference b/tests/queries/0_stateless/02538_nullable_array_tuple_timeseries.reference new 
file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02538_nullable_array_tuple_timeseries.sql b/tests/queries/0_stateless/02538_nullable_array_tuple_timeseries.sql new file mode 100644 index 00000000000..26451c93ed9 --- /dev/null +++ b/tests/queries/0_stateless/02538_nullable_array_tuple_timeseries.sql @@ -0,0 +1,31 @@ +DROP TABLE IF EXISTS tbl; + +-- Checks that (floating-point) time series codecs can be combined +-- with Nullable and +-- with composite types Array and Tuple + +CREATE TABLE tbl ( + -- Nullable + v1_gor Nullable(Float64) CODEC(Gorilla), + v1_fpc Nullable(Float64) CODEC(FPC), + -- Array + v2_gor Array(Float64) CODEC(Gorilla), + v2_fpc Array(Float64) CODEC(FPC), + v3_gor Array(Array(Float64)) CODEC(Gorilla), + v3_fpc Array(Array(Float64)) CODEC(FPC), + v4_gor Array(Nullable(Float64)) CODEC(Gorilla), + v4_fpc Array(Nullable(Float64)) CODEC(FPC), + v5_gor Array(Tuple(Float64)) CODEC(Gorilla), + v5_fpc Array(Tuple(Float64)) CODEC(FPC), + -- Tuple + v6_gor Tuple(Float64) CODEC(Gorilla), + v6_fpc Tuple(Float64) CODEC(FPC), + v7_gor Tuple(Tuple(Float64)) CODEC(Gorilla), + v7_fpc Tuple(Tuple(Float64)) CODEC(FPC), + v8_gor Tuple(Nullable(Float64)) CODEC(Gorilla), + v8_fpc Tuple(Nullable(Float64)) CODEC(FPC), + v9_gor Tuple(Array(Float64)) CODEC(Gorilla), + v9_fpc Tuple(Array(Float64)) CODEC(FPC), +) Engine = MergeTree ORDER BY tuple(); + +DROP TABLE IF EXISTS tbl; diff --git a/tests/queries/0_stateless/02539_generate_random_ip.reference b/tests/queries/0_stateless/02539_generate_random_ip.reference new file mode 100644 index 00000000000..9972842f982 --- /dev/null +++ b/tests/queries/0_stateless/02539_generate_random_ip.reference @@ -0,0 +1 @@ +1 1 diff --git a/tests/queries/0_stateless/02539_generate_random_ip.sql b/tests/queries/0_stateless/02539_generate_random_ip.sql new file mode 100644 index 00000000000..597b3a5be82 --- /dev/null +++ b/tests/queries/0_stateless/02539_generate_random_ip.sql @@ -0,0 +1,2 @@ +-- Check that the function works for Ipv4 and Ipv6 and gives at least something plausible: +SELECT uniq(v4) > 1000, uniq(v6) > 1000 FROM (SELECT * FROM generateRandom('v4 IPv4, v6 IPv6') LIMIT 100000); diff --git a/tests/queries/0_stateless/02539_generate_random_low_cardinality.reference b/tests/queries/0_stateless/02539_generate_random_low_cardinality.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02539_generate_random_low_cardinality.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02539_generate_random_low_cardinality.sql b/tests/queries/0_stateless/02539_generate_random_low_cardinality.sql new file mode 100644 index 00000000000..c524d2ea5bb --- /dev/null +++ b/tests/queries/0_stateless/02539_generate_random_low_cardinality.sql @@ -0,0 +1,2 @@ +-- Check that the function works for LowCardinality and gives at least something plausible: +SELECT uniq(x) > 1000 FROM (SELECT * FROM generateRandom('x Array(LowCardinality(Nullable(String)))') LIMIT 100000); diff --git a/tests/queries/0_stateless/02539_generate_random_map.reference b/tests/queries/0_stateless/02539_generate_random_map.reference new file mode 100644 index 00000000000..c0dc175c3cd --- /dev/null +++ b/tests/queries/0_stateless/02539_generate_random_map.reference @@ -0,0 +1,2 @@ +1 +20 diff --git a/tests/queries/0_stateless/02539_generate_random_map.sql b/tests/queries/0_stateless/02539_generate_random_map.sql new file mode 100644 index 00000000000..a4b25ea3092 --- /dev/null +++ 
b/tests/queries/0_stateless/02539_generate_random_map.sql @@ -0,0 +1,17 @@ +-- Check that max length works + +SELECT max(length(mapKeys(a))) +FROM +( + SELECT a + FROM generateRandom('a Map(String, String)', 20, 5, 1) + LIMIT 1000 +); + +SELECT max(length(mapKeys(a))) +FROM +( + SELECT a + FROM generateRandom('a Map(String, String)', 20, 5, 20) + LIMIT 1000 +); diff --git a/tests/queries/0_stateless/02540_duplicate_primary_key.reference b/tests/queries/0_stateless/02540_duplicate_primary_key.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02540_duplicate_primary_key.sql b/tests/queries/0_stateless/02540_duplicate_primary_key.sql new file mode 100644 index 00000000000..322b6d74845 --- /dev/null +++ b/tests/queries/0_stateless/02540_duplicate_primary_key.sql @@ -0,0 +1,105 @@ +drop table if exists test; + +set allow_suspicious_low_cardinality_types = 1; + +CREATE TABLE test +( + `coverage` DateTime, + `haunt` Nullable(Float32) CODEC(Gorilla, ZSTD(1)), + `sail` Nullable(Float32) CODEC(Gorilla, ZSTD(1)), + `empowerment_turnstile` UInt8, + `empowerment_haversack` Nullable(Int16), + `empowerment_function` Nullable(Int16), + `empowerment_guidance` Nullable(Int32), + `empowerment_high` Nullable(Int32), + `trading_id` Nullable(Int32), + `guidance` Nullable(Int32), + `empowerment_rawhide` Int32, + `memo` Nullable(Int16), + `oeuvre` Nullable(Int16), + `bun` Nullable(Int16), + `tramp` String, + `anthropology_total` Nullable(Float32), + `situation_name` String, + `timing` Nullable(String), + `NAME_cockroach` String, + `NAME_toe` String, + `business_error_methane` FixedString(110), + `business_instrumentation_methane` FixedString(15), + `market` UInt8, + `crew_memo` Nullable(Int16), + `crew_oeuvre` Nullable(Int16), + `crew_fortnight` Nullable(Int16), + `princess_memo` Nullable(Int16), + `princess_oeuvre` Nullable(Int16), + `princess_fortnight` Nullable(Int16), + `emerald` Nullable(Float32), + `cannon_crate` Nullable(String), + `thinking` String, + `SectorMen` String, + `rage_name` Nullable(String), + `DevelopmentalLigandName` String, + `chard_heavy_quadrant` UInt64, + `poster_effective` Nullable(String), + PROJECTION chrysalis_trapezium_ham + ( + SELECT + empowerment_turnstile, + toStartOfInterval(coverage, toIntervalMonth(1)), + toStartOfWeek(coverage, 10), + toStartOfInterval(coverage, toIntervalDay(1)), + NAME_toe, + NAME_cockroach, + situation_name, + memo, + oeuvre, + crew_memo, + crew_oeuvre, + bun, + sum(multiIf(crew_memo IS NULL, 0, 1)), + sum(multiIf(crew_oeuvre IS NULL, 0, 1)), + sum(multiIf(crew_fortnight IS NULL, 0, 1)), + max(toStartOfInterval(coverage, toIntervalDay(1))), + max(CAST(CAST(toStartOfInterval(coverage, toIntervalDay(1)), 'Nullable(DATE)'), 'Nullable(TIMESTAMP)')), + min(toStartOfInterval(coverage, toIntervalDay(1))), + min(CAST(CAST(toStartOfInterval(coverage, toIntervalDay(1)), 'Nullable(DATE)'), 'Nullable(TIMESTAMP)')), + count(), + sum(1) + GROUP BY + empowerment_turnstile, + toStartOfInterval(coverage, toIntervalMonth(1)), + toStartOfWeek(coverage, 10), + toStartOfInterval(coverage, toIntervalDay(1)), + empowerment_turnstile, + toStartOfInterval(coverage, toIntervalMonth(1)), + toStartOfWeek(coverage, 10), + toStartOfInterval(coverage, toIntervalDay(1)), + NAME_toe, + NAME_cockroach, + situation_name, + memo, + oeuvre, + crew_memo, + crew_oeuvre, + bun + ) +) +ENGINE = MergeTree +PARTITION BY toYYYYMM(coverage) +ORDER BY (coverage, situation_name, NAME_toe, NAME_cockroach); + +insert into test select * from 
generateRandom() limit 10; + +with dissonance as ( + Select cast(toStartOfInterval(coverage, INTERVAL 1 day) as Date) as flour, count() as regulation + from test + group by flour having flour >= toDate(now())-100 + ), +cheetah as ( + Select flour, regulation from dissonance + union distinct + Select toDate(now())-1, ifnull((select regulation from dissonance where flour = toDate(now())-1),0) as regulation +) +Select flour, regulation from cheetah order by flour with fill step 1 limit 100 format Null; + +drop table test; diff --git a/tests/queries/0_stateless/02540_input_format_json_ignore_unknown_keys_in_named_tuple.reference b/tests/queries/0_stateless/02540_input_format_json_ignore_unknown_keys_in_named_tuple.reference new file mode 100644 index 00000000000..a1b4e2b5a83 --- /dev/null +++ b/tests/queries/0_stateless/02540_input_format_json_ignore_unknown_keys_in_named_tuple.reference @@ -0,0 +1,13 @@ +INCORRECT_DATA +(1) +NOT_FOUND_COLUMN_IN_BLOCK +(1) +{ + "row_1": {"type":"CreateEvent","actor":{"login":"foobar"},"repo":{"name":"ClickHouse\/ClickHouse"},"created_at":"2023-01-26 10:48:02","payload":{"updated_at":"1970-01-01 00:00:00","action":"","comment":{"id":"0","path":"","position":0,"line":0,"user":{"login":""},"diff_hunk":"","original_position":0,"commit_id":"","original_commit_id":""},"review":{"body":"","author_association":"","state":""},"ref":"backport","ref_type":"branch","issue":{"number":0,"title":"","labels":[],"state":"","locked":0,"assignee":{"login":""},"assignees":[],"comment":"","closed_at":"1970-01-01 00:00:00"},"pull_request":{"merged_at":null,"merge_commit_sha":"","requested_reviewers":[],"requested_teams":[],"head":{"ref":"","sha":""},"base":{"ref":"","sha":""},"merged":0,"mergeable":0,"rebaseable":0,"mergeable_state":"","merged_by":null,"review_comments":0,"maintainer_can_modify":0,"commits":0,"additions":0,"deletions":0,"changed_files":0},"size":0,"distinct_size":0,"member":{"login":""},"release":{"tag_name":"","name":""}}} +} +{ + "row_1": {"labels":[],"merged_by":""}, + "row_2": {"labels":[],"merged_by":"foobar"}, + "row_3": {"labels":[],"merged_by":""}, + "row_4": {"labels":["backport"],"merged_by":""} +} diff --git a/tests/queries/0_stateless/02540_input_format_json_ignore_unknown_keys_in_named_tuple.sh b/tests/queries/0_stateless/02540_input_format_json_ignore_unknown_keys_in_named_tuple.sh new file mode 100755 index 00000000000..f37a36fa192 --- /dev/null +++ b/tests/queries/0_stateless/02540_input_format_json_ignore_unknown_keys_in_named_tuple.sh @@ -0,0 +1,137 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +# This are just simple tests +echo '{"t" : {"a" : 1, "b" : 2}}' | $CLICKHOUSE_LOCAL --input_format_json_ignore_unknown_keys_in_named_tuple=0 --input-format=NDJSON --structure='t Tuple(a UInt32)' -q "select * from table" |& grep -m1 -o INCORRECT_DATA +echo '{"t" : {"a" : 1, "b" : 2}}' | $CLICKHOUSE_LOCAL --input_format_json_ignore_unknown_keys_in_named_tuple=1 --input-format=NDJSON --structure='t Tuple(a UInt32)' -q "select * from table" +echo '{"t" : {"b" : 2, "a" : 1}}' | $CLICKHOUSE_LOCAL --input_format_json_ignore_unknown_keys_in_named_tuple=0 --input-format=NDJSON --structure='t Tuple(a UInt32)' -q "select * from table" |& grep -m1 -o NOT_FOUND_COLUMN_IN_BLOCK +echo '{"t" : {"b" : 2, "a" : 1}}' | $CLICKHOUSE_LOCAL --input_format_json_ignore_unknown_keys_in_named_tuple=1 --input-format=NDJSON --structure='t Tuple(a UInt32)' -q "select * from table" + +# And now let's try to parse something more complex - gharchive. +# (see https://github.com/ClickHouse/ClickHouse/issues/15323) +# +# NOTE: That JSON here was simplified +gharchive_structure=( + "type String," + "actor Tuple(login String)," + "repo Tuple(name String)," + "created_at DateTime('UTC')," + "payload Tuple( + updated_at DateTime('UTC'), + action String, + comment Tuple( + id UInt64, + path String, + position UInt32, + line UInt32, + user Tuple( + login String + ), + diff_hunk String, + original_position UInt32, + commit_id String, + original_commit_id String + ), + review Tuple( + body String, + author_association String, + state String + ), + ref String, + ref_type String, + issue Tuple( + number UInt32, + title String, + labels Nested( + name String + ), + state String, + locked UInt8, + assignee Tuple( + login String + ), + assignees Nested( + login String + ), + comment String, + closed_at DateTime('UTC') + ), + pull_request Tuple( + merged_at Nullable(DateTime('UTC')), + merge_commit_sha String, + requested_reviewers Nested( + login String + ), + requested_teams Nested( + name String + ), + head Tuple( + ref String, + sha String + ), + base Tuple( + ref String, + sha String + ), + merged UInt8, + mergeable UInt8, + rebaseable UInt8, + mergeable_state String, + merged_by Nullable(String), + /* NOTE: correct type is Tuple, however Tuple cannot be Nullable, + * so you still have to use Nullable(String) and rely on + * input_format_json_read_objects_as_strings, but see also + * https://github.com/ClickHouse/ClickHouse/issues/36464 + */ + /* merged_by Tuple( + * login String + * ), + */ + review_comments UInt32, + maintainer_can_modify UInt8, + commits UInt32, + additions UInt32, + deletions UInt32, + changed_files UInt32 + ), + size UInt32, + distinct_size UInt32, + member Tuple( + login String + ), + release Tuple( + tag_name String, + name String + ) + )" +) +gharchive_settings=( + --date_time_input_format best_effort + --input_format_json_ignore_unknown_keys_in_named_tuple 1 + --input-format JSONEachRow + --output-format JSONObjectEachRow +) + +$CLICKHOUSE_LOCAL "${gharchive_settings[@]}" --structure="${gharchive_structure[*]}" -q "select * from table" <', JSONExtractString(merged_by_, 'login')) AS merged_by + FROM table +" < $CONFIG < + $TEST_HOST + $TEST_PORT + $TEST_DATABASE + + + + test_hostname + MySQL + + + + test_port + $TEST_HOST + 0 + + + + test_secure + $TEST_HOST + 1 + + + + test_database + $TEST_HOST + $CLICKHOUSE_DATABASE + + + + test_user + $TEST_HOST + MySQL + + + + test_password + $TEST_HOST + MySQL + + + + test_history_file + $TEST_HOST + /no/such/dir/.history + + + +EOL + 
+echo 'hostname' +$CLICKHOUSE_CLIENT --config $CONFIG --host test_hostname -q 'select 1' |& grep -F -o 'Not found address of host: MySQL.' +echo 'port' +$CLICKHOUSE_CLIENT --config $CONFIG --host test_port -q 'select tcpPort()' |& grep -F -o 'Connection refused (localhost:0).' +$CLICKHOUSE_CLIENT --config $CONFIG --host test_port --port $TEST_PORT -q 'select tcpPort()' +echo 'secure' +$CLICKHOUSE_CLIENT --config $CONFIG --host test_secure -q 'select tcpPort()' |& grep -c -F -o -e OPENSSL_internal:WRONG_VERSION_NUMBER -e 'tcp_secure protocol is disabled because poco library was built without NetSSL support.' +echo 'database' +$CLICKHOUSE_CLIENT --config $CONFIG --host test_database -q 'select currentDatabase()' +echo 'user' +$CLICKHOUSE_CLIENT --config $CONFIG --host test_user -q 'select currentUser()' |& grep -F -o 'MySQL: Authentication failed' +$CLICKHOUSE_CLIENT --config $CONFIG --host test_user --user default -q 'select currentUser()' +echo 'password' +$CLICKHOUSE_CLIENT --config $CONFIG --host test_password -q 'select currentUser()' |& grep -F -o 'default: Authentication failed: password is incorrect, or there is no user with such name.' +$CLICKHOUSE_CLIENT --config $CONFIG --host test_password --password "" -q 'select currentUser()' +echo 'history_file' +$CLICKHOUSE_CLIENT --progress off --interactive --config $CONFIG --host test_history_file -q 'select 1' > ${TESTS_PATH}/${NEW_TEST_NO}_${FILENAME}.${FILEEXT} fi touch ${TESTS_PATH}/${NEW_TEST_NO}_${FILENAME}.reference diff --git a/tests/queries/0_stateless/data_arrow/duration.arrow b/tests/queries/0_stateless/data_arrow/duration.arrow new file mode 100644 index 00000000000..abbdae772ed Binary files /dev/null and b/tests/queries/0_stateless/data_arrow/duration.arrow differ diff --git a/tests/queries/1_stateful/00172_hits_joins.reference.j2 b/tests/queries/1_stateful/00172_hits_joins.reference.j2 index c357ede4c2c..1a43f1fb6ef 100644 --- a/tests/queries/1_stateful/00172_hits_joins.reference.j2 +++ b/tests/queries/1_stateful/00172_hits_joins.reference.j2 @@ -1,4 +1,4 @@ -{% for join_algorithm in ['hash', 'parallel_hash', 'full_sorting_merge'] -%} +{% for join_algorithm in ['hash', 'parallel_hash', 'full_sorting_merge', 'grace_hash'] -%} --- {{ join_algorithm }} --- 2014-03-17 1406958 265108 2014-03-19 1405797 261624 diff --git a/tests/queries/1_stateful/00172_hits_joins.sql.j2 b/tests/queries/1_stateful/00172_hits_joins.sql.j2 index 07ea899f536..4599d1d5a5d 100644 --- a/tests/queries/1_stateful/00172_hits_joins.sql.j2 +++ b/tests/queries/1_stateful/00172_hits_joins.sql.j2 @@ -1,6 +1,7 @@ -{% for join_algorithm in ['hash', 'parallel_hash', 'full_sorting_merge'] -%} +{% for join_algorithm in ['hash', 'parallel_hash', 'full_sorting_merge', 'grace_hash'] -%} -SET max_bytes_in_join = '{% if join_algorithm == 'grace_hash' %}20K{% else %}0{% endif %}'; +SET max_rows_in_join = '{% if join_algorithm == 'grace_hash' %}10K{% else %}0{% endif %}'; +SET grace_hash_join_initial_buckets = 4; SELECT '--- {{ join_algorithm }} ---'; diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 0bf8023d698..75c693bc1a8 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -104,6 +104,8 @@ NYPD NuRaft ObjectId Ok +OLAP +OLTP OpenSUSE OpenStack OpenTelemetry @@ -125,6 +127,8 @@ PrettySpaceNoEscapesMonoBlock Protobuf ProtobufSingle QTCreator +QueryCacheHits +QueryCacheMisses RBAC RawBLOB RedHat @@ -325,6 +329,7 @@ kafka kafkacat 
konsole latencies +laion lexicographically libFuzzer libc @@ -490,6 +495,7 @@ tokenization toml toolchain toolset +transactionally tskv tsv tui diff --git a/utils/compressor/decompress_perf.cpp b/utils/compressor/decompress_perf.cpp index 891a6d3d1dd..cb98121c024 100644 --- a/utils/compressor/decompress_perf.cpp +++ b/utils/compressor/decompress_perf.cpp @@ -74,10 +74,10 @@ protected: size_decompressed = unalignedLoad(&own_compressed_buffer[5]); } else - throw Exception("Unknown compression method: " + toString(method), ErrorCodes::UNKNOWN_COMPRESSION_METHOD); + throw Exception(ErrorCodes::UNKNOWN_COMPRESSION_METHOD, "Unknown compression method: {}", toString(method)); if (size_compressed > DBMS_MAX_COMPRESSED_SIZE) - throw Exception("Too large size_compressed. Most likely corrupted data.", ErrorCodes::TOO_LARGE_SIZE_COMPRESSED); + throw Exception(ErrorCodes::TOO_LARGE_SIZE_COMPRESSED, "Too large size_compressed. Most likely corrupted data."); /// Is whole compressed block located in 'compressed_in' buffer? if (compressed_in->offset() >= COMPRESSED_BLOCK_HEADER_SIZE && @@ -111,14 +111,14 @@ protected: compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, to, static_cast(size_decompressed)) < 0) { - throw Exception("Cannot LZ4_decompress_fast", ErrorCodes::CANNOT_DECOMPRESS); + throw Exception(ErrorCodes::CANNOT_DECOMPRESS, "Cannot LZ4_decompress_fast"); } } else LZ4::decompress(compressed_buffer + COMPRESSED_BLOCK_HEADER_SIZE, to, size_compressed_without_checksum, size_decompressed, perf_stat); } else - throw Exception("Unknown compression method: " + toString(method), ErrorCodes::UNKNOWN_COMPRESSION_METHOD); + throw Exception(ErrorCodes::UNKNOWN_COMPRESSION_METHOD, "Unknown compression method: {}", toString(method)); } public: diff --git a/utils/keeper-data-dumper/main.cpp b/utils/keeper-data-dumper/main.cpp index 0ea6371b49f..e82b21079fe 100644 --- a/utils/keeper-data-dumper/main.cpp +++ b/utils/keeper-data-dumper/main.cpp @@ -69,7 +69,8 @@ int main(int argc, char *argv[]) LOG_INFO(logger, "Last committed index: {}", last_commited_index); - DB::KeeperLogStore changelog(argv[2], 10000000, true, settings->compress_logs); + DB::KeeperLogStore changelog( + argv[2], LogFileSettings{.force_sync = true, .compress_logs = settings->compress_logs, .rotate_interval = 10000000}); changelog.init(last_commited_index, 10000000000UL); /// collect all logs if (changelog.size() == 0) LOG_INFO(logger, "Changelog empty"); diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index f11bf7a0c26..4535eeaf243 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,10 +1,14 @@ +v23.1.2.9-stable 2023-01-29 +v23.1.1.3077-stable 2023-01-25 v22.12.3.5-stable 2023-01-10 v22.12.2.25-stable 2023-01-06 v22.12.1.1752-stable 2022-12-15 +v22.11.5.15-stable 2023-01-29 v22.11.4.3-stable 2023-01-10 v22.11.3.47-stable 2023-01-09 v22.11.2.30-stable 2022-12-02 v22.11.1.1360-stable 2022-11-17 +v22.10.7.13-stable 2023-01-26 v22.10.6.3-stable 2023-01-10 v22.10.5.54-stable 2023-01-09 v22.10.4.23-stable 2022-12-02 @@ -18,6 +22,7 @@ v22.9.4.32-stable 2022-10-26 v22.9.3.18-stable 2022-09-30 v22.9.2.7-stable 2022-09-23 v22.9.1.2603-stable 2022-09-22 +v22.8.13.20-lts 2023-01-29 v22.8.12.45-lts 2023-01-10 v22.8.11.15-lts 2022-12-08 v22.8.10.29-lts 2022-12-02
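A few usage sketches for features exercised by the tests in this patch follow. The keyed SipHash tests above check that sipHash64Keyed and sipHash128Keyed take a (UInt64, UInt64) key tuple as the first argument and that a zero key reproduces the unkeyed hashes. A minimal sketch of the same contract on an arbitrary string (the 'demo' literal is only illustrative):

-- a zero key should match the unkeyed functions; a non-zero key changes the digest
SELECT
    sipHash64Keyed((toUInt64(0), toUInt64(0)), 'demo') = sipHash64('demo') AS zero_key_matches,
    hex(sipHash128Keyed((toUInt64(1), toUInt64(2)), 'demo')) AS keyed_digest;

As in the tests, the key must be an explicit Tuple(UInt64, UInt64); an untyped literal like (0, 0) is rejected.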
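The hdfsCluster test above toggles use_structure_from_insertion_table_in_table_functions. The setting is not specific to HDFS, so the same effect can be sketched with the file table function; the data.jsonl path is hypothetical and is assumed to contain JSON objects with an "x" key shaped like the target tuple:

CREATE TABLE dst (x Tuple(a UInt32, b UInt32)) ENGINE = Memory;
-- with the setting enabled, schema inference reuses dst's structure
-- instead of inferring its own types from the file
INSERT INTO dst SELECT * FROM file('data.jsonl', 'JSONEachRow')
SETTINGS use_structure_from_insertion_table_in_table_functions = 1;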
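The 02539 generateRandom tests above cover IPv4/IPv6, LowCardinality and Map columns, with the trailing arguments acting as random seed, maximum string length and maximum array/map length. A small combined sketch under the same signature assumptions:

-- arguments: structure, random_seed, max_string_length, max_array_length (also caps Map size)
SELECT * FROM generateRandom('v4 IPv4, v6 IPv6, m Map(String, String)', 20, 5, 3) LIMIT 5;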
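The 02540 test drives input_format_json_ignore_unknown_keys_in_named_tuple through clickhouse-local. The same behaviour can presumably be reproduced in plain SQL, assuming the three-argument form of the format table function (format name, structure, data) is available:

-- without the setting this fails with INCORRECT_DATA, because "b" is not part of the tuple
SELECT * FROM format(JSONEachRow, 't Tuple(a UInt32)', '{"t" : {"a" : 1, "b" : 2}}')
SETTINGS input_format_json_ignore_unknown_keys_in_named_tuple = 1;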
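Finally, the 00172_hits_joins change adds grace_hash to the exercised join algorithms and bounds the in-memory part with max_rows_in_join plus grace_hash_join_initial_buckets. A self-contained sketch with the same settings, using numbers() instead of the hits dataset:

SET join_algorithm = 'grace_hash';
SET max_rows_in_join = '10K';            -- limit on the in-memory build side; the rest is split into buckets
SET grace_hash_join_initial_buckets = 4; -- value taken from the test settings above
SELECT count()
FROM numbers(100000) AS l
INNER JOIN numbers(100000) AS r USING (number);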