diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 9200e5e87b8..c6555fd9f91 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -12,38 +12,10 @@ jobs: ReleasePublish: runs-on: [self-hosted, style-checker] steps: - - name: Set envs + - name: Deploy packages and assets run: | - cat >> "$GITHUB_ENV" << 'EOF' - JFROG_API_KEY=${{ secrets.JFROG_ARTIFACTORY_API_KEY }} - TEMP_PATH=${{runner.temp}}/release_packages - REPO_COPY=${{runner.temp}}/release_packages/ClickHouse - EOF - - name: Check out repository code - uses: ClickHouse/checkout@v1 - with: - # Always use the most recent script version - ref: master - - name: Download packages and push to Artifactory - run: | - rm -rf "$TEMP_PATH" && mkdir -p "$TEMP_PATH" - cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY" - # Download and push packages to artifactory - python3 ./tests/ci/push_to_artifactory.py --release '${{ github.ref }}' \ - --commit '${{ github.sha }}' --artifactory-url '${{ secrets.JFROG_ARTIFACTORY_URL }}' --all - # Download macos binaries to ${{runner.temp}}/download_binary - python3 ./tests/ci/download_binary.py --version '${{ github.ref }}' \ - --commit '${{ github.sha }}' binary_darwin binary_darwin_aarch64 - mv '${{runner.temp}}/download_binary/'clickhouse-* '${{runner.temp}}/push_to_artifactory' - - name: Upload packages to release assets - uses: svenstaro/upload-release-action@v2 - with: - repo_token: ${{ secrets.GITHUB_TOKEN }} - file: ${{runner.temp}}/push_to_artifactory/* - overwrite: true - tag: ${{ github.ref }} - file_glob: true + GITHUB_TAG="${GITHUB_REF#refs/tags/}" + curl '${{ secrets.PACKAGES_RELEASE_URL }}/release/'"${GITHUB_TAG}"'?binary=binary_darwin&binary=binary_darwin_aarch64&sync=true' -d '' ############################################################################################ ##################################### Docker images ####################################### ############################################################################################ diff --git a/SECURITY.md b/SECURITY.md index 3dcdc5db009..0fd72971d30 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -13,9 +13,10 @@ The following versions of ClickHouse server are currently being supported with s | Version | Supported | |:-|:-| +| 23.1 | ✔️ | | 22.12 | ✔️ | | 22.11 | ✔️ | -| 22.10 | ✔️ | +| 22.10 | ❌ | | 22.9 | ❌ | | 22.8 | ✔️ | | 22.7 | ❌ | @@ -25,18 +26,7 @@ The following versions of ClickHouse server are currently being supported with s | 22.3 | ✔️ | | 22.2 | ❌ | | 22.1 | ❌ | -| 21.12 | ❌ | -| 21.11 | ❌ | -| 21.10 | ❌ | -| 21.9 | ❌ | -| 21.8 | ❌ | -| 21.7 | ❌ | -| 21.6 | ❌ | -| 21.5 | ❌ | -| 21.4 | ❌ | -| 21.3 | ❌ | -| 21.2 | ❌ | -| 21.1 | ❌ | +| 21.* | ❌ | | 20.* | ❌ | | 19.* | ❌ | | 18.* | ❌ | diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 87b11c46f45..812a0d9e64b 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. 
-SET(VERSION_REVISION 54470) -SET(VERSION_MAJOR 22) -SET(VERSION_MINOR 13) +SET(VERSION_REVISION 54471) +SET(VERSION_MAJOR 23) +SET(VERSION_MINOR 2) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 688e488e930c83eefeac4f87c4cc029cc5b231e3) -SET(VERSION_DESCRIBE v22.13.1.1-testing) -SET(VERSION_STRING 22.13.1.1) +SET(VERSION_GITHASH dcaac47702510cc87ddf266bc524f6b7ce0a8e6e) +SET(VERSION_DESCRIBE v23.2.1.1-testing) +SET(VERSION_STRING 23.2.1.1) # end of autochange diff --git a/contrib/NuRaft b/contrib/NuRaft index afc36dfa9b0..545b8c810a9 160000 --- a/contrib/NuRaft +++ b/contrib/NuRaft @@ -1 +1 @@ -Subproject commit afc36dfa9b0beb45bc4cd935060631cc80ba04a5 +Subproject commit 545b8c810a956b2efdc116e86be219af7e83d68a diff --git a/contrib/arrow b/contrib/arrow index 450a5638704..d03245f801f 160000 --- a/contrib/arrow +++ b/contrib/arrow @@ -1 +1 @@ -Subproject commit 450a5638704386356f8e520080468fc9bc8bcaf8 +Subproject commit d03245f801f798c63ee9a7d2b8914a9e5c5cd666 diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 633de4fdbfc..dcbfca68839 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="22.12.3.5" +ARG VERSION="23.1.1.3077" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index b6a93c808c3..f755a44ff32 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -21,7 +21,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="22.12.3.5" +ARG VERSION="23.1.1.3077" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docker/test/stress/stress b/docker/test/stress/stress index 4afd2745526..3fce357cc19 100755 --- a/docker/test/stress/stress +++ b/docker/test/stress/stress @@ -146,6 +146,12 @@ def prepare_for_hung_check(drop_databases): "KILL QUERY WHERE query LIKE 'SELECT URL, uniq(SearchPhrase) AS u FROM test.hits GROUP BY URL ORDER BY u %'" ) ) + # Long query from 02136_kill_scalar_queries + call_with_retry( + make_query_command( + "KILL QUERY WHERE query LIKE 'SELECT (SELECT number FROM system.numbers WHERE number = 1000000000000)%'" + ) + ) if drop_databases: for i in range(5): diff --git a/docs/changelogs/v22.10.7.13-stable.md b/docs/changelogs/v22.10.7.13-stable.md new file mode 100644 index 00000000000..c906e00e524 --- /dev/null +++ b/docs/changelogs/v22.10.7.13-stable.md @@ -0,0 +1,21 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v22.10.7.13-stable (d261d9036cc) FIXME as compared to v22.10.6.3-stable (645a66d221f) + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#44998](https://github.com/ClickHouse/ClickHouse/issues/44998): Another fix for `Cannot read all data` error which could happen while reading `LowCardinality` dictionary from remote fs. Fixes [#44709](https://github.com/ClickHouse/ClickHouse/issues/44709). 
[#44875](https://github.com/ClickHouse/ClickHouse/pull/44875) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Backported in [#45551](https://github.com/ClickHouse/ClickHouse/issues/45551): Fix `SELECT ... FROM system.dictionaries` exception when there is a dictionary with a bad structure (e.g. incorrect type in xml config). [#45399](https://github.com/ClickHouse/ClickHouse/pull/45399) ([Aleksei Filatov](https://github.com/aalexfvk)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Automatically merge green backport PRs and green approved PRs [#41110](https://github.com/ClickHouse/ClickHouse/pull/41110) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Improve release scripts [#45074](https://github.com/ClickHouse/ClickHouse/pull/45074) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix wrong approved_at, simplify conditions [#45302](https://github.com/ClickHouse/ClickHouse/pull/45302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Get rid of artifactory in favor of r2 + ch-repos-manager [#45421](https://github.com/ClickHouse/ClickHouse/pull/45421) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/changelogs/v23.1.1.3077-stable.md b/docs/changelogs/v23.1.1.3077-stable.md new file mode 100644 index 00000000000..e218be62f09 --- /dev/null +++ b/docs/changelogs/v23.1.1.3077-stable.md @@ -0,0 +1,592 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.1.1.3077-stable (dcaac477025) FIXME as compared to v22.12.1.1752-stable (688e488e930) + +#### Backward Incompatible Change +* Remove query `SYSTEM RESTART DISK`. [#44647](https://github.com/ClickHouse/ClickHouse/pull/44647) ([alesapin](https://github.com/alesapin)). +* Disallow Gorilla compression on columns of non-Float32 or non-Float64 type. [#45252](https://github.com/ClickHouse/ClickHouse/pull/45252) ([Robert Schulze](https://github.com/rschu1ze)). +* Remove PREALLOCATE for HASHED/SPARSE_HASHED dictionaries. [#45388](https://github.com/ClickHouse/ClickHouse/pull/45388) ([Azat Khuzhin](https://github.com/azat)). +* Parallel quorum inserts might work incorrectly with `*MergeTree` tables created with deprecated syntax. Therefore, parallel quorum inserts support is completely disabled for such tables. It does not affect tables created with a new syntax. [#45430](https://github.com/ClickHouse/ClickHouse/pull/45430) ([Alexander Tokmakov](https://github.com/tavplubix)). + +#### New Feature +* Add `quantileInterpolatedWeighted`/`quantilesInterpolatedWeighted` functions. [#38252](https://github.com/ClickHouse/ClickHouse/pull/38252) ([Bharat Nallan](https://github.com/bharatnc)). +* Add an experimental inverted index as a new secondary index type for efficient text search. [#38667](https://github.com/ClickHouse/ClickHouse/pull/38667) ([larryluogit](https://github.com/larryluogit)). +* Add column `ptr` to `system.trace_log` for `trace_type = 'MemorySample'`. This column contains an address of allocation. Added function `flameGraph` which can build flamegraph containing allocated and not released memory. Reworking of [#38391](https://github.com/ClickHouse/ClickHouse/issues/38391). [#38953](https://github.com/ClickHouse/ClickHouse/pull/38953) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Dictionary source for extracting keys by traversing regular expressions tree. [#40878](https://github.com/ClickHouse/ClickHouse/pull/40878) ([Vage Ogannisian](https://github.com/nooblose)). 
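For the `quantileInterpolatedWeighted`/`quantilesInterpolatedWeighted` functions added earlier in this New Feature list, a minimal sketch of the call shape (the table, column and weight values below are made up for illustration):

```sql
-- Hypothetical table: per-request latencies with a per-row weight.
CREATE TABLE request_latency (latency_ms Float64, weight UInt64) ENGINE = Memory;

INSERT INTO request_latency VALUES (12.5, 1), (80.0, 3), (250.0, 1);

-- Weighted median plus a set of weighted quantiles computed with linear interpolation.
SELECT
    quantileInterpolatedWeighted(0.5)(latency_ms, weight)         AS p50,
    quantilesInterpolatedWeighted(0.9, 0.99)(latency_ms, weight)  AS p90_p99
FROM request_latency;
```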
+* Added parametrized view functionality, now it's possible to specify query parameters for View table engine. resolves [#40907](https://github.com/ClickHouse/ClickHouse/issues/40907). [#41687](https://github.com/ClickHouse/ClickHouse/pull/41687) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* added extendable and configurable scheduling subsystem for IO requests (not yet integrated with IO code itself). [#41840](https://github.com/ClickHouse/ClickHouse/pull/41840) ([Sergei Trifonov](https://github.com/serxa)). +* Added `SYSTEM DROP DATABASE REPLICA` that removes metadata of dead replica of `Replicated` database. Resolves [#41794](https://github.com/ClickHouse/ClickHouse/issues/41794). [#42807](https://github.com/ClickHouse/ClickHouse/pull/42807) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Array join support map type, like function explode in spark. [#43239](https://github.com/ClickHouse/ClickHouse/pull/43239) ([李扬](https://github.com/taiyang-li)). +* Support SQL standard binary and hex string literals. [#43785](https://github.com/ClickHouse/ClickHouse/pull/43785) ([Mo Xuan](https://github.com/mo-avatar)). +* Add experimental query result cache. [#43797](https://github.com/ClickHouse/ClickHouse/pull/43797) ([Robert Schulze](https://github.com/rschu1ze)). +* format datetime in joda datetime style. Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html. [#43818](https://github.com/ClickHouse/ClickHouse/pull/43818) ([李扬](https://github.com/taiyang-li)). +* to merge [#40878](https://github.com/ClickHouse/ClickHouse/issues/40878) , supporting regexp dictionary. [#43858](https://github.com/ClickHouse/ClickHouse/pull/43858) ([Han Fei](https://github.com/hanfei1991)). +* Implemented a fractional second formatter (`%f`) for formatDateTime. [#44060](https://github.com/ClickHouse/ClickHouse/pull/44060) ([ltrk2](https://github.com/ltrk2)). +* Added age function to calculate difference between two dates or dates with time values expressed as number of full units. Closes [#41115](https://github.com/ClickHouse/ClickHouse/issues/41115). [#44421](https://github.com/ClickHouse/ClickHouse/pull/44421) ([Robert Schulze](https://github.com/rschu1ze)). +* Implemented a fractional second formatter (%f) for formatDateTime. This is slightly modified PR [#44060](https://github.com/ClickHouse/ClickHouse/issues/44060) by @ltrk2. [#44497](https://github.com/ClickHouse/ClickHouse/pull/44497) ([Alexander Gololobov](https://github.com/davenger)). +* Add null source for dictionaries. Closes [#44240](https://github.com/ClickHouse/ClickHouse/issues/44240). [#44502](https://github.com/ClickHouse/ClickHouse/pull/44502) ([mayamika](https://github.com/mayamika)). +* We can use `s3_storage_class` to set different tier. Such as ``` s3 xxx xxx xxx STANDARD/INTELLIGENT_TIERING ``` Closes [#44443](https://github.com/ClickHouse/ClickHouse/issues/44443). [#44707](https://github.com/ClickHouse/ClickHouse/pull/44707) ([chen](https://github.com/xiedeyantu)). +* Try to detect header with column names (and maybe types) for CSV/TSV/CustomSeparated input formats. Add settings `input_format_tsv/csv/custom_detect_header` that enables this behaviour (enabled by default). Closes [#44640](https://github.com/ClickHouse/ClickHouse/issues/44640). [#44953](https://github.com/ClickHouse/ClickHouse/pull/44953) ([Kruglov Pavel](https://github.com/Avogar)). +* Insert default values in case of missing elements in JSON object while parsing named tuple. 
Add setting `input_format_json_defaults_for_missing_elements_in_named_tuple` that controls this behaviour. Closes [#45142](https://github.com/ClickHouse/ClickHouse/issues/45142)#issuecomment-1380153217. [#45231](https://github.com/ClickHouse/ClickHouse/pull/45231) ([Kruglov Pavel](https://github.com/Avogar)). +* - Add total memory and used memory metrics with respect to cgroup in AsyncMetrics (https://github.com/ClickHouse/ClickHouse/issues/37983). [#45301](https://github.com/ClickHouse/ClickHouse/pull/45301) ([sichenzhao](https://github.com/sichenzhao)). +* Introduce non-throwing variants of hasToken and hasTokenCaseInsensitive. [#45341](https://github.com/ClickHouse/ClickHouse/pull/45341) ([ltrk2](https://github.com/ltrk2)). + +#### Performance Improvement +* Added sharding support in HashedDictionary to allow parallel load (almost linear scaling based on number of shards). [#40003](https://github.com/ClickHouse/ClickHouse/pull/40003) ([Azat Khuzhin](https://github.com/azat)). +* Do not load inactive parts at startup of `MergeTree` tables. [#42181](https://github.com/ClickHouse/ClickHouse/pull/42181) ([Anton Popov](https://github.com/CurtizJ)). +* - Speed up query parsing. [#42284](https://github.com/ClickHouse/ClickHouse/pull/42284) ([Raúl Marín](https://github.com/Algunenano)). +* Always replace OR chain `expr = x1 OR ... OR expr = xN` to `expr IN (x1, ..., xN)` in case if `expr` is a `LowCardinality` column. Setting `optimize_min_equality_disjunction_chain_length` is ignored in this case. [#42889](https://github.com/ClickHouse/ClickHouse/pull/42889) ([Guo Wangyang](https://github.com/guowangy)). +* > Original changelog In the original implementation, the memory of ThreadGroupStatus:: finished_threads_counters_memory is released by moving it to a temporary std::vector, which soon expired and gets destructed. This method is viable, however not straightforward enough. To enhance the code readability, this commit releases the memory in the vector by firstly resizing it to 0 and then shrinking the capacity accordingly. [#43586](https://github.com/ClickHouse/ClickHouse/pull/43586) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* As a follow-up of [#42214](https://github.com/ClickHouse/ClickHouse/issues/42214), this PR tries to optimize the column-wise ternary logic evaluation by achieving auto-vectorization. In the performance test of this [microbenchmark](https://github.com/ZhiguoZh/ClickHouse/blob/20221123-ternary-logic-opt-example/src/Functions/examples/associative_applier_perf.cpp), we've observed a peak **performance gain** of **21x** on the ICX device (Intel Xeon Platinum 8380 CPU). [#43669](https://github.com/ClickHouse/ClickHouse/pull/43669) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Improved latency of reading from storage `S3` and table function `s3` with large number of small files. Now settings `remote_filesystem_read_method` and `remote_filesystem_read_prefetch` take effect while reading from storage `S3`. [#43726](https://github.com/ClickHouse/ClickHouse/pull/43726) ([Anton Popov](https://github.com/CurtizJ)). +* - Avoid acquiring read locks in system.tables if possible. [#43840](https://github.com/ClickHouse/ClickHouse/pull/43840) ([Raúl Marín](https://github.com/Algunenano)). 
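As a hedged illustration of the `input_format_json_defaults_for_missing_elements_in_named_tuple` setting described in the New Feature entry above (table and column names are made up):

```sql
SET input_format_json_defaults_for_missing_elements_in_named_tuple = 1;

-- Hypothetical table with a named tuple column.
CREATE TABLE events (payload Tuple(id UInt32, tag String)) ENGINE = Memory;

-- The "tag" element is missing from the input; with the setting enabled it is
-- filled with the type's default value instead of causing a parsing error.
INSERT INTO events FORMAT JSONEachRow {"payload": {"id": 42}}

SELECT payload FROM events;
```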
+* The performance experiments of SSB (Star Schema Benchmark) on the ICX device (Intel Xeon Platinum 8380 CPU, 80 cores, 160 threads) shows that this change could effectively decrease the lock contention for ThreadPoolImpl::mutex by **75%**, increasing the CPU utilization and improving the overall performance by **2.4%**. [#44308](https://github.com/ClickHouse/ClickHouse/pull/44308) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Now optimisation is applied only if the cached HT size is sufficiently large (thresholds were determined empirically and hardcoded). [#44455](https://github.com/ClickHouse/ClickHouse/pull/44455) ([Nikita Taranov](https://github.com/nickitat)). +* ... The whole struct field will be loaded at current, even though we just want to read one field of the struct. [#44484](https://github.com/ClickHouse/ClickHouse/pull/44484) ([lgbo](https://github.com/lgbo-ustc)). +* Small performance improvement for asynchronous reading from remote fs. [#44868](https://github.com/ClickHouse/ClickHouse/pull/44868) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Switched to faster shared (RW) mutex implementation. Performance may be improved in queries with a lot of thread synchronization or for data structures experiencing heavy contention. [#45007](https://github.com/ClickHouse/ClickHouse/pull/45007) ([Sergei Trifonov](https://github.com/serxa)). +* Add fast path for: - col like '%%' - col like '%' - col not like '%' - col not like '%' - match(col, '.*'). [#45244](https://github.com/ClickHouse/ClickHouse/pull/45244) ([李扬](https://github.com/taiyang-li)). +* todo. [#45289](https://github.com/ClickHouse/ClickHouse/pull/45289) ([Nikita Taranov](https://github.com/nickitat)). + +#### Improvement +* Refactor and Improve streaming engines Kafka/RabbitMQ/NATS and add support for all formats, also refactor formats a bit: - Fix producing messages in row-based formats with suffixes/prefixes. Now every message is formatted complitely with all delimiters and can be parsed back using input format. - Support block-based formats like Native, Parquet, ORC, etc. Every block is formatted as a separated message. The number of rows in one message depends on block size, so you can control it via setting `max_block_size`. - Add new engine settings `kafka_max_rows_per_message/rabbitmq_max_rows_per_message/nats_max_rows_per_message`. They control the number of rows formatted in one message in row-based formats. Default value: 1. - Fix high memory consumption in NATS table engine. - Support arbitrary binary data in NATS producer (previously it worked only with strings contained \0 at the end) - Add missing Kafka/RabbitMQ/NATS engine settings in documentation. - Refactor producing and consuming in Kafka/RabbitMQ/NATS, separate it from WriteBuffers/ReadBuffers semantic. - Refactor output formats: remove callbacks on each row used in Kafka/RabbitMQ/NATS (now we don't use callbacks there), allow to use IRowOutputFormat directly, clarify row end and row between delimiters, make it possible to reset output format to start formatting again - Add proper implementation in formatRow function (bonus after formats refactoring). [#42777](https://github.com/ClickHouse/ClickHouse/pull/42777) ([Kruglov Pavel](https://github.com/Avogar)). +* Support `optimize_or_like_chain` in the new infrastructure. Part of [#42648](https://github.com/ClickHouse/ClickHouse/issues/42648). [#42797](https://github.com/ClickHouse/ClickHouse/pull/42797) ([Dmitry Novik](https://github.com/novikd)). 
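To show where the `kafka_max_rows_per_message` engine setting from the streaming-engines entry above fits, a sketch of a Kafka table definition (broker, topic and table names are placeholders, not taken from this release):

```sql
-- Hypothetical Kafka table producing/consuming CSV messages; up to 100 rows
-- are packed into a single Kafka message when producing.
CREATE TABLE metrics_queue (ts DateTime, value Float64)
ENGINE = Kafka
SETTINGS kafka_broker_list = 'kafka:9092',
         kafka_topic_list = 'metrics',
         kafka_group_name = 'clickhouse-metrics',
         kafka_format = 'CSV',
         kafka_max_rows_per_message = 100;
```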
+* Improve the Asterisk and ColumnMatcher parsers. Part of [#42648](https://github.com/ClickHouse/ClickHouse/issues/42648). [#42884](https://github.com/ClickHouse/ClickHouse/pull/42884) ([Nikolay Degterinsky](https://github.com/evillique)). +* Implement `optimize_redundant_functions_in_order_by` on top of QueryTree. Part of [#42648](https://github.com/ClickHouse/ClickHouse/issues/42648). [#42970](https://github.com/ClickHouse/ClickHouse/pull/42970) ([Dmitry Novik](https://github.com/novikd)). +* Support `optimize_group_by_function_keys` in the new analyzer architecture. Also, add support for optimizing GROUPING SETS keys. Part of [#42648](https://github.com/ClickHouse/ClickHouse/issues/42648). [#43261](https://github.com/ClickHouse/ClickHouse/pull/43261) ([Dmitry Novik](https://github.com/novikd)). +* Improve reading CSV field in CustomSeparated/Template format. Closes [#42352](https://github.com/ClickHouse/ClickHouse/issues/42352) Closes [#39620](https://github.com/ClickHouse/ClickHouse/issues/39620). [#43332](https://github.com/ClickHouse/ClickHouse/pull/43332) ([Kruglov Pavel](https://github.com/Avogar)). +* Support reading/writing `Nested` tables as `List` of `Struct` in CapnProto format. Read/write `Decimal32/64` as `Int32/64`. Closes [#43319](https://github.com/ClickHouse/ClickHouse/issues/43319). [#43379](https://github.com/ClickHouse/ClickHouse/pull/43379) ([Kruglov Pavel](https://github.com/Avogar)). +* - Unify query elapsed time measurements. [#43455](https://github.com/ClickHouse/ClickHouse/pull/43455) ([Raúl Marín](https://github.com/Algunenano)). +* Support scalar subqueries cache Implementation: * Added a map with hash of the node (without alias) and the evaluated value to Context. Testing: * Added a test-case with new analyser in 02174_cte_scalar_cache.sql. [#43640](https://github.com/ClickHouse/ClickHouse/pull/43640) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Improve automatic usage of structure from insertion table in table functions file/hdfs/s3 when virtual columns present in select query, it fixes possible error `Block structure mismatch` or `number of columns mismatch`. [#43695](https://github.com/ClickHouse/ClickHouse/pull/43695) ([Kruglov Pavel](https://github.com/Avogar)). +* Add support for signed arguments in range(). Fixes [#43333](https://github.com/ClickHouse/ClickHouse/issues/43333). [#43733](https://github.com/ClickHouse/ClickHouse/pull/43733) ([sanyu](https://github.com/wineternity)). +* Remove redundant sorting, for example, sorting related ORDER BY clauses in subqueries. Implemented on top of query plan. It does similar optimization as `optimize_duplicate_order_by_and_distinct` regarding `ORDER BY` clauses, but more generic, since it's applied to any redundant sorting steps (not only caused by ORDER BY clause) and applied to subqueries of any depth. Related to [#42648](https://github.com/ClickHouse/ClickHouse/issues/42648). [#43905](https://github.com/ClickHouse/ClickHouse/pull/43905) ([Igor Nikonov](https://github.com/devcrafter)). +* Added mmap support for StorageFile, which should improve the performance of clickhouse-local. [#43927](https://github.com/ClickHouse/ClickHouse/pull/43927) ([pufit](https://github.com/pufit)). +* Add ability to disable deduplication for BACKUP (for backups wiithout deduplication ATTACH can be used instead of full RESTORE), example `BACKUP foo TO S3(...) SETTINGS deduplicate_files=0` (default `deduplicate_files=1`). 
[#43947](https://github.com/ClickHouse/ClickHouse/pull/43947) ([Azat Khuzhin](https://github.com/azat)). +* Make `system.replicas` table do parallel fetches of replicas statuses. Closes [#43918](https://github.com/ClickHouse/ClickHouse/issues/43918). [#43998](https://github.com/ClickHouse/ClickHouse/pull/43998) ([Nikolay Degterinsky](https://github.com/evillique)). +* Refactor and improve schema inference for text formats. Add new setting `schema_inference_make_columns_nullable` that controls making result types `Nullable` (enabled by default);. [#44019](https://github.com/ClickHouse/ClickHouse/pull/44019) ([Kruglov Pavel](https://github.com/Avogar)). +* Better support for PROXYv1. [#44135](https://github.com/ClickHouse/ClickHouse/pull/44135) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Add information about the latest part check by cleanup thread into `system.parts` table. [#44244](https://github.com/ClickHouse/ClickHouse/pull/44244) ([Dmitry Novik](https://github.com/novikd)). +* Disable functions in readonly for inserts. [#44290](https://github.com/ClickHouse/ClickHouse/pull/44290) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Add a setting `simultaneous_parts_removal_limit` to allow to limit the number of parts being processed by one iteration of CleanupThread. [#44461](https://github.com/ClickHouse/ClickHouse/pull/44461) ([Dmitry Novik](https://github.com/novikd)). +* If user only need virtual columns, we don't need to initialize ReadBufferFromS3. May be helpful to [#44246](https://github.com/ClickHouse/ClickHouse/issues/44246). [#44493](https://github.com/ClickHouse/ClickHouse/pull/44493) ([chen](https://github.com/xiedeyantu)). +* Prevent duplicate column names hints. Closes [#44130](https://github.com/ClickHouse/ClickHouse/issues/44130). [#44519](https://github.com/ClickHouse/ClickHouse/pull/44519) ([Joanna Hulboj](https://github.com/jh0x)). +* Allow macro substitution in endpoint of disks resolve [#40951](https://github.com/ClickHouse/ClickHouse/issues/40951). [#44533](https://github.com/ClickHouse/ClickHouse/pull/44533) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Added a `message_format_string` column to `system.text_log`. The column contains a pattern that was used to format the message. [#44543](https://github.com/ClickHouse/ClickHouse/pull/44543) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Improve schema inference when `input_format_json_read_object_as_string` is enabled. [#44546](https://github.com/ClickHouse/ClickHouse/pull/44546) ([Kruglov Pavel](https://github.com/Avogar)). +* Add user-level setting `database_replicated_allow_replicated_engine_arguments` which allow to ban creation of `ReplicatedMergeTree` tables with arguments in `DatabaseReplicated`. [#44566](https://github.com/ClickHouse/ClickHouse/pull/44566) ([alesapin](https://github.com/alesapin)). +* Prevent users from mistakenly specifying zero (invalid) value for `index_granularity`. This closes [#44536](https://github.com/ClickHouse/ClickHouse/issues/44536). [#44578](https://github.com/ClickHouse/ClickHouse/pull/44578) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added possibility to set path to service keytab file in `keytab` parameter in `kerberos` section of config.xml. [#44594](https://github.com/ClickHouse/ClickHouse/pull/44594) ([Roman Vasin](https://github.com/rvasin)). +* Use already written part of the query for fuzzy search (pass to skim). 
[#44600](https://github.com/ClickHouse/ClickHouse/pull/44600) ([Azat Khuzhin](https://github.com/azat)). +* Enable `input_format_json_read_objects_as_strings` by default to be able to read nested JSON objects while the JSON Object type is experimental. [#44657](https://github.com/ClickHouse/ClickHouse/pull/44657) ([Kruglov Pavel](https://github.com/Avogar)). +* When users send duplicate async inserts, deduplicate them in memory before querying Keeper. [#44682](https://github.com/ClickHouse/ClickHouse/pull/44682) ([Han Fei](https://github.com/hanfei1991)). +* Input/output Avro bool type as ClickHouse bool type. [#44684](https://github.com/ClickHouse/ClickHouse/pull/44684) ([Kruglov Pavel](https://github.com/Avogar)). +* Don't parse beyond the quotes when reading UUIDs. [#44686](https://github.com/ClickHouse/ClickHouse/pull/44686) ([Raúl Marín](https://github.com/Algunenano)). +* Infer UInt64 in case of Int64 overflow and fix some transforms in schema inference. [#44696](https://github.com/ClickHouse/ClickHouse/pull/44696) ([Kruglov Pavel](https://github.com/Avogar)). +* Previously dependency resolving inside DatabaseReplicated was done in a hacky way; now it is done right using an explicit graph. [#44697](https://github.com/ClickHouse/ClickHouse/pull/44697) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Support Bool type in Arrow/Parquet/ORC. Closes [#43970](https://github.com/ClickHouse/ClickHouse/issues/43970). [#44698](https://github.com/ClickHouse/ClickHouse/pull/44698) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix `output_format_pretty_row_numbers` not preserving the row counter across blocks. Closes [#44815](https://github.com/ClickHouse/ClickHouse/issues/44815). [#44832](https://github.com/ClickHouse/ClickHouse/pull/44832) ([flynn](https://github.com/ucasfl)). +* Extend function `toDayOfWeek` with a mode argument describing whether a) the week starts on Monday or Sunday and b) counting starts at 0 or 1. [#44860](https://github.com/ClickHouse/ClickHouse/pull/44860) ([李扬](https://github.com/taiyang-li)). +* Don't report errors in system.errors due to parts being merged concurrently with the background cleanup process. [#44874](https://github.com/ClickHouse/ClickHouse/pull/44874) ([Raúl Marín](https://github.com/Algunenano)). +* Optimize and fix metrics for Distributed async INSERT. [#44922](https://github.com/ClickHouse/ClickHouse/pull/44922) ([Azat Khuzhin](https://github.com/azat)). +* Added settings to disallow concurrent backups and restores. Resolves [#43891](https://github.com/ClickHouse/ClickHouse/issues/43891). Implementation: * Added server-level settings to disallow concurrent backups and restores, which are read and set when BackupWorker is created in Context. * Settings are set to true by default. * Before starting a backup or restore, added a check to see if any other backups/restores are running; for internal requests it checks whether the request comes from the same node using backup_uuid. [#45072](https://github.com/ClickHouse/ClickHouse/pull/45072) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Add a cache for async block ids. This reduces the number of ZooKeeper requests when async insert deduplication is enabled. [#45106](https://github.com/ClickHouse/ClickHouse/pull/45106) ([Han Fei](https://github.com/hanfei1991)). +* CRC32 changes to address the WeakHash collision issue on PowerPC. [#45144](https://github.com/ClickHouse/ClickHouse/pull/45144) ([MeenaRenganathan22](https://github.com/MeenaRenganathan22)).
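A small sketch of the extended `toDayOfWeek` described above; only the call shape is shown here, and the exact meaning of each mode value should be taken from the documentation:

```sql
-- Default behaviour vs. explicit mode arguments (week start and 0/1-based counting).
SELECT
    toDayOfWeek(toDate('2023-01-22'))     AS default_mode,
    toDayOfWeek(toDate('2023-01-22'), 1)  AS mode_1,
    toDayOfWeek(toDate('2023-01-22'), 3)  AS mode_3;
```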
+* Optimize memory consumption during backup to S3: files to S3 now will be copied directly without using `WriteBufferFromS3` (which could use a lot of memory). [#45188](https://github.com/ClickHouse/ClickHouse/pull/45188) ([Vitaly Baranov](https://github.com/vitlibar)). +* Use structure from insertion table in generateRandom without arguments. [#45239](https://github.com/ClickHouse/ClickHouse/pull/45239) ([Kruglov Pavel](https://github.com/Avogar)). +* Use `GetObjectAttributes` request instead of `HeadObject` request to get the size of an object in AWS S3. This change fixes handling endpoints without explicit region, for example. [#45288](https://github.com/ClickHouse/ClickHouse/pull/45288) ([Vitaly Baranov](https://github.com/vitlibar)). +* Add `` config parameter for system logs. [#45320](https://github.com/ClickHouse/ClickHouse/pull/45320) ([Stig Bakken](https://github.com/stigsb)). +* Remove redundant sorting, for example, sorting related ORDER BY clauses in subqueries. Implemented on top of query plan. It does similar optimization as `optimize_duplicate_order_by_and_distinct` regarding `ORDER BY` clauses, but more generic, since it's applied to any redundant sorting steps (not only caused by ORDER BY clause) and applied to subqueries of any depth. Related to [#42648](https://github.com/ClickHouse/ClickHouse/issues/42648). [#45420](https://github.com/ClickHouse/ClickHouse/pull/45420) ([Igor Nikonov](https://github.com/devcrafter)). +* Allow to implicitly convert floats stored in string fields of JSON to integers in `JSONExtract` functions. E.g. `JSONExtract('{"a": "1000.111"}', 'a', 'UInt64')` -> `1000`, previously it returned 0. [#45432](https://github.com/ClickHouse/ClickHouse/pull/45432) ([Anton Popov](https://github.com/CurtizJ)). +* Added fields `supports_parallel_parsing` and `supports_parallel_formatting` to table `system.formats` for better introspection. [#45499](https://github.com/ClickHouse/ClickHouse/pull/45499) ([Anton Popov](https://github.com/CurtizJ)). +* Attempt to improve fsync latency (by syncing all files at once during fetches and small files after mutations) and one tiny fix for fsync_part_directory. [#45537](https://github.com/ClickHouse/ClickHouse/pull/45537) ([Azat Khuzhin](https://github.com/azat)). + +#### Bug Fix +* Fix HTTP requests without path for AWS. After updating AWS SDK the sdk no longer adds a slash to requesting paths so we need to do it in our PocoHTTPClient to keep HTTP requests correct. [#45238](https://github.com/ClickHouse/ClickHouse/pull/45238) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix backup if mutations get killed during the backup process. [#45351](https://github.com/ClickHouse/ClickHouse/pull/45351) ([Vitaly Baranov](https://github.com/vitlibar)). + +#### Build/Testing/Packaging Improvement +* Builtin skim for fuzzy search in clickhouse client/local history. [#44239](https://github.com/ClickHouse/ClickHouse/pull/44239) ([Azat Khuzhin](https://github.com/azat)). +* Memory limit for server is set now in AST fuzz tests to avoid OOMs. [#44282](https://github.com/ClickHouse/ClickHouse/pull/44282) ([Nikita Taranov](https://github.com/nickitat)). +* In rare cases, we don't rebuild binaries, because another task with a similar prefix succeeded. E.g. `binary_darwin` didn't restart because `binary_darwin_aarch64`. [#44311](https://github.com/ClickHouse/ClickHouse/pull/44311) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* The "universal.sh" now fetches a SSE2 build on systems which don't have SSE4.2. 
[#44366](https://github.com/ClickHouse/ClickHouse/pull/44366) ([Robert Schulze](https://github.com/rschu1ze)). +* Retry the integration tests on compressing errors. [#44529](https://github.com/ClickHouse/ClickHouse/pull/44529) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* ... 1. Added pytest-random by default in integration tests runner 2. Disable TSAN checks for tests with GPRC ( like https://s3.amazonaws.com/clickhouse-test-reports/42807/e9d7407a58f6e3f7d88c0c534685704f23560704/integration_tests__tsan__[4/6].html ) 3. Cleanup tables after tests in odbc. [#44711](https://github.com/ClickHouse/ClickHouse/pull/44711) ([Ilya Yatsishin](https://github.com/qoega)). +* We removed support for shared linking because of Rust. Actually, Rust is only an excuse for this removal, and we wanted to remove it nevertheless. [#44828](https://github.com/ClickHouse/ClickHouse/pull/44828) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Checks will try to download images before running integration tests. If image, proxy or whatever is broken in infrastructure it will not make tests flaky. Images will be cached locally and download time will not be added to random tests. Compose images are now changed to be used without correct environment from helpers/cluster.py. [#44848](https://github.com/ClickHouse/ClickHouse/pull/44848) ([Ilya Yatsishin](https://github.com/qoega)). +* Fix zookeeper downloading, update the version, and optimize the image size. [#44853](https://github.com/ClickHouse/ClickHouse/pull/44853) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* The performance tests were silently broken because `Errors` wasn't detected in the status message. [#44867](https://github.com/ClickHouse/ClickHouse/pull/44867) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Remove the dependency on the `adduser` tool from the packages, because we don't use it. This fixes [#44934](https://github.com/ClickHouse/ClickHouse/issues/44934). [#45011](https://github.com/ClickHouse/ClickHouse/pull/45011) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* SQLite library is updated to the latest. It is used for the SQLite database and table integration engines. Also, fixed a false-positive TSan report. This closes [#45027](https://github.com/ClickHouse/ClickHouse/issues/45027). [#45031](https://github.com/ClickHouse/ClickHouse/pull/45031) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix report sending in the case when FastTest failed. [#45588](https://github.com/ClickHouse/ClickHouse/pull/45588) ([Dmitry Novik](https://github.com/novikd)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* #40651 [#41404](https://github.com/ClickHouse/ClickHouse/issues/41404). [#42126](https://github.com/ClickHouse/ClickHouse/pull/42126) ([Alexander Gololobov](https://github.com/davenger)). +* Fix possible use-of-unitialized value after executing expressions after sorting. Closes [#43386](https://github.com/ClickHouse/ClickHouse/issues/43386) CC: @nickitat. [#43635](https://github.com/ClickHouse/ClickHouse/pull/43635) ([Kruglov Pavel](https://github.com/Avogar)). +* Better handling of NULL in aggregate combinators, fix possible segfault/logical error while using optimization `optimize_rewrite_sum_if_to_count_if`. Closes [#43758](https://github.com/ClickHouse/ClickHouse/issues/43758). [#43813](https://github.com/ClickHouse/ClickHouse/pull/43813) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix CREATE USER/ROLE query settings constraints. 
[#43993](https://github.com/ClickHouse/ClickHouse/pull/43993) ([Nikolay Degterinsky](https://github.com/evillique)). +* * Fix wrong behavior of `JOIN ON t1.x = t2.x AND 1 = 1`, forbid such queries. [#44016](https://github.com/ClickHouse/ClickHouse/pull/44016) ([Vladimir C](https://github.com/vdimir)). +* Fixed bug with non-parsable default value for EPHEMERAL column in table metadata. [#44026](https://github.com/ClickHouse/ClickHouse/pull/44026) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix parsing of bad version from compatibility setting. [#44224](https://github.com/ClickHouse/ClickHouse/pull/44224) ([Kruglov Pavel](https://github.com/Avogar)). +* Bring interval subtraction from datetime in line with addition. [#44241](https://github.com/ClickHouse/ClickHouse/pull/44241) ([ltrk2](https://github.com/ltrk2)). +* Fix double-free in HashTable::clearAndShrink() with zero elements in it. [#44256](https://github.com/ClickHouse/ClickHouse/pull/44256) ([Azat Khuzhin](https://github.com/azat)). +* Remove limits on maximum size of the result for view. [#44261](https://github.com/ClickHouse/ClickHouse/pull/44261) ([lizhuoyu5](https://github.com/lzydmxy)). +* Fix possible logical error in cache if `do_not_evict_index_and_mrk_files=1`. Closes [#42142](https://github.com/ClickHouse/ClickHouse/issues/42142). [#44268](https://github.com/ClickHouse/ClickHouse/pull/44268) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix possible too early cache write interruption in write-through cache (caching could be stopped due to false assumption when it shouldn't have). [#44289](https://github.com/ClickHouse/ClickHouse/pull/44289) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix possible crash in case function `IN` with constant arguments was used as a constant argument together with `LowCardinality`. Fixes [#44221](https://github.com/ClickHouse/ClickHouse/issues/44221). [#44346](https://github.com/ClickHouse/ClickHouse/pull/44346) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix support for complex parameters (like arrays) of parametric aggregate functions. This closes [#30975](https://github.com/ClickHouse/ClickHouse/issues/30975). The aggregate function `sumMapFiltered` was unusable in distributed queries before this change. [#44358](https://github.com/ClickHouse/ClickHouse/pull/44358) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* * Fix possible nullptr deference in JoinSwitcher with `allow_experimental_analyzer`. [#44371](https://github.com/ClickHouse/ClickHouse/pull/44371) ([Vladimir C](https://github.com/vdimir)). +* Fix reading ObjectId in BSON schema inference. [#44382](https://github.com/ClickHouse/ClickHouse/pull/44382) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix race which can lead to premature temp parts removal before merge finished in ReplicatedMergeTree. This issue could lead to errors like `No such file or directory: xxx`. Fixes [#43983](https://github.com/ClickHouse/ClickHouse/issues/43983). [#44383](https://github.com/ClickHouse/ClickHouse/pull/44383) ([alesapin](https://github.com/alesapin)). +* Some invalid `SYSTEM ... ON CLUSTER` queries worked in an unexpected way if a cluster name was not specified. It's fixed, now invalid queries throw `SYNTAX_ERROR` as they should. Fixes [#44264](https://github.com/ClickHouse/ClickHouse/issues/44264). [#44387](https://github.com/ClickHouse/ClickHouse/pull/44387) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix reading Map type in ORC format. 
[#44400](https://github.com/ClickHouse/ClickHouse/pull/44400) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix reading columns that are not presented in input data in Parquet/ORC formats. Previously it could lead to error `INCORRECT_NUMBER_OF_COLUMNS`. Closes [#44333](https://github.com/ClickHouse/ClickHouse/issues/44333). [#44405](https://github.com/ClickHouse/ClickHouse/pull/44405) ([Kruglov Pavel](https://github.com/Avogar)). +* Previously bar() function used the same '▋' (U+258B "Left five eighths block") character to display both 5/8 and 6/8 bars. This change corrects this behavior by using '▊' (U+258A "Left three quarters block") for displaying 6/8 bar. [#44410](https://github.com/ClickHouse/ClickHouse/pull/44410) ([Alexander Gololobov](https://github.com/davenger)). +* Placing profile settings after profile settings constraints in the configuration file made constraints ineffective. [#44411](https://github.com/ClickHouse/ClickHouse/pull/44411) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Fix `SYNTAX_ERROR` while running `EXPLAIN AST INSERT` queries with data. Closes [#44207](https://github.com/ClickHouse/ClickHouse/issues/44207). [#44413](https://github.com/ClickHouse/ClickHouse/pull/44413) ([save-my-heart](https://github.com/save-my-heart)). +* Fix reading bool value with CRLF in CSV format. Closes [#44401](https://github.com/ClickHouse/ClickHouse/issues/44401). [#44442](https://github.com/ClickHouse/ClickHouse/pull/44442) ([Kruglov Pavel](https://github.com/Avogar)). +* Don't execute and/or/if/multiIf on LowCardinality dictionary, so the result type cannot be LowCardinality. It could lead to error `Illegal column ColumnLowCardinality` in some cases. Fixes [#43603](https://github.com/ClickHouse/ClickHouse/issues/43603). [#44469](https://github.com/ClickHouse/ClickHouse/pull/44469) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix mutations with setting `max_streams_for_merge_tree_reading`. [#44472](https://github.com/ClickHouse/ClickHouse/pull/44472) ([Anton Popov](https://github.com/CurtizJ)). +* Fix potential null pointer dereference with GROUPING SETS in ASTSelectQuery::formatImpl ([#43049](https://github.com/ClickHouse/ClickHouse/issues/43049)). [#44479](https://github.com/ClickHouse/ClickHouse/pull/44479) ([Robert Schulze](https://github.com/rschu1ze)). +* Validate types in table function arguments, CAST function arguments, JSONAsObject schema inference according to settings. [#44501](https://github.com/ClickHouse/ClickHouse/pull/44501) ([Kruglov Pavel](https://github.com/Avogar)). +* - Fix IN function with LC and const column, close [#44503](https://github.com/ClickHouse/ClickHouse/issues/44503). [#44506](https://github.com/ClickHouse/ClickHouse/pull/44506) ([Duc Canh Le](https://github.com/canhld94)). +* Fixed a bug in normalization of a `DEFAULT` expression in `CREATE TABLE` statement. The second argument of function `in` (or the right argument of operator `IN`) might be replaced with the result of its evaluation during CREATE query execution. Fixes [#44496](https://github.com/ClickHouse/ClickHouse/issues/44496). [#44547](https://github.com/ClickHouse/ClickHouse/pull/44547) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Projections do not work in presence of WITH ROLLUP, WITH CUBE and WITH TOTALS. In previous versions, a query produced an exception instead of skipping the usage of projections. This closes [#44614](https://github.com/ClickHouse/ClickHouse/issues/44614). 
This closes [#42772](https://github.com/ClickHouse/ClickHouse/issues/42772). [#44615](https://github.com/ClickHouse/ClickHouse/pull/44615) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix bug in experimental analyzer and `aggregate_functions_null_for_empty = 1`. Close [#44644](https://github.com/ClickHouse/ClickHouse/issues/44644). [#44648](https://github.com/ClickHouse/ClickHouse/pull/44648) ([Vladimir C](https://github.com/vdimir)). +* Async blocks were not cleaned because the function `get all blocks sorted by time` didn't get async blocks. [#44651](https://github.com/ClickHouse/ClickHouse/pull/44651) ([Han Fei](https://github.com/hanfei1991)). +* Fix `LOGICAL_ERROR` `The top step of the right pipeline should be ExpressionStep` for JOIN with subquery, UNION, and TOTALS. Fixes [#43687](https://github.com/ClickHouse/ClickHouse/issues/43687). [#44673](https://github.com/ClickHouse/ClickHouse/pull/44673) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Avoid std::out_of_range exception in StorageExecutable. [#44681](https://github.com/ClickHouse/ClickHouse/pull/44681) ([Kruglov Pavel](https://github.com/Avogar)). +* Do not apply `optimize_syntax_fuse_functions` to quantiles on AST, close [#44712](https://github.com/ClickHouse/ClickHouse/issues/44712). [#44713](https://github.com/ClickHouse/ClickHouse/pull/44713) ([Vladimir C](https://github.com/vdimir)). +* Fix bug with wrong type in Merge table and PREWHERE, close [#43324](https://github.com/ClickHouse/ClickHouse/issues/43324). [#44716](https://github.com/ClickHouse/ClickHouse/pull/44716) ([Vladimir C](https://github.com/vdimir)). +* Fix possible crash during shutdown (while destroying TraceCollector). Fixes [#44757](https://github.com/ClickHouse/ClickHouse/issues/44757). [#44758](https://github.com/ClickHouse/ClickHouse/pull/44758) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix a possible crash in distributed query processing. The crash could happen if a query with totals or extremes returned an empty result and there are mismatched types in the Distributed and the local tables. Fixes [#44738](https://github.com/ClickHouse/ClickHouse/issues/44738). [#44760](https://github.com/ClickHouse/ClickHouse/pull/44760) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix fsync for fetches (`min_compressed_bytes_to_fsync_after_fetch`)/small files (ttl.txt, columns.txt) in mutations (`min_rows_to_fsync_after_merge`/`min_compressed_bytes_to_fsync_after_merge`). [#44781](https://github.com/ClickHouse/ClickHouse/pull/44781) ([Azat Khuzhin](https://github.com/azat)). +* A rare race condition was possible when querying the `system.parts` or `system.parts_columns` tables in the presence of parts being moved between disks. Introduced in [#41145](https://github.com/ClickHouse/ClickHouse/issues/41145). [#44809](https://github.com/ClickHouse/ClickHouse/pull/44809) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix the error `Context has expired` which could appear with enabled projections optimization. Can be reproduced for queries with specific functions, like `dictHas`/`dictGet`, which use context at runtime. Fixes [#44844](https://github.com/ClickHouse/ClickHouse/issues/44844). [#44850](https://github.com/ClickHouse/ClickHouse/pull/44850) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Another fix for the `Cannot read all data` error which could happen while reading a `LowCardinality` dictionary from remote fs.
Fixes [#44709](https://github.com/ClickHouse/ClickHouse/issues/44709). [#44875](https://github.com/ClickHouse/ClickHouse/pull/44875) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* - Ignore hwmon sensors on label read issues. [#44895](https://github.com/ClickHouse/ClickHouse/pull/44895) ([Raúl Marín](https://github.com/Algunenano)). +* Use `max_delay_to_insert` value in case calculated time to delay INSERT exceeds the setting value. Related to [#44902](https://github.com/ClickHouse/ClickHouse/issues/44902). [#44916](https://github.com/ClickHouse/ClickHouse/pull/44916) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix error `Different order of columns in UNION subquery` for queries with `UNION`. Fixes [#44866](https://github.com/ClickHouse/ClickHouse/issues/44866). [#44920](https://github.com/ClickHouse/ClickHouse/pull/44920) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Delay for INSERT can be calculated incorrectly, which can lead to always using `max_delay_to_insert` setting as delay instead of a correct value. Using simple formula `max_delay_to_insert * (parts_over_threshold/max_allowed_parts_over_threshold)` i.e. delay grows proportionally to parts over threshold. Closes [#44902](https://github.com/ClickHouse/ClickHouse/issues/44902). [#44954](https://github.com/ClickHouse/ClickHouse/pull/44954) ([Igor Nikonov](https://github.com/devcrafter)). +* fix alter table ttl error when wide part has light weight delete mask. [#44959](https://github.com/ClickHouse/ClickHouse/pull/44959) ([Mingliang Pan](https://github.com/liangliangpan)). +* Follow-up fix for Replace domain IP types (IPv4, IPv6) with native [#43221](https://github.com/ClickHouse/ClickHouse/issues/43221). [#45024](https://github.com/ClickHouse/ClickHouse/pull/45024) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Follow-up fix for Replace domain IP types (IPv4, IPv6) with native https://github.com/ClickHouse/ClickHouse/pull/43221. [#45043](https://github.com/ClickHouse/ClickHouse/pull/45043) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* A buffer overflow was possible in the parser. Found by fuzzer. [#45047](https://github.com/ClickHouse/ClickHouse/pull/45047) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix possible cannot-read-all-data error in storage FileLog. Closes [#45051](https://github.com/ClickHouse/ClickHouse/issues/45051), [#38257](https://github.com/ClickHouse/ClickHouse/issues/38257). [#45057](https://github.com/ClickHouse/ClickHouse/pull/45057) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Memory efficient aggregation (setting `distributed_aggregation_memory_efficient`) is disabled when grouping sets are present in the query. [#45058](https://github.com/ClickHouse/ClickHouse/pull/45058) ([Nikita Taranov](https://github.com/nickitat)). +* Fix `RANGE_HASHED` dictionary to count range columns as part of primary key during updates when `update_field` is specified. Closes [#44588](https://github.com/ClickHouse/ClickHouse/issues/44588). [#45061](https://github.com/ClickHouse/ClickHouse/pull/45061) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix error `Cannot capture column` for `LowCardinality` captured argument of nested labmda. Fixes [#45028](https://github.com/ClickHouse/ClickHouse/issues/45028). [#45065](https://github.com/ClickHouse/ClickHouse/pull/45065) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+* Fix the wrong query result of `additional_table_filters` (additional filter was not applied) in case a minmax/count projection is used. [#45133](https://github.com/ClickHouse/ClickHouse/pull/45133) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed bug in `histogram` function accepting negative values. [#45147](https://github.com/ClickHouse/ClickHouse/pull/45147) ([simpleton](https://github.com/rgzntrade)). +* Follow-up fix for Replace domain IP types (IPv4, IPv6) with native https://github.com/ClickHouse/ClickHouse/pull/43221. [#45150](https://github.com/ClickHouse/ClickHouse/pull/45150) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix wrong column nullability in StorageJoin, close [#44940](https://github.com/ClickHouse/ClickHouse/issues/44940). [#45184](https://github.com/ClickHouse/ClickHouse/pull/45184) ([Vladimir C](https://github.com/vdimir)). +* Fix `background_fetches_pool_size` settings reload (increase at runtime). [#45189](https://github.com/ClickHouse/ClickHouse/pull/45189) ([Raúl Marín](https://github.com/Algunenano)). +* Correctly process `SELECT` queries on KV engines (e.g. KeeperMap, EmbeddedRocksDB) using `IN` on the key with a subquery producing a different type. [#45215](https://github.com/ClickHouse/ClickHouse/pull/45215) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix logical error in SEMI JOIN & join_use_nulls in some cases, close [#45163](https://github.com/ClickHouse/ClickHouse/issues/45163), close [#45209](https://github.com/ClickHouse/ClickHouse/issues/45209). [#45230](https://github.com/ClickHouse/ClickHouse/pull/45230) ([Vladimir C](https://github.com/vdimir)). +* Fix heap-use-after-free in reading from s3. [#45253](https://github.com/ClickHouse/ClickHouse/pull/45253) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix bug when the Avro Union type is ['null', Nested type], closes [#45275](https://github.com/ClickHouse/ClickHouse/issues/45275). Fix bug that incorrectly inferred the `bytes` type as `Float`. [#45276](https://github.com/ClickHouse/ClickHouse/pull/45276) ([flynn](https://github.com/ucasfl)). +* Throw a correct exception when explicit PREWHERE cannot be used with a table using storage engine `Merge`. [#45319](https://github.com/ClickHouse/ClickHouse/pull/45319) ([Antonio Andelic](https://github.com/antonio2368)). +* Under WSL1 Ubuntu, self-extracting clickhouse failed to decompress due to an inconsistency: /proc/self/maps reports a 32-bit file inode, while stat reports a 64-bit inode. [#45339](https://github.com/ClickHouse/ClickHouse/pull/45339) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix race in Distributed table startup (that could lead to processing a file of an async INSERT multiple times). [#45360](https://github.com/ClickHouse/ClickHouse/pull/45360) ([Azat Khuzhin](https://github.com/azat)). +* Fix possible crash while reading from storage `S3` and table function `s3` in case the `ListObject` request has failed. [#45371](https://github.com/ClickHouse/ClickHouse/pull/45371) ([Anton Popov](https://github.com/CurtizJ)). +* Fixed some bugs in JOINs with WHERE by disabling the "move to prewhere" optimization for it, close [#44062](https://github.com/ClickHouse/ClickHouse/issues/44062). [#45391](https://github.com/ClickHouse/ClickHouse/pull/45391) ([Vladimir C](https://github.com/vdimir)). +* Fix `SELECT ... FROM system.dictionaries` exception when there is a dictionary with a bad structure (e.g. incorrect type in xml config).
[#45399](https://github.com/ClickHouse/ClickHouse/pull/45399) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Fix s3Cluster schema inference when structure from insertion table is used in `INSERT INTO ... SELECT * FROM s3Cluster` queries. [#45422](https://github.com/ClickHouse/ClickHouse/pull/45422) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix bug in JSON/BSONEachRow parsing with HTTP that could lead to using default values for some columns instead of values from data. [#45424](https://github.com/ClickHouse/ClickHouse/pull/45424) ([Kruglov Pavel](https://github.com/Avogar)). +* Fixed bug (Code: 632. DB::Exception: Unexpected data ... after parsed IPv6 value ...) with typed parsing of IP types from text source. [#45425](https://github.com/ClickHouse/ClickHouse/pull/45425) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* close [#45297](https://github.com/ClickHouse/ClickHouse/issues/45297) Add check for empty regular expressions. [#45428](https://github.com/ClickHouse/ClickHouse/pull/45428) ([Han Fei](https://github.com/hanfei1991)). +* Fix possible (likely distributed) query hung. [#45448](https://github.com/ClickHouse/ClickHouse/pull/45448) ([Azat Khuzhin](https://github.com/azat)). +* Fix disabled two-level aggregation from HTTP. [#45450](https://github.com/ClickHouse/ClickHouse/pull/45450) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix possible deadlock with `allow_asynchronous_read_from_io_pool_for_merge_tree` enabled in case of exception from `ThreadPool::schedule`. [#45481](https://github.com/ClickHouse/ClickHouse/pull/45481) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix possible in-use table after DETACH. [#45493](https://github.com/ClickHouse/ClickHouse/pull/45493) ([Azat Khuzhin](https://github.com/azat)). +* Fix rare abort in case when query is canceled and parallel parsing was used during its execution. [#45498](https://github.com/ClickHouse/ClickHouse/pull/45498) ([Anton Popov](https://github.com/CurtizJ)). +* Fix a race between Distributed table creation and INSERT into it (could lead to CANNOT_LINK during INSERT into the table). [#45502](https://github.com/ClickHouse/ClickHouse/pull/45502) ([Azat Khuzhin](https://github.com/azat)). +* Add proper default (SLRU) to cache policy getter. Closes [#45514](https://github.com/ClickHouse/ClickHouse/issues/45514). [#45524](https://github.com/ClickHouse/ClickHouse/pull/45524) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Remove AST-based optimization `optimize_fuse_sum_count_avg`, close [#45439](https://github.com/ClickHouse/ClickHouse/issues/45439). [#45558](https://github.com/ClickHouse/ClickHouse/pull/45558) ([Vladimir C](https://github.com/vdimir)). + +#### Bug-fix + +* Disallow arrayjoin in mutations closes [#42637](https://github.com/ClickHouse/ClickHouse/issues/42637) Implementation: * Added a new parameter to ActionsVisitor::Data disallow_arrayjoin, which is set by MutationsIterator when it appends expression. * ActionsVisitor uses disallow_arrayjoin and throws error when its used with mutations. Testing: * Added test for the same 02504_disallow_arrayjoin_in_mutations.sql. [#44447](https://github.com/ClickHouse/ClickHouse/pull/44447) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix for qualified asterisks with alias table name and column transformer resolves [#44736](https://github.com/ClickHouse/ClickHouse/issues/44736). [#44755](https://github.com/ClickHouse/ClickHouse/pull/44755) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). 
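The mutation restriction from the Bug-fix entry above (disallowing `arrayJoin` in mutations) means a statement like the following sketch is now rejected with an error rather than accepted; the table here is hypothetical:

```sql
CREATE TABLE t (x UInt32) ENGINE = MergeTree ORDER BY x;

-- After this change, using arrayJoin inside a mutation throws an error
-- instead of being accepted.
ALTER TABLE t UPDATE x = arrayJoin([1, 2, 3]) WHERE 1;
```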
+* Updated the backup/restore status when concurrent backups & restores are not allowed, resolves [#45486](https://github.com/ClickHouse/ClickHouse/issues/45486). Implementation: moved the concurrent backup/restore check inside the try-catch block which sets the status, so that other nodes in the cluster are aware of failures; renamed backup_uuid to restore_uuid in RestoreSettings. [#45497](https://github.com/ClickHouse/ClickHouse/pull/45497) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
+
+#### Build Improvement
+
+* Fix crc32 for s390x. [#43706](https://github.com/ClickHouse/ClickHouse/pull/43706) ([Suzy Wang](https://github.com/SuzyWangIBMer)).
+* Fixed endian issues in the transform function for s390x. [#45522](https://github.com/ClickHouse/ClickHouse/pull/45522) ([Harry Lee](https://github.com/HarryLeeIBM)).
+
+#### Feature
+
+* Record server startup time in ProfileEvents, resolves [#43188](https://github.com/ClickHouse/ClickHouse/issues/43188). Implementation: added ProfileEvents::ServerStartupMilliseconds and recorded the time from the start of main until the server starts listening on sockets. Testing: added the test 02532_profileevents_server_startup_time.sql. [#45250](https://github.com/ClickHouse/ClickHouse/pull/45250) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
+
+#### NO CL ENTRY
+
+* NO CL ENTRY: 'Revert "If user only need virtual columns, we don't need to initialize ReadBufferFromS3"'. [#44939](https://github.com/ClickHouse/ClickHouse/pull/44939) ([Anton Popov](https://github.com/CurtizJ)).
+* NO CL ENTRY: 'Revert "Custom reading for mutation"'. [#45121](https://github.com/ClickHouse/ClickHouse/pull/45121) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* NO CL ENTRY: 'Revert "Revert "Custom reading for mutation""'. [#45122](https://github.com/ClickHouse/ClickHouse/pull/45122) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* NO CL ENTRY: 'Revert "update function DAYOFWEEK and add new function WEEKDAY for mysql/spark compatiability"'. [#45221](https://github.com/ClickHouse/ClickHouse/pull/45221) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* NO CL ENTRY: 'Revert "Validate function arguments in query tree"'. [#45299](https://github.com/ClickHouse/ClickHouse/pull/45299) ([Maksim Kita](https://github.com/kitaisreal)).
+* NO CL ENTRY: 'Revert "Revert "Validate function arguments in query tree""'. [#45300](https://github.com/ClickHouse/ClickHouse/pull/45300) ([Maksim Kita](https://github.com/kitaisreal)).
+* NO CL ENTRY: 'Revert "Support optimize_or_like_chain in QueryTreePassManager"'. [#45406](https://github.com/ClickHouse/ClickHouse/pull/45406) ([Anton Popov](https://github.com/CurtizJ)).
+* NO CL ENTRY: 'Resubmit Support optimize_or_like_chain in QueryTreePassManager'. [#45410](https://github.com/ClickHouse/ClickHouse/pull/45410) ([Dmitry Novik](https://github.com/novikd)).
+* NO CL ENTRY: 'Revert "Remove redundant sorting"'. [#45414](https://github.com/ClickHouse/ClickHouse/pull/45414) ([Igor Nikonov](https://github.com/devcrafter)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Automatically merge green backport PRs and green approved PRs [#41110](https://github.com/ClickHouse/ClickHouse/pull/41110) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Fix assertion in async read buffer from remote [#41231](https://github.com/ClickHouse/ClickHouse/pull/41231) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* add retries on ConnectionError [#42991](https://github.com/ClickHouse/ClickHouse/pull/42991) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). 
+* Update aws-c* submodules [#43020](https://github.com/ClickHouse/ClickHouse/pull/43020) ([Vitaly Baranov](https://github.com/vitlibar)). +* Replace domain IP types (IPv4, IPv6) with native [#43221](https://github.com/ClickHouse/ClickHouse/pull/43221) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix aggregate functions optimisation in AggregateFunctionsArithmericOperationsPass [#43372](https://github.com/ClickHouse/ClickHouse/pull/43372) ([Dmitry Novik](https://github.com/novikd)). +* Improve pytest --pdb experience by preserving dockerd on SIGINT [#43392](https://github.com/ClickHouse/ClickHouse/pull/43392) ([Azat Khuzhin](https://github.com/azat)). +* RFC: tests: add stacktraces for hunged queries [#43396](https://github.com/ClickHouse/ClickHouse/pull/43396) ([Azat Khuzhin](https://github.com/azat)). +* Followup fixes for systemd notification ([#43400](https://github.com/ClickHouse/ClickHouse/issues/43400)) [#43597](https://github.com/ClickHouse/ClickHouse/pull/43597) ([Alexander Gololobov](https://github.com/davenger)). +* Refactor FunctionNode [#43761](https://github.com/ClickHouse/ClickHouse/pull/43761) ([Dmitry Novik](https://github.com/novikd)). +* Some cleanup: grace hash join [#43851](https://github.com/ClickHouse/ClickHouse/pull/43851) ([Igor Nikonov](https://github.com/devcrafter)). +* Temporary files evict fs cache - 2nd approach [#43972](https://github.com/ClickHouse/ClickHouse/pull/43972) ([Vladimir C](https://github.com/vdimir)). +* Randomize setting `enable_memory_bound_merging_of_aggregation_results` in tests [#43986](https://github.com/ClickHouse/ClickHouse/pull/43986) ([Nikita Taranov](https://github.com/nickitat)). +* Analyzer aggregate functions passes small fixes [#44013](https://github.com/ClickHouse/ClickHouse/pull/44013) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix wrong char in command [#44018](https://github.com/ClickHouse/ClickHouse/pull/44018) ([alesapin](https://github.com/alesapin)). +* Analyzer support Set index [#44097](https://github.com/ClickHouse/ClickHouse/pull/44097) ([Maksim Kita](https://github.com/kitaisreal)). +* Provide monotonicity info for `toUnixTimestamp64*` [#44116](https://github.com/ClickHouse/ClickHouse/pull/44116) ([Nikita Taranov](https://github.com/nickitat)). +* Avoid loading toolchain files multiple times [#44122](https://github.com/ClickHouse/ClickHouse/pull/44122) ([Azat Khuzhin](https://github.com/azat)). +* tests: exclude flaky columns from SHOW CLUSTERS test [#44123](https://github.com/ClickHouse/ClickHouse/pull/44123) ([Azat Khuzhin](https://github.com/azat)). +* Bump libdivide (to gain some new optimizations) [#44132](https://github.com/ClickHouse/ClickHouse/pull/44132) ([Azat Khuzhin](https://github.com/azat)). +* Make atomic counter relaxed in blockNumber() [#44193](https://github.com/ClickHouse/ClickHouse/pull/44193) ([Igor Nikonov](https://github.com/devcrafter)). +* Try fix flaky 01072_window_view_multiple_columns_groupby [#44195](https://github.com/ClickHouse/ClickHouse/pull/44195) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Apply new code of named collections (from [#43147](https://github.com/ClickHouse/ClickHouse/issues/43147)) to external table engines part 1 [#44204](https://github.com/ClickHouse/ClickHouse/pull/44204) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add some settings under `compatibility` [#44209](https://github.com/ClickHouse/ClickHouse/pull/44209) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Recommend Slack over Telegram in the "Question" issue template [#44222](https://github.com/ClickHouse/ClickHouse/pull/44222) ([Ivan Blinkov](https://github.com/blinkov)). +* Forbid paths in timezone names [#44225](https://github.com/ClickHouse/ClickHouse/pull/44225) ([Kruglov Pavel](https://github.com/Avogar)). +* Analyzer storage view crash fix [#44230](https://github.com/ClickHouse/ClickHouse/pull/44230) ([Maksim Kita](https://github.com/kitaisreal)). +* Add ThreadsInOvercommitTracker metric [#44233](https://github.com/ClickHouse/ClickHouse/pull/44233) ([Dmitry Novik](https://github.com/novikd)). +* Analyzer expired Context crash fix [#44234](https://github.com/ClickHouse/ClickHouse/pull/44234) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix grace join memory consumption, pt1 [#44238](https://github.com/ClickHouse/ClickHouse/pull/44238) ([Vladimir C](https://github.com/vdimir)). +* Fixed use-after-free of BLAKE3 error message [#44242](https://github.com/ClickHouse/ClickHouse/pull/44242) ([Joanna Hulboj](https://github.com/jh0x)). +* Fix deadlock in StorageSystemDatabases [#44272](https://github.com/ClickHouse/ClickHouse/pull/44272) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Get rid of global Git object [#44273](https://github.com/ClickHouse/ClickHouse/pull/44273) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update version after release [#44275](https://github.com/ClickHouse/ClickHouse/pull/44275) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update version_date.tsv and changelogs after v22.12.1.1752-stable [#44281](https://github.com/ClickHouse/ClickHouse/pull/44281) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Do not hold data parts during insert [#44299](https://github.com/ClickHouse/ClickHouse/pull/44299) ([Anton Popov](https://github.com/CurtizJ)). +* Another fix `test_server_reload` [#44306](https://github.com/ClickHouse/ClickHouse/pull/44306) ([Antonio Andelic](https://github.com/antonio2368)). +* Update version_date.tsv and changelogs after v22.9.7.34-stable [#44309](https://github.com/ClickHouse/ClickHouse/pull/44309) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* tests/perf: fix dependency check during DROP [#44312](https://github.com/ClickHouse/ClickHouse/pull/44312) ([Azat Khuzhin](https://github.com/azat)). +* (unused openssl integration, not for production) a follow-up [#44325](https://github.com/ClickHouse/ClickHouse/pull/44325) ([Boris Kuschel](https://github.com/bkuschel)). +* Replace old named collections code with new (from [#43147](https://github.com/ClickHouse/ClickHouse/issues/43147)) part 2 [#44327](https://github.com/ClickHouse/ClickHouse/pull/44327) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Disable "git-import" test in debug mode [#44328](https://github.com/ClickHouse/ClickHouse/pull/44328) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Check s3 part upload settings [#44335](https://github.com/ClickHouse/ClickHouse/pull/44335) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix typo [#44337](https://github.com/ClickHouse/ClickHouse/pull/44337) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for PowerBI [#44338](https://github.com/ClickHouse/ClickHouse/pull/44338) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#36038](https://github.com/ClickHouse/ClickHouse/issues/36038) [#44339](https://github.com/ClickHouse/ClickHouse/pull/44339) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Add a test for [#29386](https://github.com/ClickHouse/ClickHouse/issues/29386) [#44340](https://github.com/ClickHouse/ClickHouse/pull/44340) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#22929](https://github.com/ClickHouse/ClickHouse/issues/22929) [#44341](https://github.com/ClickHouse/ClickHouse/pull/44341) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#29883](https://github.com/ClickHouse/ClickHouse/issues/29883) [#44342](https://github.com/ClickHouse/ClickHouse/pull/44342) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix Docker [#44343](https://github.com/ClickHouse/ClickHouse/pull/44343) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* fix flack test "02481_async_insert_dedup.python" [#44349](https://github.com/ClickHouse/ClickHouse/pull/44349) ([Han Fei](https://github.com/hanfei1991)). +* Add a test for [#22160](https://github.com/ClickHouse/ClickHouse/issues/22160) [#44355](https://github.com/ClickHouse/ClickHouse/pull/44355) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#34708](https://github.com/ClickHouse/ClickHouse/issues/34708) [#44356](https://github.com/ClickHouse/ClickHouse/pull/44356) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#30679](https://github.com/ClickHouse/ClickHouse/issues/30679) [#44357](https://github.com/ClickHouse/ClickHouse/pull/44357) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#34669](https://github.com/ClickHouse/ClickHouse/issues/34669) [#44359](https://github.com/ClickHouse/ClickHouse/pull/44359) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#34724](https://github.com/ClickHouse/ClickHouse/issues/34724) [#44360](https://github.com/ClickHouse/ClickHouse/pull/44360) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Try restarting ZK cluster on failed connection in `test_keeper_zookeeper_converted` [#44363](https://github.com/ClickHouse/ClickHouse/pull/44363) ([Antonio Andelic](https://github.com/antonio2368)). +* Disable grase_hash in test 00172_parallel_join [#44367](https://github.com/ClickHouse/ClickHouse/pull/44367) ([Vladimir C](https://github.com/vdimir)). +* Add check for submodules sanity [#44386](https://github.com/ClickHouse/ClickHouse/pull/44386) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Lock table for share during startup for database ordinary [#44393](https://github.com/ClickHouse/ClickHouse/pull/44393) ([alesapin](https://github.com/alesapin)). +* Implement a custom central checkout action [#44399](https://github.com/ClickHouse/ClickHouse/pull/44399) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Try fix some tests [#44406](https://github.com/ClickHouse/ClickHouse/pull/44406) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Better ParserAllCollectionsOfLiterals [#44408](https://github.com/ClickHouse/ClickHouse/pull/44408) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix bug with merge/mutate pool size increase [#44436](https://github.com/ClickHouse/ClickHouse/pull/44436) ([alesapin](https://github.com/alesapin)). +* Update 01072_window_view_multiple_columns_groupby.sh [#44438](https://github.com/ClickHouse/ClickHouse/pull/44438) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Disable buggy tsan assertion for integration test [#44444](https://github.com/ClickHouse/ClickHouse/pull/44444) ([alesapin](https://github.com/alesapin)). 
+* Respect setting settings.schema_inference_make_columns_nullable in Parquet/ORC/Arrow formats [#44446](https://github.com/ClickHouse/ClickHouse/pull/44446) ([Kruglov Pavel](https://github.com/Avogar)). +* Add tests as examples with errors of date(time) and string comparison that we should eliminate [#44462](https://github.com/ClickHouse/ClickHouse/pull/44462) ([Ilya Yatsishin](https://github.com/qoega)). +* Parallel parts cleanup with zero copy replication [#44466](https://github.com/ClickHouse/ClickHouse/pull/44466) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix incorrect usages of `getPartName()` [#44468](https://github.com/ClickHouse/ClickHouse/pull/44468) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix flaky test `roaring_memory_tracking` [#44470](https://github.com/ClickHouse/ClickHouse/pull/44470) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Clarify query_id in test 01092_memory_profiler [#44483](https://github.com/ClickHouse/ClickHouse/pull/44483) ([Vladimir C](https://github.com/vdimir)). +* Default value for optional in SortNode::updateTreeHashImpl [#44491](https://github.com/ClickHouse/ClickHouse/pull/44491) ([Vladimir C](https://github.com/vdimir)). +* Do not try to remove WAL/move broken parts for static storage [#44495](https://github.com/ClickHouse/ClickHouse/pull/44495) ([Azat Khuzhin](https://github.com/azat)). +* Removed parent pid check that breaks in containers [#44499](https://github.com/ClickHouse/ClickHouse/pull/44499) ([Alexander Gololobov](https://github.com/davenger)). +* Analyzer duplicate alias crash fix [#44508](https://github.com/ClickHouse/ClickHouse/pull/44508) ([Maksim Kita](https://github.com/kitaisreal)). +* Minor code polishing [#44513](https://github.com/ClickHouse/ClickHouse/pull/44513) ([alesapin](https://github.com/alesapin)). +* Better error message if named collection does not exist [#44517](https://github.com/ClickHouse/ClickHouse/pull/44517) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add the lambda to collect data for workflow_jobs [#44520](https://github.com/ClickHouse/ClickHouse/pull/44520) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Introduce groupArrayLast() (useful to store last X values) [#44521](https://github.com/ClickHouse/ClickHouse/pull/44521) ([Azat Khuzhin](https://github.com/azat)). +* Infer numbers starting from zero as strings in TSV [#44522](https://github.com/ClickHouse/ClickHouse/pull/44522) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix wrong condition for enabling async reading from MergeTree. [#44530](https://github.com/ClickHouse/ClickHouse/pull/44530) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* tests: capture dmesg in integration tests [#44535](https://github.com/ClickHouse/ClickHouse/pull/44535) ([Azat Khuzhin](https://github.com/azat)). +* Analyzer support distributed queries processing [#44540](https://github.com/ClickHouse/ClickHouse/pull/44540) ([Maksim Kita](https://github.com/kitaisreal)). +* Followup [#43761](https://github.com/ClickHouse/ClickHouse/issues/43761) [#44541](https://github.com/ClickHouse/ClickHouse/pull/44541) ([Dmitry Novik](https://github.com/novikd)). +* Drop unused columns after join on/using [#44545](https://github.com/ClickHouse/ClickHouse/pull/44545) ([Vladimir C](https://github.com/vdimir)). +* Improve inferring arrays with nulls in JSON formats [#44550](https://github.com/ClickHouse/ClickHouse/pull/44550) ([Kruglov Pavel](https://github.com/Avogar)). 
+* Make BC check optional (if env var set) [#44564](https://github.com/ClickHouse/ClickHouse/pull/44564) ([alesapin](https://github.com/alesapin)). +* Fix extremely slow stack traces in debug build [#44569](https://github.com/ClickHouse/ClickHouse/pull/44569) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Better command line argument name in `clickhouse-benchmark` [#44570](https://github.com/ClickHouse/ClickHouse/pull/44570) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix HDFS test [#44572](https://github.com/ClickHouse/ClickHouse/pull/44572) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test_distributed_queries_stress [#44573](https://github.com/ClickHouse/ClickHouse/pull/44573) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Switch "contrib/sysroot" back to master. [#44574](https://github.com/ClickHouse/ClickHouse/pull/44574) ([Vitaly Baranov](https://github.com/vitlibar)). +* Non-significant changes [#44575](https://github.com/ClickHouse/ClickHouse/pull/44575) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fuzzer HTML: fix trash [#44580](https://github.com/ClickHouse/ClickHouse/pull/44580) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Better diagnostics on server stop for the stress test [#44593](https://github.com/ClickHouse/ClickHouse/pull/44593) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The position of the log message about the server environment was wrong [#44595](https://github.com/ClickHouse/ClickHouse/pull/44595) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix bad punctuation in log [#44596](https://github.com/ClickHouse/ClickHouse/pull/44596) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix misleading log message [#44598](https://github.com/ClickHouse/ClickHouse/pull/44598) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix bad log message about MergeTree metadata cache. [#44599](https://github.com/ClickHouse/ClickHouse/pull/44599) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Slightly cleanup interactive line reader code [#44601](https://github.com/ClickHouse/ClickHouse/pull/44601) ([Azat Khuzhin](https://github.com/azat)). +* Rename `runlog.log` to `run.log` in tests [#44603](https://github.com/ClickHouse/ClickHouse/pull/44603) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix hung query in stress test [#44604](https://github.com/ClickHouse/ClickHouse/pull/44604) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve variable name [#44605](https://github.com/ClickHouse/ClickHouse/pull/44605) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Faster server startup after stress test [#44606](https://github.com/ClickHouse/ClickHouse/pull/44606) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix log messages in Coordination [#44607](https://github.com/ClickHouse/ClickHouse/pull/44607) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Disable Analyzer in fuzz and stress tests [#44609](https://github.com/ClickHouse/ClickHouse/pull/44609) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Better log message [#44610](https://github.com/ClickHouse/ClickHouse/pull/44610) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Maybe fix a bogus MSan error [#44611](https://github.com/ClickHouse/ClickHouse/pull/44611) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Fix "too large allocation" message from MSan [#44613](https://github.com/ClickHouse/ClickHouse/pull/44613) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Do not fail the AST fuzzer if sanitizer is out of memory [#44616](https://github.com/ClickHouse/ClickHouse/pull/44616) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test `01111_create_drop_replicated_db_stress` [#44617](https://github.com/ClickHouse/ClickHouse/pull/44617) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* tests/integration: suppress exceptions during logging (due to pytest) [#44618](https://github.com/ClickHouse/ClickHouse/pull/44618) ([Azat Khuzhin](https://github.com/azat)). +* Fix rust modules rebuild (previously ignores changes in cargo config.toml) [#44623](https://github.com/ClickHouse/ClickHouse/pull/44623) ([Azat Khuzhin](https://github.com/azat)). +* Sometimes spot instances fail more than 20 times in a row [#44626](https://github.com/ClickHouse/ClickHouse/pull/44626) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix restart after quorum insert [#44628](https://github.com/ClickHouse/ClickHouse/pull/44628) ([alesapin](https://github.com/alesapin)). +* Revert "Merge pull request [#38953](https://github.com/ClickHouse/ClickHouse/issues/38953) from ClickHouse/add-allocation-ptr-to-trace-log [#44629](https://github.com/ClickHouse/ClickHouse/pull/44629) ([Raúl Marín](https://github.com/Algunenano)). +* Fix lambdas parsing [#44639](https://github.com/ClickHouse/ClickHouse/pull/44639) ([Nikolay Degterinsky](https://github.com/evillique)). +* Function viewExplain accept SELECT and settings [#44641](https://github.com/ClickHouse/ClickHouse/pull/44641) ([Vladimir C](https://github.com/vdimir)). +* Fix test `02015_async_inserts_2` [#44642](https://github.com/ClickHouse/ClickHouse/pull/44642) ([Anton Popov](https://github.com/CurtizJ)). +* Fix flaky test `test_keeper_multinode_simple` [#44645](https://github.com/ClickHouse/ClickHouse/pull/44645) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Add +x flag for run-fuzzer.sh [#44649](https://github.com/ClickHouse/ClickHouse/pull/44649) ([alesapin](https://github.com/alesapin)). +* Custom reading for mutation [#44653](https://github.com/ClickHouse/ClickHouse/pull/44653) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix flaky test test_backup_restore_on_cluster [#44660](https://github.com/ClickHouse/ClickHouse/pull/44660) ([Vitaly Baranov](https://github.com/vitlibar)). +* tests/integration: add missing kazoo client termination [#44666](https://github.com/ClickHouse/ClickHouse/pull/44666) ([Azat Khuzhin](https://github.com/azat)). +* Move dmesg dumping out from runner to ci-runner.py [#44667](https://github.com/ClickHouse/ClickHouse/pull/44667) ([Azat Khuzhin](https://github.com/azat)). +* Remove questdb (it makes a little sense but the test was flaky) [#44669](https://github.com/ClickHouse/ClickHouse/pull/44669) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix minor typo: replace validate_bugix_check with validate_bugfix_check [#44672](https://github.com/ClickHouse/ClickHouse/pull/44672) ([Pradeep Chhetri](https://github.com/chhetripradeep)). +* Fix parsing of ANY operator [#44678](https://github.com/ClickHouse/ClickHouse/pull/44678) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix test `01130_in_memory_parts` [#44683](https://github.com/ClickHouse/ClickHouse/pull/44683) ([Anton Popov](https://github.com/CurtizJ)). 
+* Remove old code [#44685](https://github.com/ClickHouse/ClickHouse/pull/44685) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix flaky test git-import [#44687](https://github.com/ClickHouse/ClickHouse/pull/44687) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve odbc test [#44688](https://github.com/ClickHouse/ClickHouse/pull/44688) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add retries to HTTP requests in ClickHouse test [#44689](https://github.com/ClickHouse/ClickHouse/pull/44689) ([alesapin](https://github.com/alesapin)). +* Fix flaky tests [#44690](https://github.com/ClickHouse/ClickHouse/pull/44690) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix flaky test "01502_long_log_tinylog_deadlock_race" [#44693](https://github.com/ClickHouse/ClickHouse/pull/44693) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve handling of old parts [#44694](https://github.com/ClickHouse/ClickHouse/pull/44694) ([Raúl Marín](https://github.com/Algunenano)). +* Update entrypoint.sh [#44699](https://github.com/ClickHouse/ClickHouse/pull/44699) ([Denny Crane](https://github.com/den-crane)). +* tests: more fixes for test_keeper_auth [#44702](https://github.com/ClickHouse/ClickHouse/pull/44702) ([Azat Khuzhin](https://github.com/azat)). +* Fix crash on delete from materialized view [#44705](https://github.com/ClickHouse/ClickHouse/pull/44705) ([Alexander Gololobov](https://github.com/davenger)). +* Fix flaky filelog tests with database ordinary [#44706](https://github.com/ClickHouse/ClickHouse/pull/44706) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Make lightweight deletes always synchronous [#44718](https://github.com/ClickHouse/ClickHouse/pull/44718) ([Alexander Gololobov](https://github.com/davenger)). +* Fix deadlock in attach thread [#44719](https://github.com/ClickHouse/ClickHouse/pull/44719) ([alesapin](https://github.com/alesapin)). +* A few improvements to AST Fuzzer [#44720](https://github.com/ClickHouse/ClickHouse/pull/44720) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix flaky test [#44721](https://github.com/ClickHouse/ClickHouse/pull/44721) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Rename log in stress test [#44722](https://github.com/ClickHouse/ClickHouse/pull/44722) ([alesapin](https://github.com/alesapin)). +* Debug deadlock in stress test [#44723](https://github.com/ClickHouse/ClickHouse/pull/44723) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix flaky test "02102_row_binary_with_names_and_types.sh" [#44724](https://github.com/ClickHouse/ClickHouse/pull/44724) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Slightly better some tests [#44725](https://github.com/ClickHouse/ClickHouse/pull/44725) ([alesapin](https://github.com/alesapin)). +* Fix cases when clickhouse-server takes long time to start in functional tests with MSan [#44726](https://github.com/ClickHouse/ClickHouse/pull/44726) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Perf test: Log the time spent waiting for file sync [#44737](https://github.com/ClickHouse/ClickHouse/pull/44737) ([Raúl Marín](https://github.com/Algunenano)). +* Fix flaky test 02448_clone_replica_lost_part [#44759](https://github.com/ClickHouse/ClickHouse/pull/44759) ([alesapin](https://github.com/alesapin)). +* Build rust modules from the binary directory [#44762](https://github.com/ClickHouse/ClickHouse/pull/44762) ([Azat Khuzhin](https://github.com/azat)). 
+* Remove database ordinary from stress test [#44763](https://github.com/ClickHouse/ClickHouse/pull/44763) ([alesapin](https://github.com/alesapin)). +* Fix flaky test 02479_mysql_connect_to_self [#44768](https://github.com/ClickHouse/ClickHouse/pull/44768) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Print fatal messages in Fuzzer [#44769](https://github.com/ClickHouse/ClickHouse/pull/44769) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix incorrect docs [#44795](https://github.com/ClickHouse/ClickHouse/pull/44795) ([Kruglov Pavel](https://github.com/Avogar)). +* Added table name to error message [#44806](https://github.com/ClickHouse/ClickHouse/pull/44806) ([Alexander Gololobov](https://github.com/davenger)). +* Retry packages download if GitHub returned HTTP 500. [#44807](https://github.com/ClickHouse/ClickHouse/pull/44807) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Slightly better docs [#44808](https://github.com/ClickHouse/ClickHouse/pull/44808) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix total trash in stress test [#44810](https://github.com/ClickHouse/ClickHouse/pull/44810) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix ASan builds for glibc 2.36+ [#44811](https://github.com/ClickHouse/ClickHouse/pull/44811) ([Azat Khuzhin](https://github.com/azat)). +* Remove the remainings of TestFlows [#44812](https://github.com/ClickHouse/ClickHouse/pull/44812) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix `grep` [#44813](https://github.com/ClickHouse/ClickHouse/pull/44813) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix bad cast in monotonicity analysis [#44818](https://github.com/ClickHouse/ClickHouse/pull/44818) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Modern tools, part 1 [#44819](https://github.com/ClickHouse/ClickHouse/pull/44819) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Modern tools in CI, part 2. [#44820](https://github.com/ClickHouse/ClickHouse/pull/44820) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix data race in DDLWorker [#44821](https://github.com/ClickHouse/ClickHouse/pull/44821) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix tests for bridges [#44822](https://github.com/ClickHouse/ClickHouse/pull/44822) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix flaky test_multiple_disks::test_jbod_overflow [#44823](https://github.com/ClickHouse/ClickHouse/pull/44823) ([Azat Khuzhin](https://github.com/azat)). +* Less OOM in stress test [#44824](https://github.com/ClickHouse/ClickHouse/pull/44824) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix misleading integration tests reports for parametrized tests [#44825](https://github.com/ClickHouse/ClickHouse/pull/44825) ([Azat Khuzhin](https://github.com/azat)). +* Fix two typos [#44826](https://github.com/ClickHouse/ClickHouse/pull/44826) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Adjust CSS [#44829](https://github.com/ClickHouse/ClickHouse/pull/44829) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix fuzzer report [#44830](https://github.com/ClickHouse/ClickHouse/pull/44830) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* check-style: check base for std::cerr/cout too [#44833](https://github.com/ClickHouse/ClickHouse/pull/44833) ([Azat Khuzhin](https://github.com/azat)). 
+* Try fixing `test_keeper_snapshot_small_distance` with ZK restart [#44834](https://github.com/ClickHouse/ClickHouse/pull/44834) ([Antonio Andelic](https://github.com/antonio2368)). +* Exclude cargo shared libraries from the artifacts [#44836](https://github.com/ClickHouse/ClickHouse/pull/44836) ([Azat Khuzhin](https://github.com/azat)). +* Add a tiny but important logging [#44837](https://github.com/ClickHouse/ClickHouse/pull/44837) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Escape submodules in style-check [#44838](https://github.com/ClickHouse/ClickHouse/pull/44838) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Move `test_dies_with_parent` to another module [#44839](https://github.com/ClickHouse/ClickHouse/pull/44839) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Remove unneeded softlink to official dev docs [#44841](https://github.com/ClickHouse/ClickHouse/pull/44841) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix data race in StorageS3 [#44842](https://github.com/ClickHouse/ClickHouse/pull/44842) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix rare race which can lead to queue hang [#44847](https://github.com/ClickHouse/ClickHouse/pull/44847) ([alesapin](https://github.com/alesapin)). +* No more retries in integration tests [#44851](https://github.com/ClickHouse/ClickHouse/pull/44851) ([Ilya Yatsishin](https://github.com/qoega)). +* Document usage of check_cxx_source_compiles instead of check_cxx_source_runs [#44854](https://github.com/ClickHouse/ClickHouse/pull/44854) ([Robert Schulze](https://github.com/rschu1ze)). +* More cases of OOM in Fuzzer [#44855](https://github.com/ClickHouse/ClickHouse/pull/44855) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix: sorted DISTINCT with empty string [#44856](https://github.com/ClickHouse/ClickHouse/pull/44856) ([Igor Nikonov](https://github.com/devcrafter)). +* Try to fix MSan build [#44857](https://github.com/ClickHouse/ClickHouse/pull/44857) ([Nikolay Degterinsky](https://github.com/evillique)). +* Cleanup setup_minio.sh [#44858](https://github.com/ClickHouse/ClickHouse/pull/44858) ([Pradeep Chhetri](https://github.com/chhetripradeep)). +* Wait for ZK process to stop in tests using snapshot [#44859](https://github.com/ClickHouse/ClickHouse/pull/44859) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix flaky test and several typos [#44870](https://github.com/ClickHouse/ClickHouse/pull/44870) ([alesapin](https://github.com/alesapin)). +* Upload status files to S3 report for bugfix check [#44871](https://github.com/ClickHouse/ClickHouse/pull/44871) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix flaky test `02503_insert_storage_snapshot` [#44873](https://github.com/ClickHouse/ClickHouse/pull/44873) ([alesapin](https://github.com/alesapin)). +* Revert some changes from [#42777](https://github.com/ClickHouse/ClickHouse/issues/42777) to fix performance tests [#44876](https://github.com/ClickHouse/ClickHouse/pull/44876) ([Kruglov Pavel](https://github.com/Avogar)). +* Rewrite test_postgres_protocol test [#44880](https://github.com/ClickHouse/ClickHouse/pull/44880) ([Ilya Yatsishin](https://github.com/qoega)). +* Fix ConcurrentBoundedQueue::emplace() return value in case of finished queue [#44881](https://github.com/ClickHouse/ClickHouse/pull/44881) ([Azat Khuzhin](https://github.com/azat)). +* Validate function arguments in query tree [#44882](https://github.com/ClickHouse/ClickHouse/pull/44882) ([Dmitry Novik](https://github.com/novikd)). 
+* Rework CI reports to have a class and clarify the logic [#44883](https://github.com/ClickHouse/ClickHouse/pull/44883) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* fix-typo [#44886](https://github.com/ClickHouse/ClickHouse/pull/44886) ([Enrique Herreros](https://github.com/eherrerosj)). +* Store ZK generated data in `test_keeper_snapshot_small_distance` [#44888](https://github.com/ClickHouse/ClickHouse/pull/44888) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix "AttributeError: 'BuildResult' object has no attribute 'libraries'" in BuilderReport and BuilderSpecialReport [#44890](https://github.com/ClickHouse/ClickHouse/pull/44890) ([Robert Schulze](https://github.com/rschu1ze)). +* Convert integration test_dictionaries_update_field to a stateless [#44891](https://github.com/ClickHouse/ClickHouse/pull/44891) ([Azat Khuzhin](https://github.com/azat)). +* Upgrade googletest to latest HEAD [#44894](https://github.com/ClickHouse/ClickHouse/pull/44894) ([Robert Schulze](https://github.com/rschu1ze)). +* Try fix rabbitmq potential leak [#44897](https://github.com/ClickHouse/ClickHouse/pull/44897) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Try to fix flaky `test_storage_kafka::test_kafka_produce_key_timestamp` [#44898](https://github.com/ClickHouse/ClickHouse/pull/44898) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix flaky `test_concurrent_queries_restriction_by_query_kind` [#44903](https://github.com/ClickHouse/ClickHouse/pull/44903) ([Antonio Andelic](https://github.com/antonio2368)). +* Avoid Keeper crash on shutdown (fix `test_keeper_snapshot_on_exit`) [#44908](https://github.com/ClickHouse/ClickHouse/pull/44908) ([Antonio Andelic](https://github.com/antonio2368)). +* Do not merge over a gap with outdated undeleted parts [#44909](https://github.com/ClickHouse/ClickHouse/pull/44909) ([Sema Checherinda](https://github.com/CheSema)). +* Fix logging message in MergeTreeDataMergerMutator (about merged parts) [#44917](https://github.com/ClickHouse/ClickHouse/pull/44917) ([Azat Khuzhin](https://github.com/azat)). +* Fix flaky test `test_lost_part` [#44921](https://github.com/ClickHouse/ClickHouse/pull/44921) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Add fast and cancellable shared_mutex alternatives [#44924](https://github.com/ClickHouse/ClickHouse/pull/44924) ([Sergei Trifonov](https://github.com/serxa)). +* Fix deadlock in Keeper's changelog [#44937](https://github.com/ClickHouse/ClickHouse/pull/44937) ([Antonio Andelic](https://github.com/antonio2368)). +* Stop merges to avoid a race between merge and freeze. [#44938](https://github.com/ClickHouse/ClickHouse/pull/44938) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix memory leak in Aws::InitAPI [#44942](https://github.com/ClickHouse/ClickHouse/pull/44942) ([Vitaly Baranov](https://github.com/vitlibar)). +* Change error code on invalid background_pool_size config [#44947](https://github.com/ClickHouse/ClickHouse/pull/44947) ([Raúl Marín](https://github.com/Algunenano)). +* Fix exception fix in TraceCollector dtor [#44948](https://github.com/ClickHouse/ClickHouse/pull/44948) ([Robert Schulze](https://github.com/rschu1ze)). +* Parallel distributed insert select with s3Cluster [3] [#44955](https://github.com/ClickHouse/ClickHouse/pull/44955) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Do not check read result consistency when unwinding [#44956](https://github.com/ClickHouse/ClickHouse/pull/44956) ([Alexander Gololobov](https://github.com/davenger)). 
+* Up the log level of tables dependencies graphs [#44957](https://github.com/ClickHouse/ClickHouse/pull/44957) ([Vitaly Baranov](https://github.com/vitlibar)). +* Hipster's HTML [#44961](https://github.com/ClickHouse/ClickHouse/pull/44961) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Docs: Mention non-standard DOTALL behavior of ClickHouse's match() [#44977](https://github.com/ClickHouse/ClickHouse/pull/44977) ([Robert Schulze](https://github.com/rschu1ze)). +* tests: fix test_replicated_users flakiness [#44978](https://github.com/ClickHouse/ClickHouse/pull/44978) ([Azat Khuzhin](https://github.com/azat)). +* Check what if disable some checks in storage Merge. [#44983](https://github.com/ClickHouse/ClickHouse/pull/44983) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix check for not existing input in ActionsDAG [#44987](https://github.com/ClickHouse/ClickHouse/pull/44987) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Update version_date.tsv and changelogs after v22.12.2.25-stable [#44988](https://github.com/ClickHouse/ClickHouse/pull/44988) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix test test_grpc_protocol/test.py::test_progress [#44996](https://github.com/ClickHouse/ClickHouse/pull/44996) ([Vitaly Baranov](https://github.com/vitlibar)). +* Improve S3 EC2 metadata tests [#45001](https://github.com/ClickHouse/ClickHouse/pull/45001) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix minmax_count_projection with _partition_value [#45003](https://github.com/ClickHouse/ClickHouse/pull/45003) ([Amos Bird](https://github.com/amosbird)). +* Fix strange trash in Fuzzer [#45006](https://github.com/ClickHouse/ClickHouse/pull/45006) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add `dmesg.log` to Fuzzer [#45008](https://github.com/ClickHouse/ClickHouse/pull/45008) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix `01961_roaring_memory_tracking` test, again [#45009](https://github.com/ClickHouse/ClickHouse/pull/45009) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Recognize more ok cases for Fuzzer [#45012](https://github.com/ClickHouse/ClickHouse/pull/45012) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Supposedly fix the "Download script failed" error [#45013](https://github.com/ClickHouse/ClickHouse/pull/45013) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add snapshot creation retry in Keeper tests using ZooKeeper [#45016](https://github.com/ClickHouse/ClickHouse/pull/45016) ([Antonio Andelic](https://github.com/antonio2368)). +* test for [#20098](https://github.com/ClickHouse/ClickHouse/issues/20098) [#45017](https://github.com/ClickHouse/ClickHouse/pull/45017) ([Denny Crane](https://github.com/den-crane)). +* test for [#26473](https://github.com/ClickHouse/ClickHouse/issues/26473) [#45018](https://github.com/ClickHouse/ClickHouse/pull/45018) ([Denny Crane](https://github.com/den-crane)). +* Remove the remainings of Testflows (2). [#45021](https://github.com/ClickHouse/ClickHouse/pull/45021) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Enable the check that was commented [#45022](https://github.com/ClickHouse/ClickHouse/pull/45022) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix false positive in Fuzzer [#45025](https://github.com/ClickHouse/ClickHouse/pull/45025) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Fix false positive in Fuzzer, alternative variant [#45026](https://github.com/ClickHouse/ClickHouse/pull/45026) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix function `range` (the bug was unreleased) [#45030](https://github.com/ClickHouse/ClickHouse/pull/45030) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix OOM in Fuzzer [#45032](https://github.com/ClickHouse/ClickHouse/pull/45032) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Less OOM in Stress test [#45033](https://github.com/ClickHouse/ClickHouse/pull/45033) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#31361](https://github.com/ClickHouse/ClickHouse/issues/31361) [#45034](https://github.com/ClickHouse/ClickHouse/pull/45034) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#38729](https://github.com/ClickHouse/ClickHouse/issues/38729) [#45035](https://github.com/ClickHouse/ClickHouse/pull/45035) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix typos [#45036](https://github.com/ClickHouse/ClickHouse/pull/45036) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* I didn't understand the logic of this test, @azat [#45037](https://github.com/ClickHouse/ClickHouse/pull/45037) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Small fixes for Coordination unit tests [#45039](https://github.com/ClickHouse/ClickHouse/pull/45039) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix flaky test (hilarious) [#45042](https://github.com/ClickHouse/ClickHouse/pull/45042) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Non significant changes [#45046](https://github.com/ClickHouse/ClickHouse/pull/45046) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Don't fix parallel formatting [#45050](https://github.com/ClickHouse/ClickHouse/pull/45050) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix (benign) data race in clickhouse-client [#45053](https://github.com/ClickHouse/ClickHouse/pull/45053) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Analyzer aggregation without column fix [#45055](https://github.com/ClickHouse/ClickHouse/pull/45055) ([Maksim Kita](https://github.com/kitaisreal)). +* Analyzer ARRAY JOIN crash fix [#45059](https://github.com/ClickHouse/ClickHouse/pull/45059) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix data race in openSQLiteDB [#45062](https://github.com/ClickHouse/ClickHouse/pull/45062) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Analyzer function IN crash fix [#45064](https://github.com/ClickHouse/ClickHouse/pull/45064) ([Maksim Kita](https://github.com/kitaisreal)). +* JIT compilation float to bool conversion fix [#45067](https://github.com/ClickHouse/ClickHouse/pull/45067) ([Maksim Kita](https://github.com/kitaisreal)). +* Update version_date.tsv and changelogs after v22.11.3.47-stable [#45069](https://github.com/ClickHouse/ClickHouse/pull/45069) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v22.10.5.54-stable [#45071](https://github.com/ClickHouse/ClickHouse/pull/45071) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v22.3.16.1190-lts [#45073](https://github.com/ClickHouse/ClickHouse/pull/45073) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Improve release scripts [#45074](https://github.com/ClickHouse/ClickHouse/pull/45074) ([Mikhail f. 
Shiryaev](https://github.com/Felixoid)). +* Change the color of links in dark reports a little bit [#45077](https://github.com/ClickHouse/ClickHouse/pull/45077) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix Fuzzer script [#45082](https://github.com/ClickHouse/ClickHouse/pull/45082) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Try fixing KeeperMap tests [#45094](https://github.com/ClickHouse/ClickHouse/pull/45094) ([Antonio Andelic](https://github.com/antonio2368)). +* Update version_date.tsv and changelogs after v22.8.12.45-lts [#45098](https://github.com/ClickHouse/ClickHouse/pull/45098) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Try to fix flaky test_create_user_and_login/test.py::test_login_as_dropped_user_xml [#45099](https://github.com/ClickHouse/ClickHouse/pull/45099) ([Ilya Yatsishin](https://github.com/qoega)). +* Update version_date.tsv and changelogs after v22.10.6.3-stable [#45107](https://github.com/ClickHouse/ClickHouse/pull/45107) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Docs: Make heading consistent with other headings in System Table docs [#45109](https://github.com/ClickHouse/ClickHouse/pull/45109) ([Robert Schulze](https://github.com/rschu1ze)). +* Update version_date.tsv and changelogs after v22.11.4.3-stable [#45110](https://github.com/ClickHouse/ClickHouse/pull/45110) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v22.12.3.5-stable [#45113](https://github.com/ClickHouse/ClickHouse/pull/45113) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Docs: Rewrite awkwardly phrased sentence about flush interval [#45114](https://github.com/ClickHouse/ClickHouse/pull/45114) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix data race in s3Cluster. [#45123](https://github.com/ClickHouse/ClickHouse/pull/45123) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Pull SQLancer image before check run [#45125](https://github.com/ClickHouse/ClickHouse/pull/45125) ([Ilya Yatsishin](https://github.com/qoega)). +* Fix flaky azure test [#45134](https://github.com/ClickHouse/ClickHouse/pull/45134) ([alesapin](https://github.com/alesapin)). +* Minor cleanup in stress/run.sh [#45136](https://github.com/ClickHouse/ClickHouse/pull/45136) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Performance report: "Partial queries" --> "Backward-incompatible queries [#45152](https://github.com/ClickHouse/ClickHouse/pull/45152) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix flaky test_tcp_handler_interserver_listen_host [#45156](https://github.com/ClickHouse/ClickHouse/pull/45156) ([Ilya Yatsishin](https://github.com/qoega)). +* Clean trash from changelog for v22.3.16.1190-lts [#45159](https://github.com/ClickHouse/ClickHouse/pull/45159) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Disable `test_storage_rabbitmq` [#45161](https://github.com/ClickHouse/ClickHouse/pull/45161) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Disable test_ttl_move_memory_usage as too flaky. [#45162](https://github.com/ClickHouse/ClickHouse/pull/45162) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* More logging to facilitate debugging of flaky test_ttl_replicated [#45165](https://github.com/ClickHouse/ClickHouse/pull/45165) ([Alexander Gololobov](https://github.com/davenger)). 
+* Try to fix flaky test_ttl_move_memory_usage [#45168](https://github.com/ClickHouse/ClickHouse/pull/45168) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix flaky test test_multiple_disks/test.py::test_rename [#45180](https://github.com/ClickHouse/ClickHouse/pull/45180) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Calculate only required columns in system.detached_parts [#45181](https://github.com/ClickHouse/ClickHouse/pull/45181) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Restart NightlyBuilds if the runner died [#45187](https://github.com/ClickHouse/ClickHouse/pull/45187) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix part ID generation for IP types for backward compatibility [#45191](https://github.com/ClickHouse/ClickHouse/pull/45191) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix integration test test_replicated_users::test_rename_replicated [#45192](https://github.com/ClickHouse/ClickHouse/pull/45192) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Add CACHE_INVALIDATOR for sqlancer builds [#45201](https://github.com/ClickHouse/ClickHouse/pull/45201) ([Ilya Yatsishin](https://github.com/qoega)). +* Fix possible stack-use-after-return in LimitReadBuffer [#45203](https://github.com/ClickHouse/ClickHouse/pull/45203) ([Kruglov Pavel](https://github.com/Avogar)). +* Disable check to make test_overcommit_tracker not flaky [#45206](https://github.com/ClickHouse/ClickHouse/pull/45206) ([Dmitry Novik](https://github.com/novikd)). +* Fix flaky test `01961_roaring_memory_tracking` (3) [#45208](https://github.com/ClickHouse/ClickHouse/pull/45208) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove trash from stress test [#45211](https://github.com/ClickHouse/ClickHouse/pull/45211) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* remove unused function [#45212](https://github.com/ClickHouse/ClickHouse/pull/45212) ([flynn](https://github.com/ucasfl)). +* Fix flaky `test_keeper_three_nodes_two_alive` [#45213](https://github.com/ClickHouse/ClickHouse/pull/45213) ([Antonio Andelic](https://github.com/antonio2368)). +* Fuzz PREWHERE clause [#45222](https://github.com/ClickHouse/ClickHouse/pull/45222) ([Alexander Gololobov](https://github.com/davenger)). +* Added a test for merge join key condition with big int & decimal [#45228](https://github.com/ClickHouse/ClickHouse/pull/45228) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix rare logical error: `Too large alignment` [#45229](https://github.com/ClickHouse/ClickHouse/pull/45229) ([Anton Popov](https://github.com/CurtizJ)). +* Update version_date.tsv and changelogs after v22.3.17.13-lts [#45234](https://github.com/ClickHouse/ClickHouse/pull/45234) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* More verbose logs about replication log entries [#45235](https://github.com/ClickHouse/ClickHouse/pull/45235) ([Alexander Tokmakov](https://github.com/tavplubix)). +* One more attempt to fix race in TCPHandler [#45240](https://github.com/ClickHouse/ClickHouse/pull/45240) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Update clickhouse-test [#45251](https://github.com/ClickHouse/ClickHouse/pull/45251) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Planner small fixes [#45254](https://github.com/ClickHouse/ClickHouse/pull/45254) ([Maksim Kita](https://github.com/kitaisreal)). 
+* Fix log level "Test" for send_logs_level in client [#45273](https://github.com/ClickHouse/ClickHouse/pull/45273) ([Azat Khuzhin](https://github.com/azat)). +* tests: fix clickhouse binaries detection [#45283](https://github.com/ClickHouse/ClickHouse/pull/45283) ([Azat Khuzhin](https://github.com/azat)). +* tests/ci: encode HTML entities in the reports [#45284](https://github.com/ClickHouse/ClickHouse/pull/45284) ([Azat Khuzhin](https://github.com/azat)). +* Disable `02151_hash_table_sizes_stats_distributed` under TSAN [#45287](https://github.com/ClickHouse/ClickHouse/pull/45287) ([Nikita Taranov](https://github.com/nickitat)). +* Fix wrong approved_at, simplify conditions [#45302](https://github.com/ClickHouse/ClickHouse/pull/45302) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Disable 02028_create_select_settings with Ordinary [#45307](https://github.com/ClickHouse/ClickHouse/pull/45307) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Save message format strings for DB::Exception [#45342](https://github.com/ClickHouse/ClickHouse/pull/45342) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Slightly better output for glibc check [#45353](https://github.com/ClickHouse/ClickHouse/pull/45353) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add checks for compilation of regexps [#45356](https://github.com/ClickHouse/ClickHouse/pull/45356) ([Anton Popov](https://github.com/CurtizJ)). +* Analyzer compound identifier typo correction fix [#45357](https://github.com/ClickHouse/ClickHouse/pull/45357) ([Maksim Kita](https://github.com/kitaisreal)). +* Bump to newer version of debug-action [#45359](https://github.com/ClickHouse/ClickHouse/pull/45359) ([Ilya Yatsishin](https://github.com/qoega)). +* Improve failed kafka startup logging [#45369](https://github.com/ClickHouse/ClickHouse/pull/45369) ([Ilya Yatsishin](https://github.com/qoega)). +* Fix flaky ttl test [#45370](https://github.com/ClickHouse/ClickHouse/pull/45370) ([alesapin](https://github.com/alesapin)). +* Add detailed profile events for throttling [#45373](https://github.com/ClickHouse/ClickHouse/pull/45373) ([Sergei Trifonov](https://github.com/serxa)). +* Update .gitignore [#45378](https://github.com/ClickHouse/ClickHouse/pull/45378) ([Nikolay Degterinsky](https://github.com/evillique)). +* Make test simpler to see errors [#45402](https://github.com/ClickHouse/ClickHouse/pull/45402) ([Ilya Yatsishin](https://github.com/qoega)). +* Reduce an amount of trash in `tests_system_merges` [#45403](https://github.com/ClickHouse/ClickHouse/pull/45403) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix reading from encrypted disk with passed file size [#45418](https://github.com/ClickHouse/ClickHouse/pull/45418) ([Anton Popov](https://github.com/CurtizJ)). +* Add delete by ttl for zookeeper_log [#45419](https://github.com/ClickHouse/ClickHouse/pull/45419) ([Nikita Taranov](https://github.com/nickitat)). +* Get rid of artifactory in favor of r2 + ch-repos-manager [#45421](https://github.com/ClickHouse/ClickHouse/pull/45421) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Minor improvements around reading from remote [#45442](https://github.com/ClickHouse/ClickHouse/pull/45442) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Docs: Beautify section on secondary index types [#45444](https://github.com/ClickHouse/ClickHouse/pull/45444) ([Robert Schulze](https://github.com/rschu1ze)). 
+* Fix Buffer's offsets mismatch logical error in stress test [#45446](https://github.com/ClickHouse/ClickHouse/pull/45446) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Better formatting for exception messages [#45449](https://github.com/ClickHouse/ClickHouse/pull/45449) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add default GRANULARITY argument for secondary indexes [#45451](https://github.com/ClickHouse/ClickHouse/pull/45451) ([Nikolay Degterinsky](https://github.com/evillique)). +* Cleanup of inverted index [#45460](https://github.com/ClickHouse/ClickHouse/pull/45460) ([Robert Schulze](https://github.com/rschu1ze)). +* CherryPick: Fix a wrong staring search date [#45466](https://github.com/ClickHouse/ClickHouse/pull/45466) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix typos [#45470](https://github.com/ClickHouse/ClickHouse/pull/45470) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix possible aborts in arrow lib [#45478](https://github.com/ClickHouse/ClickHouse/pull/45478) ([Kruglov Pavel](https://github.com/Avogar)). +* Add more retries to AST Fuzzer [#45479](https://github.com/ClickHouse/ClickHouse/pull/45479) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix schema inference from insertion table in hdfsCluster [#45483](https://github.com/ClickHouse/ClickHouse/pull/45483) ([Kruglov Pavel](https://github.com/Avogar)). +* Remove unnecessary getTotalRowCount function calls [#45485](https://github.com/ClickHouse/ClickHouse/pull/45485) ([Maksim Kita](https://github.com/kitaisreal)). +* Use new copy s3 functions in S3ObjectStorage [#45487](https://github.com/ClickHouse/ClickHouse/pull/45487) ([Vitaly Baranov](https://github.com/vitlibar)). +* Forward declaration of ConcurrentBoundedQueue in ThreadStatus [#45489](https://github.com/ClickHouse/ClickHouse/pull/45489) ([Azat Khuzhin](https://github.com/azat)). +* Revert "Merge pull request [#44922](https://github.com/ClickHouse/ClickHouse/issues/44922) from azat/dist/async-INSERT-metrics" [#45492](https://github.com/ClickHouse/ClickHouse/pull/45492) ([Azat Khuzhin](https://github.com/azat)). +* Docs: Fix weird formatting [#45495](https://github.com/ClickHouse/ClickHouse/pull/45495) ([Robert Schulze](https://github.com/rschu1ze)). +* Docs: Fix link to writing guide [#45496](https://github.com/ClickHouse/ClickHouse/pull/45496) ([Robert Schulze](https://github.com/rschu1ze)). +* Improve logging for TeePopen.timeout exceeded [#45504](https://github.com/ClickHouse/ClickHouse/pull/45504) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix MSan build once again (too heavy translation units) [#45512](https://github.com/ClickHouse/ClickHouse/pull/45512) ([Nikolay Degterinsky](https://github.com/evillique)). +* Additional check in MergeTreeReadPool [#45515](https://github.com/ClickHouse/ClickHouse/pull/45515) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Update test_system_merges/test.py [#45516](https://github.com/ClickHouse/ClickHouse/pull/45516) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Revert "Merge pull request [#45493](https://github.com/ClickHouse/ClickHouse/issues/45493) from azat/fix-detach" [#45545](https://github.com/ClickHouse/ClickHouse/pull/45545) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Update stress [#45546](https://github.com/ClickHouse/ClickHouse/pull/45546) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* Ignore utf errors in clickhouse-test reportLogStats [#45556](https://github.com/ClickHouse/ClickHouse/pull/45556) ([Vladimir C](https://github.com/vdimir)). +* Resubmit "Fix possible in-use table after DETACH" [#45566](https://github.com/ClickHouse/ClickHouse/pull/45566) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Typo: "Granulesis" --> "Granules" [#45598](https://github.com/ClickHouse/ClickHouse/pull/45598) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix version in autogenerated_versions.txt [#45624](https://github.com/ClickHouse/ClickHouse/pull/45624) ([Dmitry Novik](https://github.com/novikd)). + diff --git a/docs/en/engines/table-engines/integrations/hdfs.md b/docs/en/engines/table-engines/integrations/hdfs.md index f6b8e3a1da6..7c04a6594a6 100644 --- a/docs/en/engines/table-engines/integrations/hdfs.md +++ b/docs/en/engines/table-engines/integrations/hdfs.md @@ -21,6 +21,13 @@ ENGINE = HDFS(URI, format) `SELECT` queries, the format must be supported for input, and to perform `INSERT` queries – for output. The available formats are listed in the [Formats](../../../interfaces/formats.md#formats) section. +- [PARTITION BY expr] + +### PARTITION BY + +`PARTITION BY` — Optional. In most cases you don't need a partition key, and if it is needed you generally don't need a partition key more granular than by month. Partitioning does not speed up queries (in contrast to the ORDER BY expression). You should never use too granular partitioning. Don't partition your data by client identifiers or names (instead, make client identifier or name the first column in the ORDER BY expression). + +For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](/docs/en/sql-reference/data-types/date.md). The partition names here have the `"YYYYMM"` format. **Example:** diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index 245bd6f4468..723425429a5 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -13,6 +13,7 @@ This engine provides integration with [Amazon S3](https://aws.amazon.com/s3/) ec ``` sql CREATE TABLE s3_engine_table (name String, value UInt32) ENGINE = S3(path, [aws_access_key_id, aws_secret_access_key,] format, [compression]) + [PARTITION BY expr] [SETTINGS ...] ``` @@ -23,6 +24,12 @@ CREATE TABLE s3_engine_table (name String, value UInt32) - `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3). - `compression` — Compression type. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. Parameter is optional. By default, it will autodetect compression by file extension. +### PARTITION BY + +`PARTITION BY` — Optional. In most cases you don't need a partition key, and if it is needed you generally don't need a partition key more granular than by month. Partitioning does not speed up queries (in contrast to the ORDER BY expression). You should never use too granular partitioning. Don't partition your data by client identifiers or names (instead, make client identifier or name the first column in the ORDER BY expression). 
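For illustration, a minimal sketch (not part of this patch; the bucket URL, table and column names are invented) of an S3-backed table with the monthly partition key described in the next paragraph might look like this, using the `{_partition_id}` substitution in the path as one way to write each partition to its own object:

```sql
CREATE TABLE s3_partitioned (dt Date, value UInt32)
    ENGINE = S3('https://my-bucket.s3.amazonaws.com/data_{_partition_id}.csv', 'CSV')
    PARTITION BY toYYYYMM(dt);

-- Rows from different months end up in different objects,
-- e.g. data_202301.csv and data_202302.csv for the rows below.
INSERT INTO s3_partitioned VALUES ('2023-01-15', 1), ('2023-02-03', 2);
```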
+ +For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](/docs/en/sql-reference/data-types/date.md). The partition names here have the `"YYYYMM"` format. + **Example** ``` sql diff --git a/docs/en/engines/table-engines/mergetree-family/invertedindexes.md b/docs/en/engines/table-engines/mergetree-family/invertedindexes.md index 693902b7d9b..9facb746eff 100644 --- a/docs/en/engines/table-engines/mergetree-family/invertedindexes.md +++ b/docs/en/engines/table-engines/mergetree-family/invertedindexes.md @@ -43,12 +43,12 @@ To use the index, no special functions or syntax are required. Typical string se examples, consider: ```sql -SELECT * from tab WHERE s == 'Hello World;; +SELECT * from tab WHERE s == 'Hello World'; SELECT * from tab WHERE s IN (‘Hello’, ‘World’); SELECT * from tab WHERE s LIKE ‘%Hello%’; SELECT * from tab WHERE multiSearchAny(s, ‘Hello’, ‘World’); SELECT * from tab WHERE hasToken(s, ‘Hello’); -SELECT * from tab WHERE multiSearchAll(s, [‘Hello’, ‘World’]) +SELECT * from tab WHERE multiSearchAll(s, [‘Hello’, ‘World’]); ``` The inverted index also works on columns of type `Array(String)`, `Array(FixedString)`, `Map(String)` and `Map(String)`. diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 7daa0dbbb97..c24e56da7f4 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -77,7 +77,7 @@ Use the `ORDER BY tuple()` syntax, if you do not need sorting. See [Selecting th #### PARTITION BY -`PARTITION BY` — The [partitioning key](/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md). Optional. In most cases you don't need partition key, and in most other cases you don't need partition key more granular than by months. Partitioning does not speed up queries (in contrast to the ORDER BY expression). You should never use too granular partitioning. Don't partition your data by client identifiers or names (instead make client identifier or name the first column in the ORDER BY expression). +`PARTITION BY` — The [partitioning key](/docs/en/engines/table-engines/mergetree-family/custom-partitioning-key.md). Optional. In most cases, you don't need a partition key, and if you do need to partition, generally you do not need a partition key more granular than by month. Partitioning does not speed up queries (in contrast to the ORDER BY expression). You should never use too granular partitioning. Don't partition your data by client identifiers or names (instead, make client identifier or name the first column in the ORDER BY expression). For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](/docs/en/sql-reference/data-types/date.md). The partition names here have the `"YYYYMM"` format. @@ -470,6 +470,9 @@ The `set` index can be used with all functions.
Function subsets for other index | [empty](/docs/en/sql-reference/functions/array-functions#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ | | [notEmpty](/docs/en/sql-reference/functions/array-functions#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ | | hasToken | ✗ | ✗ | ✗ | ✔ | ✗ | +| hasTokenOrNull | ✗ | ✗ | ✗ | ✔ | ✗ | +| hasTokenCaseInsensitive | ✗ | ✗ | ✗ | ✔ | ✗ | +| hasTokenCaseInsensitiveOrNull | ✗ | ✗ | ✗ | ✔ | ✗ | Functions with a constant argument that is less than ngram size can’t be used by `ngrambf_v1` for query optimization. diff --git a/docs/en/engines/table-engines/special/file.md b/docs/en/engines/table-engines/special/file.md index 5a9113f3a18..8314c511236 100644 --- a/docs/en/engines/table-engines/special/file.md +++ b/docs/en/engines/table-engines/special/file.md @@ -86,3 +86,9 @@ $ echo -e "1,2\n3,4" | clickhouse-local -q "CREATE TABLE table (a Int64, b Int64 - `SELECT ... SAMPLE` - Indices - Replication + +## PARTITION BY + +`PARTITION BY` — Optional. It is possible to create separate files by partitioning the data on a partition key. In most cases, you don't need a partition key, and if it is needed you generally don't need a partition key more granular than by month. Partitioning does not speed up queries (in contrast to the ORDER BY expression). You should never use too granular partitioning. Don't partition your data by client identifiers or names (instead, make client identifier or name the first column in the ORDER BY expression). + +For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](/docs/en/sql-reference/data-types/date.md). The partition names here have the `"YYYYMM"` format. diff --git a/docs/en/engines/table-engines/special/url.md b/docs/en/engines/table-engines/special/url.md index 095ffbbb827..af8a80c75b0 100644 --- a/docs/en/engines/table-engines/special/url.md +++ b/docs/en/engines/table-engines/special/url.md @@ -96,3 +96,9 @@ SELECT * FROM url_engine_table - `ALTER` and `SELECT...SAMPLE` operations. - Indexes. - Replication. + +## PARTITION BY + +`PARTITION BY` — Optional. It is possible to create separate files by partitioning the data on a partition key. In most cases, you don't need a partition key, and if it is needed you generally don't need a partition key more granular than by month. Partitioning does not speed up queries (in contrast to the ORDER BY expression). You should never use too granular partitioning. Don't partition your data by client identifiers or names (instead, make client identifier or name the first column in the ORDER BY expression). + +For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](/docs/en/sql-reference/data-types/date.md). The partition names here have the `"YYYYMM"` format. diff --git a/docs/en/getting-started/example-datasets/menus.md b/docs/en/getting-started/example-datasets/menus.md index 10baf899fc6..32fe62865d4 100644 --- a/docs/en/getting-started/example-datasets/menus.md +++ b/docs/en/getting-started/example-datasets/menus.md @@ -119,9 +119,9 @@ We use [CSVWithNames](../../interfaces/formats.md#csvwithnames) format as the da We disable `format_csv_allow_single_quotes` as only double quotes are used for data fields and single quotes can be inside the values and should not confuse the CSV parser. 
-We disable [input_format_null_as_default](../../operations/settings/settings.md#settings-input-format-null-as-default) as our data does not have [NULL](../../sql-reference/syntax.md#null-literal). Otherwise ClickHouse will try to parse `\N` sequences and can be confused with `\` in data. +We disable [input_format_null_as_default](../../operations/settings/settings-formats.md#settings-input-format-null-as-default) as our data does not have [NULL](../../sql-reference/syntax.md#null-literal). Otherwise ClickHouse will try to parse `\N` sequences and can be confused with `\` in data. -The setting [date_time_input_format best_effort](../../operations/settings/settings.md#settings-date_time_input_format) allows to parse [DateTime](../../sql-reference/data-types/datetime.md) fields in wide variety of formats. For example, ISO-8601 without seconds like '2000-01-01 01:02' will be recognized. Without this setting only fixed DateTime format is allowed. +The setting [date_time_input_format best_effort](../../operations/settings/settings-formats.md#settings-date_time_input_format) allows to parse [DateTime](../../sql-reference/data-types/datetime.md) fields in wide variety of formats. For example, ISO-8601 without seconds like '2000-01-01 01:02' will be recognized. Without this setting only fixed DateTime format is allowed. ## Denormalize the Data {#denormalize-data} diff --git a/docs/en/getting-started/example-datasets/opensky.md b/docs/en/getting-started/example-datasets/opensky.md index 12a33791235..7093a2df04f 100644 --- a/docs/en/getting-started/example-datasets/opensky.md +++ b/docs/en/getting-started/example-datasets/opensky.md @@ -60,7 +60,7 @@ ls -1 flightlist_*.csv.gz | xargs -P100 -I{} bash -c 'gzip -c -d "{}" | clickhou `xargs -P100` specifies to use up to 100 parallel workers but as we only have 30 files, the number of workers will be only 30. - For every file, `xargs` will run a script with `bash -c`. The script has substitution in form of `{}` and the `xargs` command will substitute the filename to it (we have asked it for `xargs` with `-I{}`). - The script will decompress the file (`gzip -c -d "{}"`) to standard output (`-c` parameter) and the output is redirected to `clickhouse-client`. -- We also asked to parse [DateTime](../../sql-reference/data-types/datetime.md) fields with extended parser ([--date_time_input_format best_effort](../../operations/settings/settings.md#settings-date_time_input_format)) to recognize ISO-8601 format with timezone offsets. +- We also asked to parse [DateTime](../../sql-reference/data-types/datetime.md) fields with extended parser ([--date_time_input_format best_effort](../../operations/settings/settings-formats.md#settings-date_time_input_format)) to recognize ISO-8601 format with timezone offsets. Finally, `clickhouse-client` will do insertion. It will read input data in [CSVWithNames](../../interfaces/formats.md#csvwithnames) format. diff --git a/docs/en/getting-started/index.md b/docs/en/getting-started/index.md index e72e23208ac..b9c67f6969d 100644 --- a/docs/en/getting-started/index.md +++ b/docs/en/getting-started/index.md @@ -22,8 +22,8 @@ functions in ClickHouse. The sample datasets include: - The [Cell Towers dataset](../getting-started/example-datasets/cell-towers.md) imports a CSV into ClickHouse - The [NYPD Complaint Data](../getting-started/example-datasets/nypd_complaint_data.md) demonstrates how to use data inference to simplify creating tables - The ["What's on the Menu?" 
dataset](../getting-started/example-datasets/menus.md) has an example of denormalizing data -- The [Getting Data Into ClickHouse - Part 1](https://clickhouse.com/blog/getting-data-into-clickhouse-part-1) provides examples of defining a schema and loading a small Hacker News dataset -- The [Getting Data Into ClickHouse - Part 2 - A JSON detour](https://clickhouse.com/blog/getting-data-into-clickhouse-part-2-json) shows how JSON data can be loaded -- The [Getting Data Into ClickHouse - Part 3 - Using S3](https://clickhouse.com/blog/getting-data-into-clickhouse-part-3-s3) has examples of loading data from s3 +- [Getting Data Into ClickHouse - Part 1](https://clickhouse.com/blog/getting-data-into-clickhouse-part-1) provides examples of defining a schema and loading a small Hacker News dataset +- [Getting Data Into ClickHouse - Part 3 - Using S3](https://clickhouse.com/blog/getting-data-into-clickhouse-part-3-s3) has examples of loading data from s3 +- [Generating random data in ClickHouse](https://clickhouse.com/blog/generating-random-test-distribution-data-for-clickhouse) shows how to generate random data if none of the above fit your needs. View the **Tutorials and Datasets** menu for a complete list of sample datasets. diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index d384ed639eb..3dc269f6309 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -85,7 +85,7 @@ The supported formats are: | [MySQLDump](#mysqldump) | ✔ | ✗ | -You can control some format processing parameters with the ClickHouse settings. For more information read the [Settings](/docs/en/operations/settings/settings.md) section. +You can control some format processing parameters with the ClickHouse settings. For more information read the [Settings](/docs/en/operations/settings/settings-formats.md) section. ## TabSeparated {#tabseparated} @@ -148,10 +148,10 @@ Only a small set of symbols are escaped. You can easily stumble onto a string va Arrays are written as a list of comma-separated values in square brackets. Number items in the array are formatted as normally. `Date` and `DateTime` types are written in single quotes. Strings are written in single quotes with the same escaping rules as above. -[NULL](/docs/en/sql-reference/syntax.md) is formatted according to setting [format_tsv_null_representation](/docs/en/operations/settings/settings.md/#format_tsv_null_representation) (default value is `\N`). +[NULL](/docs/en/sql-reference/syntax.md) is formatted according to setting [format_tsv_null_representation](/docs/en/operations/settings/settings-formats.md/#format_tsv_null_representation) (default value is `\N`). In input data, ENUM values can be represented as names or as ids. First, we try to match the input value to the ENUM name. If we fail and the input value is a number, we try to match this number to ENUM id. -If input data contains only ENUM ids, it's recommended to enable the setting [input_format_tsv_enum_as_number](/docs/en/operations/settings/settings.md/#input_format_tsv_enum_as_number) to optimize ENUM parsing. +If input data contains only ENUM ids, it's recommended to enable the setting [input_format_tsv_enum_as_number](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_enum_as_number) to optimize ENUM parsing. Each element of [Nested](/docs/en/sql-reference/data-types/nested-data-structures/nested.md) structures is represented as an array. 
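To make the NULL-representation and ENUM settings mentioned above concrete, here is a short hypothetical sketch (added for illustration, not part of this patch; the table name is invented):

```sql
-- Emit NULLs with a custom marker instead of the default \N in TSV output
SET format_tsv_null_representation = 'NULL';
SELECT NULL AS x FORMAT TSV;

-- Accept ENUM values given as their numeric ids in TSV input
CREATE TABLE t_enum (id UInt32, value Enum('first' = 1, 'second' = 2)) ENGINE = Memory;
SET input_format_tsv_enum_as_number = 1;
-- The second TSV column may now contain 1 or 2 instead of 'first' / 'second'
```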
@@ -183,12 +183,13 @@ SELECT * FROM nestedt FORMAT TSV ### TabSeparated format settings {#tabseparated-format-settings} -- [format_tsv_null_representation](/docs/en/operations/settings/settings.md/#format_tsv_null_representation) - custom NULL representation in TSV format. Default value - `\N`. -- [input_format_tsv_empty_as_default](/docs/en/operations/settings/settings.md/#input_format_tsv_empty_as_default) - treat empty fields in TSV input as default values. Default value - `false`. For complex default expressions [input_format_defaults_for_omitted_fields](/docs/en/operations/settings/settings.md/#input_format_defaults_for_omitted_fields) must be enabled too. -- [input_format_tsv_enum_as_number](/docs/en/operations/settings/settings.md/#input_format_tsv_enum_as_number) - treat inserted enum values in TSV formats as enum indices. Default value - `false`. -- [input_format_tsv_use_best_effort_in_schema_inference](/docs/en/operations/settings/settings.md/#input_format_tsv_use_best_effort_in_schema_inference) - use some tweaks and heuristics to infer schema in TSV format. If disabled, all fields will be inferred as Strings. Default value - `true`. -- [output_format_tsv_crlf_end_of_line](/docs/en/operations/settings/settings.md/#output_format_tsv_crlf_end_of_line) - if it is set true, end of line in TSV output format will be `\r\n` instead of `\n`. Default value - `false`. -- [input_format_tsv_skip_first_lines](/docs/en/operations/settings/settings.md/#input_format_tsv_skip_first_lines) - skip specified number of lines at the beginning of data. Default value - `0`. +- [format_tsv_null_representation](/docs/en/operations/settings/settings-formats.md/#format_tsv_null_representation) - custom NULL representation in TSV format. Default value - `\N`. +- [input_format_tsv_empty_as_default](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_empty_as_default) - treat empty fields in TSV input as default values. Default value - `false`. For complex default expressions [input_format_defaults_for_omitted_fields](/docs/en/operations/settings/settings-formats.md/#input_format_defaults_for_omitted_fields) must be enabled too. +- [input_format_tsv_enum_as_number](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_enum_as_number) - treat inserted enum values in TSV formats as enum indices. Default value - `false`. +- [input_format_tsv_use_best_effort_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_use_best_effort_in_schema_inference) - use some tweaks and heuristics to infer schema in TSV format. If disabled, all fields will be inferred as Strings. Default value - `true`. +- [output_format_tsv_crlf_end_of_line](/docs/en/operations/settings/settings-formats.md/#output_format_tsv_crlf_end_of_line) - if it is set true, end of line in TSV output format will be `\r\n` instead of `\n`. Default value - `false`. +- [input_format_tsv_skip_first_lines](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_skip_first_lines) - skip specified number of lines at the beginning of data. Default value - `0`. +- [input_format_tsv_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_detect_header) - automatically detect header with names and types in TSV format. Default value - `true`. ## TabSeparatedRaw {#tabseparatedraw} @@ -204,8 +205,8 @@ Differs from the `TabSeparated` format in that the column names are written in t During parsing, the first row is expected to contain the column names. 
You can use column names to determine their position and to check their correctness. :::warning -If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, -the columns from the input data will be mapped to the columns of the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. +If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, +the columns from the input data will be mapped to the columns of the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. ::: @@ -216,10 +217,10 @@ This format is also available under the name `TSVWithNames`. Differs from the `TabSeparated` format in that the column names are written to the first row, while the column types are in the second row. :::warning -If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, -the columns from the input data will be mapped to the columns in the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. +If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, +the columns from the input data will be mapped to the columns in the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. -If setting [input_format_with_types_use_header](/docs/en/operations/settings/settings.md/#input_format_with_types_use_header) is set to 1, +If setting [input_format_with_types_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_types_use_header) is set to 1, the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped. ::: @@ -427,49 +428,50 @@ Both data output and parsing are supported in this format. For parsing, any orde Parsing allows the presence of the additional field `tskv` without the equal sign or a value. This field is ignored. -During import, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. +During import, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. ## CSV {#csv} Comma Separated Values format ([RFC](https://tools.ietf.org/html/rfc4180)). -When formatting, rows are enclosed in double quotes. A double quote inside a string is output as two double quotes in a row. There are no other rules for escaping characters. Date and date-time are enclosed in double quotes. Numbers are output without quotes. 
Values are separated by a delimiter character, which is `,` by default. The delimiter character is defined in the setting [format_csv_delimiter](/docs/en/operations/settings/settings.md/#format_csv_delimiter). Rows are separated using the Unix line feed (LF). Arrays are serialized in CSV as follows: first, the array is serialized to a string as in TabSeparated format, and then the resulting string is output to CSV in double quotes. Tuples in CSV format are serialized as separate columns (that is, their nesting in the tuple is lost). +When formatting, rows are enclosed in double quotes. A double quote inside a string is output as two double quotes in a row. There are no other rules for escaping characters. Date and date-time are enclosed in double quotes. Numbers are output without quotes. Values are separated by a delimiter character, which is `,` by default. The delimiter character is defined in the setting [format_csv_delimiter](/docs/en/operations/settings/settings-formats.md/#format_csv_delimiter). Rows are separated using the Unix line feed (LF). Arrays are serialized in CSV as follows: first, the array is serialized to a string as in TabSeparated format, and then the resulting string is output to CSV in double quotes. Tuples in CSV format are serialized as separate columns (that is, their nesting in the tuple is lost). ``` bash $ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FORMAT CSV" < data.csv ``` -\*By default, the delimiter is `,`. See the [format_csv_delimiter](/docs/en/operations/settings/settings.md/#format_csv_delimiter) setting for more information. +\*By default, the delimiter is `,`. See the [format_csv_delimiter](/docs/en/operations/settings/settings-formats.md/#format_csv_delimiter) setting for more information. When parsing, all values can be parsed either with or without quotes. Both double and single quotes are supported. Rows can also be arranged without quotes. In this case, they are parsed up to the delimiter character or line feed (CR or LF). In violation of the RFC, when parsing rows without quotes, the leading and trailing spaces and tabs are ignored. For the line feed, Unix (LF), Windows (CR LF) and Mac OS Classic (CR LF) types are all supported. -`NULL` is formatted according to setting [format_csv_null_representation](/docs/en/operations/settings/settings.md/#format_csv_null_representation) (default value is `\N`). +`NULL` is formatted according to setting [format_csv_null_representation](/docs/en/operations/settings/settings-formats.md/#format_csv_null_representation) (default value is `\N`). In input data, ENUM values can be represented as names or as ids. First, we try to match the input value to the ENUM name. If we fail and the input value is a number, we try to match this number to the ENUM id. -If input data contains only ENUM ids, it's recommended to enable the setting [input_format_csv_enum_as_number](/docs/en/operations/settings/settings.md/#input_format_csv_enum_as_number) to optimize ENUM parsing. +If input data contains only ENUM ids, it's recommended to enable the setting [input_format_csv_enum_as_number](/docs/en/operations/settings/settings-formats.md/#input_format_csv_enum_as_number) to optimize ENUM parsing. The CSV format supports the output of totals and extremes the same way as `TabSeparated`. ### CSV format settings {#csv-format-settings} -- [format_csv_delimiter](/docs/en/operations/settings/settings.md/#format_csv_delimiter) - the character to be considered as a delimiter in CSV data. Default value - `,`. 
-- [format_csv_allow_single_quotes](/docs/en/operations/settings/settings.md/#format_csv_allow_single_quotes) - allow strings in single quotes. Default value - `true`. -- [format_csv_allow_double_quotes](/docs/en/operations/settings/settings.md/#format_csv_allow_double_quotes) - allow strings in double quotes. Default value - `true`. -- [format_csv_null_representation](/docs/en/operations/settings/settings.md/#format_tsv_null_representation) - custom NULL representation in CSV format. Default value - `\N`. -- [input_format_csv_empty_as_default](/docs/en/operations/settings/settings.md/#input_format_csv_empty_as_default) - treat empty fields in CSV input as default values. Default value - `true`. For complex default expressions, [input_format_defaults_for_omitted_fields](/docs/en/operations/settings/settings.md/#input_format_defaults_for_omitted_fields) must be enabled too. -- [input_format_csv_enum_as_number](/docs/en/operations/settings/settings.md/#input_format_csv_enum_as_number) - treat inserted enum values in CSV formats as enum indices. Default value - `false`. -- [input_format_csv_use_best_effort_in_schema_inference](/docs/en/operations/settings/settings.md/#input_format_csv_use_best_effort_in_schema_inference) - use some tweaks and heuristics to infer schema in CSV format. If disabled, all fields will be inferred as Strings. Default value - `true`. -- [input_format_csv_arrays_as_nested_csv](/docs/en/operations/settings/settings.md/#input_format_csv_arrays_as_nested_csv) - when reading Array from CSV, expect that its elements were serialized in nested CSV and then put into string. Default value - `false`. -- [output_format_csv_crlf_end_of_line](/docs/en/operations/settings/settings.md/#output_format_csv_crlf_end_of_line) - if it is set to true, end of line in CSV output format will be `\r\n` instead of `\n`. Default value - `false`. -- [input_format_csv_skip_first_lines](/docs/en/operations/settings/settings.md/#input_format_csv_skip_first_lines) - skip the specified number of lines at the beginning of data. Default value - `0`. +- [format_csv_delimiter](/docs/en/operations/settings/settings-formats.md/#format_csv_delimiter) - the character to be considered as a delimiter in CSV data. Default value - `,`. +- [format_csv_allow_single_quotes](/docs/en/operations/settings/settings-formats.md/#format_csv_allow_single_quotes) - allow strings in single quotes. Default value - `true`. +- [format_csv_allow_double_quotes](/docs/en/operations/settings/settings-formats.md/#format_csv_allow_double_quotes) - allow strings in double quotes. Default value - `true`. +- [format_csv_null_representation](/docs/en/operations/settings/settings-formats.md/#format_tsv_null_representation) - custom NULL representation in CSV format. Default value - `\N`. +- [input_format_csv_empty_as_default](/docs/en/operations/settings/settings-formats.md/#input_format_csv_empty_as_default) - treat empty fields in CSV input as default values. Default value - `true`. For complex default expressions, [input_format_defaults_for_omitted_fields](/docs/en/operations/settings/settings-formats.md/#input_format_defaults_for_omitted_fields) must be enabled too. +- [input_format_csv_enum_as_number](/docs/en/operations/settings/settings-formats.md/#input_format_csv_enum_as_number) - treat inserted enum values in CSV formats as enum indices. Default value - `false`. 
+- [input_format_csv_use_best_effort_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_csv_use_best_effort_in_schema_inference) - use some tweaks and heuristics to infer schema in CSV format. If disabled, all fields will be inferred as Strings. Default value - `true`. +- [input_format_csv_arrays_as_nested_csv](/docs/en/operations/settings/settings-formats.md/#input_format_csv_arrays_as_nested_csv) - when reading Array from CSV, expect that its elements were serialized in nested CSV and then put into string. Default value - `false`. +- [output_format_csv_crlf_end_of_line](/docs/en/operations/settings/settings-formats.md/#output_format_csv_crlf_end_of_line) - if it is set to true, end of line in CSV output format will be `\r\n` instead of `\n`. Default value - `false`. +- [input_format_csv_skip_first_lines](/docs/en/operations/settings/settings-formats.md/#input_format_csv_skip_first_lines) - skip the specified number of lines at the beginning of data. Default value - `0`. +- [input_format_csv_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_csv_detect_header) - automatically detect header with names and types in CSV format. Default value - `true`. ## CSVWithNames {#csvwithnames} Also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames). :::warning -If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, -the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. +If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, +the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. ::: @@ -478,16 +480,18 @@ Otherwise, the first row will be skipped. Also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes). :::warning -If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, -the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. +If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, +the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. 
-If setting [input_format_with_types_use_header](/docs/en/operations/settings/settings.md/#input_format_with_types_use_header) is set to 1, +If setting [input_format_with_types_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_types_use_header) is set to 1, the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped. ::: ## CustomSeparated {#format-customseparated} -Similar to [Template](#format-template), but it prints or reads all names and types of columns and uses escaping rule from [format_custom_escaping_rule](/docs/en/operations/settings/settings.md/#format_custom_escaping_rule) setting and delimiters from [format_custom_field_delimiter](/docs/en/operations/settings/settings.md/#format_custom_field_delimiter), [format_custom_row_before_delimiter](/docs/en/operations/settings/settings.md/#format_custom_row_before_delimiter), [format_custom_row_after_delimiter](/docs/en/operations/settings/settings.md/#format_custom_row_after_delimiter), [format_custom_row_between_delimiter](/docs/en/operations/settings/settings.md/#format_custom_row_between_delimiter), [format_custom_result_before_delimiter](/docs/en/operations/settings/settings.md/#format_custom_result_before_delimiter) and [format_custom_result_after_delimiter](/docs/en/operations/settings/settings.md/#format_custom_result_after_delimiter) settings, not from format strings. +Similar to [Template](#format-template), but it prints or reads all names and types of columns and uses escaping rule from [format_custom_escaping_rule](/docs/en/operations/settings/settings-formats.md/#format_custom_escaping_rule) setting and delimiters from [format_custom_field_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_field_delimiter), [format_custom_row_before_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_row_before_delimiter), [format_custom_row_after_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_row_after_delimiter), [format_custom_row_between_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_row_between_delimiter), [format_custom_result_before_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_result_before_delimiter) and [format_custom_result_after_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_result_after_delimiter) settings, not from format strings. + +If setting [input_format_custom_detect_header](/docs/en/operations/settings/settings.md/#input_format_custom_detect_header) is enabled, ClickHouse will automatically detect header with names and types if any. There is also `CustomSeparatedIgnoreSpaces` format, which is similar to [TemplateIgnoreSpaces](#templateignorespaces). @@ -496,8 +500,8 @@ There is also `CustomSeparatedIgnoreSpaces` format, which is similar to [Templat Also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames). :::warning -If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, -the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. 
+If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, +the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. ::: @@ -506,10 +510,10 @@ Otherwise, the first row will be skipped. Also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes). :::warning -If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, -the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. +If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, +the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. -If setting [input_format_with_types_use_header](/docs/en/operations/settings/settings.md/#input_format_with_types_use_header) is set to 1, +If setting [input_format_with_types_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_types_use_header) is set to 1, the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped. ::: @@ -535,11 +539,11 @@ To read data output by this format you can use [MySQLDump](#mysqldump) input for ### SQLInsert format settings {#sqlinsert-format-settings} -- [output_format_sql_insert_max_batch_size](/docs/en/operations/settings/settings.md/#output_format_sql_insert_max_batch_size) - The maximum number of rows in one INSERT statement. Default value - `65505`. -- [output_format_sql_insert_table_name](/docs/en/operations/settings/settings.md/#output_format_sql_insert_table_name) - The name of the table in the output INSERT query. Default value - `'table'`. -- [output_format_sql_insert_include_column_names](/docs/en/operations/settings/settings.md/#output_format_sql_insert_include_column_names) - Include column names in INSERT query. Default value - `true`. -- [output_format_sql_insert_use_replace](/docs/en/operations/settings/settings.md/#output_format_sql_insert_use_replace) - Use REPLACE statement instead of INSERT. Default value - `false`. -- [output_format_sql_insert_quote_names](/docs/en/operations/settings/settings.md/#output_format_sql_insert_quote_names) - Quote column names with "\`" characters. Default value - `true`. +- [output_format_sql_insert_max_batch_size](/docs/en/operations/settings/settings-formats.md/#output_format_sql_insert_max_batch_size) - The maximum number of rows in one INSERT statement. Default value - `65505`. +- [output_format_sql_insert_table_name](/docs/en/operations/settings/settings-formats.md/#output_format_sql_insert_table_name) - The name of the table in the output INSERT query. 
Default value - `'table'`. +- [output_format_sql_insert_include_column_names](/docs/en/operations/settings/settings-formats.md/#output_format_sql_insert_include_column_names) - Include column names in INSERT query. Default value - `true`. +- [output_format_sql_insert_use_replace](/docs/en/operations/settings/settings-formats.md/#output_format_sql_insert_use_replace) - Use REPLACE statement instead of INSERT. Default value - `false`. +- [output_format_sql_insert_quote_names](/docs/en/operations/settings/settings-formats.md/#output_format_sql_insert_quote_names) - Quote column names with "\`" characters. Default value - `true`. ## JSON {#json} @@ -599,7 +603,7 @@ SELECT SearchPhrase, count() AS c FROM test.hits GROUP BY SearchPhrase WITH TOTA } ``` -The JSON is compatible with JavaScript. To ensure this, some characters are additionally escaped: the slash `/` is escaped as `\/`; alternative line breaks `U+2028` and `U+2029`, which break some browsers, are escaped as `\uXXXX`. ASCII control characters are escaped: backspace, form feed, line feed, carriage return, and horizontal tab are replaced with `\b`, `\f`, `\n`, `\r`, `\t` , as well as the remaining bytes in the 00-1F range using `\uXXXX` sequences. Invalid UTF-8 sequences are changed to the replacement character � so the output text will consist of valid UTF-8 sequences. For compatibility with JavaScript, Int64 and UInt64 integers are enclosed in double quotes by default. To remove the quotes, you can set the configuration parameter [output_format_json_quote_64bit_integers](/docs/en/operations/settings/settings.md/#output_format_json_quote_64bit_integers) to 0. +The JSON is compatible with JavaScript. To ensure this, some characters are additionally escaped: the slash `/` is escaped as `\/`; alternative line breaks `U+2028` and `U+2029`, which break some browsers, are escaped as `\uXXXX`. ASCII control characters are escaped: backspace, form feed, line feed, carriage return, and horizontal tab are replaced with `\b`, `\f`, `\n`, `\r`, `\t` , as well as the remaining bytes in the 00-1F range using `\uXXXX` sequences. Invalid UTF-8 sequences are changed to the replacement character � so the output text will consist of valid UTF-8 sequences. For compatibility with JavaScript, Int64 and UInt64 integers are enclosed in double quotes by default. To remove the quotes, you can set the configuration parameter [output_format_json_quote_64bit_integers](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_64bit_integers) to 0. `rows` – The total number of output rows. @@ -610,14 +614,14 @@ If the query contains GROUP BY, rows_before_limit_at_least is the exact number o `extremes` – Extreme values (when extremes are set to 1). -ClickHouse supports [NULL](/docs/en/sql-reference/syntax.md), which is displayed as `null` in the JSON output. To enable `+nan`, `-nan`, `+inf`, `-inf` values in output, set the [output_format_json_quote_denormals](/docs/en/operations/settings/settings.md/#output_format_json_quote_denormals) to 1. +ClickHouse supports [NULL](/docs/en/sql-reference/syntax.md), which is displayed as `null` in the JSON output. To enable `+nan`, `-nan`, `+inf`, `-inf` values in output, set the [output_format_json_quote_denormals](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_denormals) to 1. 
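A brief hypothetical illustration of the two quoting settings mentioned above (not part of this patch):

```sql
-- 64-bit integers are quoted by default for JavaScript compatibility
SELECT toUInt64(1) AS x FORMAT JSON;   -- "x": "1"

SET output_format_json_quote_64bit_integers = 0;
SELECT toUInt64(1) AS x FORMAT JSON;   -- "x": 1

-- Render nan/inf instead of null for denormal floating-point values
SET output_format_json_quote_denormals = 1;
SELECT 0 / 0 AS nan_value FORMAT JSON; -- "nan_value": "nan"
```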
**See Also** - [JSONEachRow](#jsoneachrow) format -- [output_format_json_array_of_rows](/docs/en/operations/settings/settings.md/#output_format_json_array_of_rows) setting +- [output_format_json_array_of_rows](/docs/en/operations/settings/settings-formats.md/#output_format_json_array_of_rows) setting -For JSON input format, if setting [input_format_json_validate_types_from_metadata](/docs/en/operations/settings/settings.md/#input_format_json_validate_types_from_metadata) is set to 1, +For JSON input format, if setting [input_format_json_validate_types_from_metadata](/docs/en/operations/settings/settings-formats.md/#input_format_json_validate_types_from_metadata) is set to 1, the types from metadata in input data will be compared with the types of the corresponding columns from the table. ## JSONStrings {#jsonstrings} @@ -690,8 +694,8 @@ Example: } ``` -During import, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. -Columns that are not present in the block will be filled with default values (you can use the [input_format_defaults_for_omitted_fields](/docs/en/operations/settings/settings.md/#input_format_defaults_for_omitted_fields) setting here) +During import, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. +Columns that are not present in the block will be filled with default values (you can use the [input_format_defaults_for_omitted_fields](/docs/en/operations/settings/settings-formats.md/#input_format_defaults_for_omitted_fields) setting here) ## JSONColumnsWithMetadata {#jsoncolumnsmonoblock} @@ -739,7 +743,7 @@ Example: } ``` -For JSONColumnsWithMetadata input format, if setting [input_format_json_validate_types_from_metadata](/docs/en/operations/settings/settings.md/#input_format_json_validate_types_from_metadata) is set to 1, +For JSONColumnsWithMetadata input format, if setting [input_format_json_validate_types_from_metadata](/docs/en/operations/settings/settings-formats.md/#input_format_json_validate_types_from_metadata) is set to 1, the types from metadata in input data will be compared with the types of the corresponding columns from the table. ## JSONAsString {#jsonasstring} @@ -891,7 +895,7 @@ Example: ] ``` -Columns that are not present in the block will be filled with default values (you can use [input_format_defaults_for_omitted_fields](/docs/en/operations/settings/settings.md/#input_format_defaults_for_omitted_fields) setting here) +Columns that are not present in the block will be filled with default values (you can use [input_format_defaults_for_omitted_fields](/docs/en/operations/settings/settings-formats.md/#input_format_defaults_for_omitted_fields) setting here) ## JSONEachRow {#jsoneachrow} @@ -905,7 +909,7 @@ Example: {"num":44,"str":"hello","arr":[0,1,2,3]} ``` -While importing data columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. +While importing data columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. 
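For example (a hypothetical sketch, not part of this patch; the table is invented), unknown keys can be dropped on import like this:

```sql
CREATE TABLE json_rows (num Int32, str String) ENGINE = Memory;
SET input_format_skip_unknown_fields = 1;
-- "extra" is not a column of the table and is silently skipped
INSERT INTO json_rows FORMAT JSONEachRow {"num": 44, "str": "hello", "extra": [0, 1, 2]}
```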
## JSONStringsEachRow {#jsonstringseachrow} @@ -960,8 +964,8 @@ Differs from `JSONEachRow`/`JSONStringsEachRow` in that ClickHouse will also yie Differs from `JSONCompactEachRow` format in that it also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames). :::warning -If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, -the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. +If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, +the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. ::: @@ -970,10 +974,10 @@ Otherwise, the first row will be skipped. Differs from `JSONCompactEachRow` format in that it also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes). :::warning -If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, -the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. +If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, +the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. -If setting [input_format_with_types_use_header](/docs/en/operations/settings/settings.md/#input_format_with_types_use_header) is set to 1, +If setting [input_format_with_types_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_types_use_header) is set to 1, the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped. ::: @@ -982,8 +986,8 @@ the types from input data will be compared with the types of the corresponding c Differs from `JSONCompactStringsEachRow` in that in that it also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames). :::warning -If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, -the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. 
+If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, +the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. ::: @@ -992,10 +996,10 @@ Otherwise, the first row will be skipped. Differs from `JSONCompactStringsEachRow` in that it also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes). :::warning -If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, -the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. +If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, +the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. -If setting [input_format_with_types_use_header](/docs/en/operations/settings/settings.md/#input_format_with_types_use_header) is set to 1, +If setting [input_format_with_types_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_types_use_header) is set to 1, the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped. ::: @@ -1021,7 +1025,7 @@ Example: } ``` -To use an object name as a column value you can use the special setting [format_json_object_each_row_column_for_object_name](/docs/en/operations/settings/settings.md/#format_json_object_each_row_column_for_object_name). The value of this setting is set to the name of a column, that is used as JSON key for a row in the resulting object. +To use an object name as a column value you can use the special setting [format_json_object_each_row_column_for_object_name](/docs/en/operations/settings/settings-formats.md/#format_json_object_each_row_column_for_object_name). The value of this setting is set to the name of a column, that is used as JSON key for a row in the resulting object. Examples: For output: @@ -1095,7 +1099,7 @@ ClickHouse ignores spaces between elements and commas after the objects. You can ClickHouse substitutes omitted values with the default values for the corresponding [data types](/docs/en/sql-reference/data-types/index.md). -If `DEFAULT expr` is specified, ClickHouse uses different substitution rules depending on the [input_format_defaults_for_omitted_fields](/docs/en/operations/settings/settings.md/#input_format_defaults_for_omitted_fields) setting. +If `DEFAULT expr` is specified, ClickHouse uses different substitution rules depending on the [input_format_defaults_for_omitted_fields](/docs/en/operations/settings/settings-formats.md/#input_format_defaults_for_omitted_fields) setting. 
Consider the following table: @@ -1140,7 +1144,7 @@ Any set of bytes can be output in the strings. Use the `JSONEachRow` format if y ### Usage of Nested Structures {#jsoneachrow-nested} -If you have a table with [Nested](/docs/en/sql-reference/data-types/nested-data-structures/nested.md) data type columns, you can insert JSON data with the same structure. Enable this feature with the [input_format_import_nested_json](/docs/en/operations/settings/settings.md/#input_format_import_nested_json) setting. +If you have a table with [Nested](/docs/en/sql-reference/data-types/nested-data-structures/nested.md) data type columns, you can insert JSON data with the same structure. Enable this feature with the [input_format_import_nested_json](/docs/en/operations/settings/settings-formats.md/#input_format_import_nested_json) setting. For example, consider the following table: @@ -1154,7 +1158,7 @@ As you can see in the `Nested` data type description, ClickHouse treats each com INSERT INTO json_each_row_nested FORMAT JSONEachRow {"n.s": ["abc", "def"], "n.i": [1, 23]} ``` -To insert data as a hierarchical JSON object, set [input_format_import_nested_json=1](/docs/en/operations/settings/settings.md/#input_format_import_nested_json). +To insert data as a hierarchical JSON object, set [input_format_import_nested_json=1](/docs/en/operations/settings/settings-formats.md/#input_format_import_nested_json). ``` json { @@ -1199,20 +1203,20 @@ SELECT * FROM json_each_row_nested ### JSON formats settings {#json-formats-settings} -- [input_format_import_nested_json](/docs/en/operations/settings/settings.md/#input_format_import_nested_json) - map nested JSON data to nested tables (it works for JSONEachRow format). Default value - `false`. -- [input_format_json_read_bools_as_numbers](/docs/en/operations/settings/settings.md/#input_format_json_read_bools_as_numbers) - allow to parse bools as numbers in JSON input formats. Default value - `true`. -- [input_format_json_read_numbers_as_strings](/docs/en/operations/settings/settings.md/#input_format_json_read_numbers_as_strings) - allow to parse numbers as strings in JSON input formats. Default value - `false`. -- [input_format_json_read_objects_as_strings](/docs/en/operations/settings/settings.md/#input_format_json_read_objects_as_strings) - allow to parse JSON objects as strings in JSON input formats. Default value - `false`. -- [input_format_json_named_tuples_as_objects](/docs/en/operations/settings/settings.md/#input_format_json_named_tuples_as_objects) - parse named tuple columns as JSON objects. Default value - `true`. -- [input_format_json_defaults_for_missing_elements_in_named_tuple](/docs/en/operations/settings/settings.md/#input_format_json_defaults_for_missing_elements_in_named_tuple) - insert default values for missing elements in JSON object while parsing named tuple. Default value - `true`. -- [output_format_json_quote_64bit_integers](/docs/en/operations/settings/settings.md/#output_format_json_quote_64bit_integers) - controls quoting of 64-bit integers in JSON output format. Default value - `true`. -- [output_format_json_quote_64bit_floats](/docs/en/operations/settings/settings.md/#output_format_json_quote_64bit_floats) - controls quoting of 64-bit floats in JSON output format. Default value - `false`. -- [output_format_json_quote_denormals](/docs/en/operations/settings/settings.md/#output_format_json_quote_denormals) - enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format. Default value - `false`. 
-- [output_format_json_quote_decimals](/docs/en/operations/settings/settings.md/#output_format_json_quote_decimals) - controls quoting of decimals in JSON output format. Default value - `false`. -- [output_format_json_escape_forward_slashes](/docs/en/operations/settings/settings.md/#output_format_json_escape_forward_slashes) - controls escaping forward slashes for string outputs in JSON output format. Default value - `true`. -- [output_format_json_named_tuples_as_objects](/docs/en/operations/settings/settings.md/#output_format_json_named_tuples_as_objects) - serialize named tuple columns as JSON objects. Default value - `true`. -- [output_format_json_array_of_rows](/docs/en/operations/settings/settings.md/#output_format_json_array_of_rows) - output a JSON array of all rows in JSONEachRow(Compact) format. Default value - `false`. -- [output_format_json_validate_utf8](/docs/en/operations/settings/settings.md/#output_format_json_validate_utf8) - enables validation of UTF-8 sequences in JSON output formats (note that it doesn't impact formats JSON/JSONCompact/JSONColumnsWithMetadata, they always validate utf8). Default value - `false`. +- [input_format_import_nested_json](/docs/en/operations/settings/settings-formats.md/#input_format_import_nested_json) - map nested JSON data to nested tables (it works for JSONEachRow format). Default value - `false`. +- [input_format_json_read_bools_as_numbers](/docs/en/operations/settings/settings-formats.md/#input_format_json_read_bools_as_numbers) - allow to parse bools as numbers in JSON input formats. Default value - `true`. +- [input_format_json_read_numbers_as_strings](/docs/en/operations/settings/settings-formats.md/#input_format_json_read_numbers_as_strings) - allow to parse numbers as strings in JSON input formats. Default value - `false`. +- [input_format_json_read_objects_as_strings](/docs/en/operations/settings/settings-formats.md/#input_format_json_read_objects_as_strings) - allow to parse JSON objects as strings in JSON input formats. Default value - `false`. +- [input_format_json_named_tuples_as_objects](/docs/en/operations/settings/settings-formats.md/#input_format_json_named_tuples_as_objects) - parse named tuple columns as JSON objects. Default value - `true`. +- [input_format_json_defaults_for_missing_elements_in_named_tuple](/docs/en/operations/settings/settings-formats.md/#input_format_json_defaults_for_missing_elements_in_named_tuple) - insert default values for missing elements in JSON object while parsing named tuple. Default value - `true`. +- [output_format_json_quote_64bit_integers](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_64bit_integers) - controls quoting of 64-bit integers in JSON output format. Default value - `true`. +- [output_format_json_quote_64bit_floats](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_64bit_floats) - controls quoting of 64-bit floats in JSON output format. Default value - `false`. +- [output_format_json_quote_denormals](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_denormals) - enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format. Default value - `false`. +- [output_format_json_quote_decimals](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_decimals) - controls quoting of decimals in JSON output format. Default value - `false`. 
+- [output_format_json_escape_forward_slashes](/docs/en/operations/settings/settings-formats.md/#output_format_json_escape_forward_slashes) - controls escaping forward slashes for string outputs in JSON output format. Default value - `true`. +- [output_format_json_named_tuples_as_objects](/docs/en/operations/settings/settings-formats.md/#output_format_json_named_tuples_as_objects) - serialize named tuple columns as JSON objects. Default value - `true`. +- [output_format_json_array_of_rows](/docs/en/operations/settings/settings-formats.md/#output_format_json_array_of_rows) - output a JSON array of all rows in JSONEachRow(Compact) format. Default value - `false`. +- [output_format_json_validate_utf8](/docs/en/operations/settings/settings-formats.md/#output_format_json_validate_utf8) - enables validation of UTF-8 sequences in JSON output formats (note that it doesn't impact formats JSON/JSONCompact/JSONColumnsWithMetadata, they always validate utf8). Default value - `false`. ## BSONEachRow {#bsoneachrow} @@ -1274,8 +1278,8 @@ Note: this format don't work properly on Big-Endian platforms. ### BSON format settings {#bson-format-settings} -- [output_format_bson_string_as_string](/docs/en/operations/settings/settings.md/#output_format_bson_string_as_string) - use BSON String type instead of Binary for String columns. Default value - `false`. -- [input_format_bson_skip_fields_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings.md/#input_format_bson_skip_fields_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for format BSONEachRow. Default value - `false`. +- [output_format_bson_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_bson_string_as_string) - use BSON String type instead of Binary for String columns. Default value - `false`. +- [input_format_bson_skip_fields_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_bson_skip_fields_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for format BSONEachRow. Default value - `false`. ## Native {#native} @@ -1408,12 +1412,12 @@ Differs from [PrettySpaceNoEscapes](#prettyspacenoescapes) in that up to 10,000 ## Pretty formats settings {#pretty-formats-settings} -- [output_format_pretty_max_rows](/docs/en/operations/settings/settings.md/#output_format_pretty_max_rows) - rows limit for Pretty formats. Default value - `10000`. -- [output_format_pretty_max_column_pad_width](/docs/en/operations/settings/settings.md/#output_format_pretty_max_column_pad_width) - maximum width to pad all values in a column in Pretty formats. Default value - `250`. -- [output_format_pretty_max_value_width](/docs/en/operations/settings/settings.md/#output_format_pretty_max_value_width) - Maximum width of value to display in Pretty formats. If greater - it will be cut. Default value - `10000`. -- [output_format_pretty_color](/docs/en/operations/settings/settings.md/#output_format_pretty_color) - use ANSI escape sequences to paint colors in Pretty formats. Default value - `true`. -- [output_format_pretty_grid_charset](/docs/en/operations/settings/settings.md/#output_format_pretty_grid_charset) - Charset for printing grid borders. Available charsets: ASCII, UTF-8. Default value - `UTF-8`. 
-- [output_format_pretty_row_numbers](/docs/en/operations/settings/settings.md/#output_format_pretty_row_numbers) - Add row numbers before each row for pretty output format. Default value - `false`. +- [output_format_pretty_max_rows](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_max_rows) - rows limit for Pretty formats. Default value - `10000`. +- [output_format_pretty_max_column_pad_width](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_max_column_pad_width) - maximum width to pad all values in a column in Pretty formats. Default value - `250`. +- [output_format_pretty_max_value_width](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_max_value_width) - Maximum width of value to display in Pretty formats. If greater - it will be cut. Default value - `10000`. +- [output_format_pretty_color](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_color) - use ANSI escape sequences to paint colors in Pretty formats. Default value - `true`. +- [output_format_pretty_grid_charset](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_grid_charset) - Charset for printing grid borders. Available charsets: ASCII, UTF-8. Default value - `UTF-8`. +- [output_format_pretty_row_numbers](/docs/en/operations/settings/settings-formats.md/#output_format_pretty_row_numbers) - Add row numbers before each row for pretty output format. Default value - `false`. ## RowBinary {#rowbinary} @@ -1438,8 +1442,8 @@ Similar to [RowBinary](#rowbinary), but with added header: - N `String`s specifying column names :::warning -If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, -the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. +If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, +the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. ::: @@ -1452,16 +1456,16 @@ Similar to [RowBinary](#rowbinary), but with added header: - N `String`s specifying column types :::warning -If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings.md/#input_format_with_names_use_header) is set to 1, -the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. +If setting [input_format_with_names_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_names_use_header) is set to 1, +the columns from input data will be mapped to the columns from the table by their names, columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. Otherwise, the first row will be skipped. 
-If setting [input_format_with_types_use_header](/docs/en/operations/settings/settings.md/#input_format_with_types_use_header) is set to 1, +If setting [input_format_with_types_use_header](/docs/en/operations/settings/settings-formats.md/#input_format_with_types_use_header) is set to 1, the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped. ::: ## RowBinary format settings {#row-binary-format-settings} -- [format_binary_max_string_size](/docs/en/operations/settings/settings.md/#format_binary_max_string_size) - The maximum allowed size for String in RowBinary format. Default value - `1GiB`. +- [format_binary_max_string_size](/docs/en/operations/settings/settings-formats.md/#format_binary_max_string_size) - The maximum allowed size for String in RowBinary format. Default value - `1GiB`. ## Values {#data-format-values} @@ -1473,9 +1477,9 @@ This is the format that is used in `INSERT INTO t VALUES ...`, but you can also ## Values format settings {#values-format-settings} -- [input_format_values_interpret_expressions](/docs/en/operations/settings/settings.md/#input_format_values_interpret_expressions) - if the field could not be parsed by streaming parser, run SQL parser and try to interpret it as SQL expression. Default value - `true`. -- [input_format_values_deduce_templates_of_expressions](/docs/en/operations/settings/settings.md/#input_format_values_deduce_templates_of_expressions) -if the field could not be parsed by streaming parser, run SQL parser, deduce template of the SQL expression, try to parse all rows using template and then interpret expression for all rows. Default value - `true`. -- [input_format_values_accurate_types_of_literals](/docs/en/operations/settings/settings.md/#input_format_values_accurate_types_of_literals) - when parsing and interpreting expressions using template, check actual type of literal to avoid possible overflow and precision issues. Default value - `true`. +- [input_format_values_interpret_expressions](/docs/en/operations/settings/settings-formats.md/#input_format_values_interpret_expressions) - if the field could not be parsed by streaming parser, run SQL parser and try to interpret it as SQL expression. Default value - `true`. +- [input_format_values_deduce_templates_of_expressions](/docs/en/operations/settings/settings-formats.md/#input_format_values_deduce_templates_of_expressions) -if the field could not be parsed by streaming parser, run SQL parser, deduce template of the SQL expression, try to parse all rows using template and then interpret expression for all rows. Default value - `true`. +- [input_format_values_accurate_types_of_literals](/docs/en/operations/settings/settings-formats.md/#input_format_values_accurate_types_of_literals) - when parsing and interpreting expressions using template, check actual type of literal to avoid possible overflow and precision issues. Default value - `true`. ## Vertical {#vertical} @@ -1615,7 +1619,7 @@ The table below shows supported data types and how they match ClickHouse [data t | `LIST` | [Array](/docs/en/sql-reference/data-types/array.md) | `LIST` | | `STRUCT` | [Tuple](/docs/en/sql-reference/data-types/tuple.md) | `STRUCT` | -For working with `Enum` in CapnProto format use the [format_capn_proto_enum_comparising_mode](/docs/en/operations/settings/settings.md/#format_capn_proto_enum_comparising_mode) setting. 
+For working with `Enum` in CapnProto format use the [format_capn_proto_enum_comparising_mode](/docs/en/operations/settings/settings-formats.md/#format_capn_proto_enum_comparising_mode) setting. Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` type also can be nested. @@ -1714,7 +1718,7 @@ something_weird{problem="division by zero"} +Inf -3982045 ## Protobuf {#protobuf} -Protobuf - is a [Protocol Buffers](https://developers.google.com/protocol-buffers/) format. +Protobuf - is a [Protocol Buffers](https://protobuf.dev/) format. This format requires an external format schema. The schema is cached between queries. ClickHouse supports both `proto2` and `proto3` syntaxes. Repeated/optional/required fields are supported. @@ -1809,7 +1813,7 @@ The table below shows supported data types and how they match ClickHouse [data t | `long (timestamp-millis)` \** | [DateTime64(3)](/docs/en/sql-reference/data-types/datetime.md) | `long (timestamp-millis)` \* | | `long (timestamp-micros)` \** | [DateTime64(6)](/docs/en/sql-reference/data-types/datetime.md) | `long (timestamp-micros)` \* | -\* `bytes` is default, controlled by [output_format_avro_string_column_pattern](/docs/en/operations/settings/settings.md/#output_format_avro_string_column_pattern) +\* `bytes` is default, controlled by [output_format_avro_string_column_pattern](/docs/en/operations/settings/settings-formats.md/#output_format_avro_string_column_pattern) \** [Avro logical types](https://avro.apache.org/docs/current/spec.html#Logical+Types) Unsupported Avro data types: `record` (non-root), `map` @@ -1831,7 +1835,7 @@ Unused fields are skipped. Data types of ClickHouse table columns can differ from the corresponding fields of the Avro data inserted. When inserting data, ClickHouse interprets data types according to the table above and then [casts](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_function-cast) the data to corresponding column type. -While importing data, when field is not found in schema and setting [input_format_avro_allow_missing_fields](/docs/en/operations/settings/settings.md/#input_format_avro_allow_missing_fields) is enabled, default value will be used instead of error. +While importing data, when field is not found in schema and setting [input_format_avro_allow_missing_fields](/docs/en/operations/settings/settings-formats.md/#input_format_avro_allow_missing_fields) is enabled, default value will be used instead of error. ### Selecting Data {#selecting-data-1} @@ -1846,7 +1850,7 @@ Column names must: - start with `[A-Za-z_]` - subsequently contain only `[A-Za-z0-9_]` -Output Avro file compression and sync interval can be configured with [output_format_avro_codec](/docs/en/operations/settings/settings.md/#output_format_avro_codec) and [output_format_avro_sync_interval](/docs/en/operations/settings/settings.md/#output_format_avro_sync_interval) respectively. +Output Avro file compression and sync interval can be configured with [output_format_avro_codec](/docs/en/operations/settings/settings-formats.md/#output_format_avro_codec) and [output_format_avro_sync_interval](/docs/en/operations/settings/settings-formats.md/#output_format_avro_sync_interval) respectively. ## AvroConfluent {#data-format-avro-confluent} @@ -1856,7 +1860,7 @@ Each Avro message embeds a schema id that can be resolved to the actual schema w Schemas are cached once resolved. 
-Schema Registry URL is configured with [format_avro_schema_registry_url](/docs/en/operations/settings/settings.md/#format_avro_schema_registry_url). +Schema Registry URL is configured with [format_avro_schema_registry_url](/docs/en/operations/settings/settings-formats.md/#format_avro_schema_registry_url). ### Data Types Matching {#data_types-matching-1} @@ -1954,12 +1958,12 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t ### Parquet format settings {#parquet-format-settings} -- [output_format_parquet_row_group_size](/docs/en/operations/settings/settings.md/#output_format_parquet_row_group_size) - row group size in rows while data output. Default value - `1000000`. -- [output_format_parquet_string_as_string](/docs/en/operations/settings/settings.md/#output_format_parquet_string_as_string) - use Parquet String type instead of Binary for String columns. Default value - `false`. -- [input_format_parquet_import_nested](/docs/en/operations/settings/settings.md/#input_format_parquet_import_nested) - allow inserting array of structs into [Nested](/docs/en/sql-reference/data-types/nested-data-structures/nested.md) table in Parquet input format. Default value - `false`. -- [input_format_parquet_case_insensitive_column_matching](/docs/en/operations/settings/settings.md/#input_format_parquet_case_insensitive_column_matching) - ignore case when matching Parquet columns with ClickHouse columns. Default value - `false`. -- [input_format_parquet_allow_missing_columns](/docs/en/operations/settings/settings.md/#input_format_parquet_allow_missing_columns) - allow missing columns while reading Parquet data. Default value - `false`. -- [input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings.md/#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Parquet format. Default value - `false`. +- [output_format_parquet_row_group_size](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_row_group_size) - row group size in rows while data output. Default value - `1000000`. +- [output_format_parquet_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_string_as_string) - use Parquet String type instead of Binary for String columns. Default value - `false`. +- [input_format_parquet_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_import_nested) - allow inserting array of structs into [Nested](/docs/en/sql-reference/data-types/nested-data-structures/nested.md) table in Parquet input format. Default value - `false`. +- [input_format_parquet_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_case_insensitive_column_matching) - ignore case when matching Parquet columns with ClickHouse columns. Default value - `false`. +- [input_format_parquet_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_allow_missing_columns) - allow missing columns while reading Parquet data. Default value - `false`. +- [input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Parquet format. Default value - `false`. 
## Arrow {#data-format-arrow} @@ -1997,7 +2001,7 @@ The table below shows supported data types and how they match ClickHouse [data t Arrays can be nested and can have a value of the `Nullable` type as an argument. `Tuple` and `Map` types also can be nested. -The `DICTIONARY` type is supported for `INSERT` queries, and for `SELECT` queries there is an [output_format_arrow_low_cardinality_as_dictionary](/docs/en/operations/settings/settings.md/#output-format-arrow-low-cardinality-as-dictionary) setting that allows to output [LowCardinality](/docs/en/sql-reference/data-types/lowcardinality.md) type as a `DICTIONARY` type. +The `DICTIONARY` type is supported for `INSERT` queries, and for `SELECT` queries there is an [output_format_arrow_low_cardinality_as_dictionary](/docs/en/operations/settings/settings-formats.md/#output-format-arrow-low-cardinality-as-dictionary) setting that allows to output [LowCardinality](/docs/en/sql-reference/data-types/lowcardinality.md) type as a `DICTIONARY` type. Unsupported Arrow data types: `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`. @@ -2021,12 +2025,12 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filenam ### Arrow format settings {#parquet-format-settings} -- [output_format_arrow_low_cardinality_as_dictionary](/docs/en/operations/settings/settings.md/#output_format_arrow_low_cardinality_as_dictionary) - enable output ClickHouse LowCardinality type as Dictionary Arrow type. Default value - `false`. -- [output_format_arrow_string_as_string](/docs/en/operations/settings/settings.md/#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`. -- [input_format_arrow_import_nested](/docs/en/operations/settings/settings.md/#input_format_arrow_import_nested) - allow inserting array of structs into Nested table in Arrow input format. Default value - `false`. -- [input_format_arrow_case_insensitive_column_matching](/docs/en/operations/settings/settings.md/#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`. -- [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`. -- [input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings.md/#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Arrow format. Default value - `false`. +- [output_format_arrow_low_cardinality_as_dictionary](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_low_cardinality_as_dictionary) - enable output ClickHouse LowCardinality type as Dictionary Arrow type. Default value - `false`. +- [output_format_arrow_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`. +- [input_format_arrow_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_import_nested) - allow inserting array of structs into Nested table in Arrow input format. Default value - `false`. 
+- [input_format_arrow_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`. +- [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`. +- [input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Arrow format. Default value - `false`. ## ArrowStream {#data-format-arrow-stream} @@ -2081,11 +2085,11 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT ORC" > {filename. ### Arrow format settings {#parquet-format-settings} -- [output_format_arrow_string_as_string](/docs/en/operations/settings/settings.md/#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`. -- [input_format_arrow_import_nested](/docs/en/operations/settings/settings.md/#input_format_arrow_import_nested) - allow inserting array of structs into Nested table in Arrow input format. Default value - `false`. -- [input_format_arrow_case_insensitive_column_matching](/docs/en/operations/settings/settings.md/#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`. -- [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`. -- [input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings.md/#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Arrow format. Default value - `false`. +- [output_format_arrow_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`. +- [input_format_arrow_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_import_nested) - allow inserting array of structs into Nested table in Arrow input format. Default value - `false`. +- [input_format_arrow_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`. +- [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`. +- [input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Arrow format. Default value - `false`. To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/table-engines/integrations/hdfs.md). 
@@ -2133,13 +2137,13 @@ When working with the `Regexp` format, you can use the following settings: **Usage** -The regular expression from [format_regexp](/docs/en/operations/settings/settings.md/#format_regexp) setting is applied to every line of imported data. The number of subpatterns in the regular expression must be equal to the number of columns in imported dataset. +The regular expression from [format_regexp](/docs/en/operations/settings/settings-formats.md/#format_regexp) setting is applied to every line of imported data. The number of subpatterns in the regular expression must be equal to the number of columns in imported dataset. Lines of the imported data must be separated by newline character `'\n'` or DOS-style newline `"\r\n"`. -The content of every matched subpattern is parsed with the method of corresponding data type, according to [format_regexp_escaping_rule](/docs/en/operations/settings/settings.md/#format_regexp_escaping_rule) setting. +The content of every matched subpattern is parsed with the method of corresponding data type, according to [format_regexp_escaping_rule](/docs/en/operations/settings/settings-formats.md/#format_regexp_escaping_rule) setting. -If the regular expression does not match the line and [format_regexp_skip_unmatched](/docs/en/operations/settings/settings.md/#format_regexp_escaping_rule) is set to 1, the line is silently skipped. Otherwise, exception is thrown. +If the regular expression does not match the line and [format_regexp_skip_unmatched](/docs/en/operations/settings/settings-formats.md/#format_regexp_escaping_rule) is set to 1, the line is silently skipped. Otherwise, exception is thrown. **Example** @@ -2197,8 +2201,8 @@ in the server configuration. ## Skipping Errors {#skippingerrors} -Some formats such as `CSV`, `TabSeparated`, `TSKV`, `JSONEachRow`, `Template`, `CustomSeparated` and `Protobuf` can skip broken row if parsing error occurred and continue parsing from the beginning of next row. See [input_format_allow_errors_num](/docs/en/operations/settings/settings.md/#input_format_allow_errors_num) and -[input_format_allow_errors_ratio](/docs/en/operations/settings/settings.md/#input_format_allow_errors_ratio) settings. +Some formats such as `CSV`, `TabSeparated`, `TSKV`, `JSONEachRow`, `Template`, `CustomSeparated` and `Protobuf` can skip broken row if parsing error occurred and continue parsing from the beginning of next row. See [input_format_allow_errors_num](/docs/en/operations/settings/settings-formats.md/#input_format_allow_errors_num) and +[input_format_allow_errors_ratio](/docs/en/operations/settings/settings-formats.md/#input_format_allow_errors_ratio) settings. Limitations: - In case of parsing error `JSONEachRow` skips all data until the new line (or EOF), so rows must be delimited by `\n` to count errors correctly. - `Template` and `CustomSeparated` use delimiter after the last column and delimiter between rows to find the beginning of next row, so skipping errors works only if at least one of them is not empty. @@ -2277,17 +2281,17 @@ $ clickhouse-client --query="SELECT * FROM msgpack FORMAT MsgPack" > tmp_msgpack ### MsgPack format settings {#msgpack-format-settings} -- [input_format_msgpack_number_of_columns](/docs/en/operations/settings/settings.md/#input_format_msgpack_number_of_columns) - the number of columns in inserted MsgPack data. Used for automatic schema inference from data. Default value - `0`. 
-- [output_format_msgpack_uuid_representation](/docs/en/operations/settings/settings.md/#output_format_msgpack_uuid_representation) - the way how to output UUID in MsgPack format. Default value - `EXT`. +- [input_format_msgpack_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_msgpack_number_of_columns) - the number of columns in inserted MsgPack data. Used for automatic schema inference from data. Default value - `0`. +- [output_format_msgpack_uuid_representation](/docs/en/operations/settings/settings-formats.md/#output_format_msgpack_uuid_representation) - the way how to output UUID in MsgPack format. Default value - `EXT`. ## MySQLDump {#mysqldump} ClickHouse supports reading MySQL [dumps](https://dev.mysql.com/doc/refman/8.0/en/mysqldump.html). It reads all data from INSERT queries belonging to one table in dump. If there are more than one table, by default it reads data from the first one. -You can specify the name of the table from which to read data from using [input_format_mysql_dump_table_name](/docs/en/operations/settings/settings.md/#input_format_mysql_dump_table_name) settings. -If setting [input_format_mysql_dump_map_columns](/docs/en/operations/settings/settings.md/#input_format_mysql_dump_map_columns) is set to 1 and +You can specify the name of the table from which to read data from using [input_format_mysql_dump_table_name](/docs/en/operations/settings/settings-formats.md/#input_format_mysql_dump_table_name) settings. +If setting [input_format_mysql_dump_map_columns](/docs/en/operations/settings/settings-formats.md/#input_format_mysql_dump_map_columns) is set to 1 and dump contains CREATE query for specified table or column names in INSERT query the columns from input data will be mapped to the columns from the table by their names, -columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings.md/#input_format_skip_unknown_fields) is set to 1. +columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](/docs/en/operations/settings/settings-formats.md/#input_format_skip_unknown_fields) is set to 1. This format supports schema inference: if the dump contains CREATE query for the specified table, the structure is extracted from it, otherwise schema is inferred from the data of INSERT queries. Examples: diff --git a/docs/en/interfaces/schema-inference.md b/docs/en/interfaces/schema-inference.md index 9d87bdced1a..728afa73a17 100644 --- a/docs/en/interfaces/schema-inference.md +++ b/docs/en/interfaces/schema-inference.md @@ -558,6 +558,8 @@ and if the value is not a number, ClickHouse treats it as a string. If you don't want ClickHouse to try to determine complex types using some parsers and heuristics, you can disable setting `input_format_csv_use_best_effort_in_schema_inference` and ClickHouse will treat all columns as Strings. +If setting `input_format_csv_detect_header` is enabled, ClickHouse will try to detect the header with column names (and maybe types) while inferring schema. This setting is enabled by default. 
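If the first row of a CSV file is real data that merely looks like a header, detection can be switched off per query so that schema inference treats every row, including the first, as data. A minimal sketch reusing the `DESC format(...) SETTINGS ...` pattern shown on this page; the sample rows are invented for illustration:

```sql
-- With header detection disabled, generic column names (c1, c2, ...) are used
-- and the first row is inferred together with the rest of the data.
DESC format(CSV,
$$"id","note"
42,"Hello"
43,"World"
$$) SETTINGS input_format_csv_detect_header = 0;
```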
+ **Examples:** Integers, Floats, Bools, Strings: @@ -669,6 +671,61 @@ DESC format(CSV, '"[1,2,3]",42.42,Hello World!') └──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` +Examples of header auto-detection (when `input_format_csv_detect_header` is enabled): + +Only names: +```sql +SELECT * FROM format(CSV, +$$"number","string","array" +42,"Hello","[1, 2, 3]" +43,"World","[4, 5, 6]" +$$) +``` + +```response +┌─number─┬─string─┬─array───┐ +│ 42 │ Hello │ [1,2,3] │ +│ 43 │ World │ [4,5,6] │ +└────────┴────────┴─────────┘ +``` + +Names and types: + +```sql +DESC format(CSV, +$$"number","string","array" +"UInt32","String","Array(UInt16)" +42,"Hello","[1, 2, 3]" +43,"World","[4, 5, 6]" +$$) +``` + +```response +┌─name───┬─type──────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ number │ UInt32 │ │ │ │ │ │ +│ string │ String │ │ │ │ │ │ +│ array │ Array(UInt16) │ │ │ │ │ │ +└────────┴───────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Note that the header can be detected only if there is at least one column with a non-String type. If all columns have String type, the header is not detected: + +```sql +SELECT * FROM format(CSV, +$$"first_column","second_column" +"Hello","World" +"World","Hello" +$$) +``` + +```response +┌─c1───────────┬─c2────────────┐ +│ first_column │ second_column │ +│ Hello │ World │ +│ World │ Hello │ +└──────────────┴───────────────┘ +``` + ## TSV/TSKV {#tsv-tskv} In TSV/TSKV formats ClickHouse extracts column value from the row according to tabular delimiters and then parses extracted value using @@ -677,6 +734,7 @@ the recursive parser to determine the most appropriate type. If the type cannot If you don't want ClickHouse to try to determine complex types using some parsers and heuristics, you can disable setting `input_format_tsv_use_best_effort_in_schema_inference` and ClickHouse will treat all columns as Strings. +If setting `input_format_tsv_detect_header` is enabled, ClickHouse will try to detect the header with column names (and maybe types) while inferring schema. This setting is enabled by default. **Examples:** @@ -799,6 +857,61 @@ DESC format(TSV, '[1,2,3] 42.42 Hello World!') └──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` +Examples of header auto-detection (when `input_format_tsv_detect_header` is enabled): + +Only names: +```sql +SELECT * FROM format(TSV, +$$number string array +42 Hello [1, 2, 3] +43 World [4, 5, 6] +$$); +``` + +```response +┌─number─┬─string─┬─array───┐ +│ 42 │ Hello │ [1,2,3] │ +│ 43 │ World │ [4,5,6] │ +└────────┴────────┴─────────┘ +``` + +Names and types: + +```sql +DESC format(TSV, +$$number string array +UInt32 String Array(UInt16) +42 Hello [1, 2, 3] +43 World [4, 5, 6] +$$) +``` + +```response +┌─name───┬─type──────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ number │ UInt32 │ │ │ │ │ │ +│ string │ String │ │ │ │ │ │ +│ array │ Array(UInt16) │ │ │ │ │ │ +└────────┴───────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Note that the header can be detected only if there is at least one column with a non-String type. 
If all columns have String type, the header is not detected: + +```sql +SELECT * FROM format(TSV, +$$first_column second_column +Hello World +World Hello +$$) +``` + +```response +┌─c1───────────┬─c2────────────┐ +│ first_column │ second_column │ +│ Hello │ World │ +│ World │ Hello │ +└──────────────┴───────────────┘ +``` + ## Values {#values} In Values format ClickHouse extracts column value from the row and then parses it using @@ -911,6 +1024,8 @@ DESC format(TSV, '[1,2,3] 42.42 Hello World!') In CustomSeparated format ClickHouse first extracts all column values from the row according to specified delimiters and then tries to infer the data type for each value according to escaping rule. +If setting `input_format_custom_detect_header` is enabled, ClickHouse will try to detect the header with column names (and maybe types) while inferring schema. This setting is enabled by default. + **Example** ```sql @@ -937,6 +1052,34 @@ $$) └──────┴────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` +Example of header auto-detection (when `input_format_custom_detect_header` is enabled): + +```sql +SET format_custom_row_before_delimiter = '', + format_custom_row_after_delimiter = '\n', + format_custom_row_between_delimiter = '\n', + format_custom_result_before_delimiter = '\n', + format_custom_result_after_delimiter = '\n', + format_custom_field_delimiter = '', + format_custom_escaping_rule = 'Quoted' + +DESC format(CustomSeparated, $$ +'number''string''array' + +42.42'Some string 1'[1, NULL, 3] + +NULL'Some string 3'[1, 2, NULL] + +$$) +``` + +```response +┌─number─┬─string────────┬─array──────┐ +│ 42.42 │ Some string 1 │ [1,NULL,3] │ +│ ᴺᵁᴸᴸ │ Some string 3 │ [1,2,NULL] │ +└────────┴───────────────┴────────────┘ +``` + ## Template {#template} In Template format ClickHouse first extracts all column values from the row according to the specified template and then tries to infer the @@ -1193,7 +1336,7 @@ DESC format(JSONEachRow, $$ └──────────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ ``` -Note: Parsing datetimes during schema inference respect setting [date_time_input_format](/docs/en/operations/settings/settings.md#date_time_input_format) +Note: Parsing datetimes during schema inference respect setting [date_time_input_format](/docs/en/operations/settings/settings-formats.md#date_time_input_format) ### input_format_try_infer_dates diff --git a/docs/en/operations/caches.md b/docs/en/operations/caches.md index 86760ec245f..d912b8a5990 100644 --- a/docs/en/operations/caches.md +++ b/docs/en/operations/caches.md @@ -22,5 +22,6 @@ Additional cache types: - [Dictionaries](../sql-reference/dictionaries/index.md) data cache. - Schema inference cache. - [Filesystem cache](storing-data.md) over S3, Azure, Local and other disks. +- [(Experimental) Query result cache](query-result-cache.md). To drop one of the caches, use [SYSTEM DROP ... CACHE](../sql-reference/statements/system.md#drop-mark-cache) statements. 
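For instance, the caches listed above can be cleared individually; the sketch below uses the mark-cache statement linked above and the query-result-cache statement documented later in this patch:

```sql
-- Drop the mark cache.
SYSTEM DROP MARK CACHE;

-- Drop the experimental query result cache.
SYSTEM DROP QUERY RESULT CACHE;
```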
diff --git a/docs/en/operations/query-result-cache.md b/docs/en/operations/query-result-cache.md new file mode 100644 index 00000000000..a166cdd143f --- /dev/null +++ b/docs/en/operations/query-result-cache.md @@ -0,0 +1,99 @@ +--- +slug: /en/operations/query-result-cache +sidebar_position: 65 +sidebar_label: Query Result Cache [experimental] +--- + +# Query Result Cache [experimental] + +The query result cache allows to compute SELECT queries just once and to serve further executions of the same query directly from the cache. +Depending on the type of the queries, this can dramatically reduce latency and resource consumption of the ClickHouse server. + +## Background, Design and Limitations + +Query result caches can generally be viewed as transactionally consistent or inconsistent. + +- In transactionally consistent caches, the database invalidates (discards) cached query results if the result of the SELECT query changes + or potentially changes. In ClickHouse, operations which change the data include inserts/updates/deletes in/of/from tables or collapsing + merges. Transactionally consistent caching is especially suitable for OLTP databases, for example + [MySQL](https://dev.mysql.com/doc/refman/5.6/en/query-cache.html) (which removed query result cache after v8.0) and + [Oracle](https://docs.oracle.com/database/121/TGDBA/tune_result_cache.htm). +- In transactionally inconsistent caches, slight inaccuracies in query results are accepted under the assumption that all cache entries are + assigned a validity period after which they expire (e.g. 1 minute) and that the underlying data changes only little during this period. + This approach is overall more suitable for OLAP databases. As an example where transactionally inconsistent caching is sufficient, + consider an hourly sales report in a reporting tool which is simultaneously accessed by multiple users. Sales data changes typically + slowly enough that the database only needs to compute the report once (represented by the first SELECT query). Further queries can be + served directly from the query result cache. In this example, a reasonable validity period could be 30 min. + +Transactionally inconsistent caching is traditionally provided by client tools or proxy packages interacting with the database. As a result, +the same caching logic and configuration is often duplicated. With ClickHouse's query result cache, the caching logic moves to the server +side. This reduces maintenance effort and avoids redundancy. + +:::warning +The query result cache is an experimental feature that should not be used in production. There are known cases (e.g. in distributed query +processing) where wrong results are returned. +::: + +## Configuration Settings and Usage + +Parameter [enable_experimental_query_result_cache](settings/settings.md#enable-experimental-query-result-cache) controls whether query +results are inserted into / retrieved from the cache for the current query or session. For example, the first execution of query + +``` sql +SELECT some_expensive_calculation(column_1, column_2) +FROM table +SETTINGS enable_experimental_query_result_cache = true; +``` + +stores the query result into the query result cache. Subsequent executions of the same query (also with parameter +`enable_experimental_query_result_cache = true`) will read the computed result directly from the cache. + +Sometimes, it is desirable to use the query result cache only passively, i.e. to allow reading from it but not writing into it (if the cache +result is not stored yet). 
Parameter [enable_experimental_query_result_cache_passive_usage](settings/settings.md#enable-experimental-query-result-cache-passive-usage) +can be used instead of 'enable_experimental_query_result_cache' for that. + +For maximum control, it is generally recommended to provide settings "enable_experimental_query_result_cache" or +"enable_experimental_query_result_cache_passive_usage" only with specific queries. It is also possible to enable caching at user or profile +level, but keep in mind that all SELECT queries may then return cached results, including monitoring or debugging queries to system +tables. + +The query result cache can be cleared using statement `SYSTEM DROP QUERY RESULT CACHE`. The content of the query result cache is displayed +in system table `SYSTEM.QUERY_RESULT_CACHE`. The number of query result cache hits and misses is shown as events "QueryResultCacheHits" and +"QueryResultCacheMisses" in system table `SYSTEM.EVENTS`. Both counters are only updated for SELECT queries that run with settings +"enable_experimental_query_result_cache = true" or "enable_experimental_query_result_cache_passive_usage = true". Other queries do not +affect the cache miss counter. + +The query result cache exists once per ClickHouse server process. However, cached results are by default not shared between users. This can +be changed (see below) but doing so is not recommended for security reasons. + +Query results are referenced in the query result cache by the [Abstract Syntax Tree (AST)](https://en.wikipedia.org/wiki/Abstract_syntax_tree) +of their query. This means that caching is agnostic to upper/lowercase, for example `SELECT 1` and `select 1` are treated as the same query. +To make the matching more natural, all query-level settings related to the query result cache are removed from the AST. + +If the query was aborted due to an exception or user cancellation, no entry is written into the query result cache. + +The size of the query result cache, the maximum number of cache entries and the maximum size of cache entries (in bytes and in records) can +be configured using different [server configuration options](server-configuration-parameters/settings.md#server_configuration_parameters_query-result-cache). + +To define the minimum amount of time a query must run for its result to be cached, use setting +[query_result_cache_min_query_duration](settings/settings.md#query-result-cache-min-query-duration). For example, the result of query + +``` sql +SELECT some_expensive_calculation(column_1, column_2) +FROM table +SETTINGS enable_experimental_query_result_cache = true, query_result_cache_min_query_duration = 5000; +``` + +is only cached if the query runs longer than 5 seconds. It is also possible to specify how often a query needs to run until its result is +cached; for that, use setting [query_result_cache_min_query_runs](settings/settings.md#query-result-cache-min-query-runs). + +Entries in the query result cache become stale after a certain time period (time-to-live). By default, this period is 60 seconds, but a +different value can be specified at session, profile or query level using setting [query_result_cache_ttl](settings/settings.md#query-result-cache-ttl). + +Also, results of queries with non-deterministic functions such as `rand()` and `now()` are not cached.
This can be overruled using +setting [query_result_cache_store_results_of_queries_with_nondeterministic_functions](settings/settings.md#query-result-cache-store-results-of-queries-with-nondeterministic-functions). + +Finally, entries in the query result cache are not shared between users for security reasons. For example, user A must not be able to bypass a +row policy on a table by running the same query as another user B for whom no such policy exists. However, if necessary, cache entries can +be marked accessible by other users (i.e. shared) by supplying setting +[query_result_cache_share_between_users](settings/settings.md#query-result-cache-share-between-users). diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 13873827722..9a67edd75ca 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -1270,6 +1270,32 @@ If the table does not exist, ClickHouse will create it. If the structure of the ``` +## query_result_cache {#server_configuration_parameters_query-result-cache} + +[Query result cache](../query-result-cache.md) configuration. + +The following settings are available: + +- `size`: The maximum cache size in bytes. 0 means the query result cache is disabled. Default value: `1073741824` (1 GiB). +- `max_entries`: The maximum number of SELECT query results stored in the cache. Default value: `1024`. +- `max_entry_size`: The maximum size in bytes a SELECT query result may have in order to be saved in the cache. Default value: `1048576` (1 MiB). +- `max_entry_records`: The maximum number of records a SELECT query result may have in order to be saved in the cache. Default value: `30000000` (30 million). + +:::warning +Data for the query result cache is allocated in DRAM. If memory is scarce, make sure to set a small value for `size` or disable the query result cache altogether. +::: + +**Example** + +```xml +<query_result_cache> +    <size>1073741824</size> +    <max_entries>1024</max_entries> +    <max_entry_size>1048576</max_entry_size> +    <max_entry_records>30000000</max_entry_records> +</query_result_cache> +``` + ## query_thread_log {#server_configuration_parameters-query_thread_log} Setting for logging threads of queries received with the [log_query_threads=1](../../operations/settings/settings.md#settings-log-query-threads) setting. diff --git a/docs/en/operations/settings/index.md b/docs/en/operations/settings/index.md index f254d57ec7d..fae282c861f 100644 --- a/docs/en/operations/settings/index.md +++ b/docs/en/operations/settings/index.md @@ -1,6 +1,6 @@ --- sidebar_label: Settings Overview -sidebar_position: 51 +sidebar_position: 1 slug: /en/operations/settings/ pagination_next: en/operations/settings/settings --- diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index d1f94cf183c..b383e0598a1 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -106,14 +106,20 @@ Possible values: Default value: 1. The delay (in milliseconds) for `INSERT` is calculated by the formula: - ```code max_k = parts_to_throw_insert - parts_to_delay_insert k = 1 + parts_count_in_partition - parts_to_delay_insert delay_milliseconds = pow(max_delay_to_insert * 1000, k / max_k) ``` +For example, if a partition has 299 active parts and parts_to_throw_insert = 300, parts_to_delay_insert = 150, max_delay_to_insert = 1, `INSERT` is delayed for `pow( 1 * 1000, (1 + 299 - 150) / (300 - 150) ) = 1000` milliseconds.
-For example if a partition has 299 active parts and parts_to_throw_insert = 300, parts_to_delay_insert = 150, max_delay_to_insert = 1, `INSERT` is delayed for `pow( 1 * 1000, (1 + 299 - 150) / (300 - 150) ) = 1000` milliseconds. +Starting from version 23.1 formula has been changed to: +```code +allowed_parts_over_threshold = parts_to_throw_insert - parts_to_delay_insert +parts_over_threshold = parts_count_in_partition - parts_to_delay_insert + 1 +delay_milliseconds = max(min_delay_to_insert_ms, (max_delay_to_insert * 1000) * parts_over_threshold / allowed_parts_over_threshold) +``` +For example, if a partition has 224 active parts and parts_to_throw_insert = 300, parts_to_delay_insert = 150, max_delay_to_insert = 1, min_delay_to_insert_ms = 10, `INSERT` is delayed for `max( 10, 1 * 1000 * (224 - 150 + 1) / (300 - 150) ) = 500` milliseconds. ## max_parts_in_total {#max-parts-in-total} diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md new file mode 100644 index 00000000000..cca93641254 --- /dev/null +++ b/docs/en/operations/settings/settings-formats.md @@ -0,0 +1,1486 @@ +--- +sidebar_label: Format Settings +sidebar_position: 52 +slug: /en/operations/settings/formats +toc_max_heading_level: 2 +--- + +# Format settings {#format-settings} + +## input_format_skip_unknown_fields {#input_format_skip_unknown_fields} + +Enables or disables skipping insertion of extra data. + +When writing data, ClickHouse throws an exception if input data contain columns that do not exist in the target table. If skipping is enabled, ClickHouse does not insert extra data and does not throw an exception. + +Supported formats: + +- [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) +- [TSKV](../../interfaces/formats.md/#tskv) +- All formats with suffixes WithNames/WithNamesAndTypes +- [JSONColumns](../../interfaces/formats.md/#jsoncolumns) +- [MySQLDump](../../interfaces/formats.md/#mysqldump) + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: 1. + +## input_format_with_names_use_header {#input_format_with_names_use_header} + +Enables or disables checking the column order when inserting data. + +To improve insert performance, we recommend disabling this check if you are sure that the column order of the input data is the same as in the target table. + +Supported formats: + +- [CSVWithNames](../../interfaces/formats.md/#csvwithnames) +- [CSVWithNamesAndTypes](../../interfaces/formats.md/#csvwithnamesandtypes) +- [TabSeparatedWithNames](../../interfaces/formats.md/#tabseparatedwithnames) +- [TabSeparatedWithNamesAndTypes](../../interfaces/formats.md/#tabseparatedwithnamesandtypes) +- [JSONCompactEachRowWithNames](../../interfaces/formats.md/#jsoncompacteachrowwithnames) +- [JSONCompactEachRowWithNamesAndTypes](../../interfaces/formats.md/#jsoncompacteachrowwithnamesandtypes) +- [JSONCompactStringsEachRowWithNames](../../interfaces/formats.md/#jsoncompactstringseachrowwithnames) +- [JSONCompactStringsEachRowWithNamesAndTypes](../../interfaces/formats.md/#jsoncompactstringseachrowwithnamesandtypes) +- [RowBinaryWithNames](../../interfaces/formats.md/#rowbinarywithnames) +- [RowBinaryWithNamesAndTypes](../../interfaces/formats.md/#rowbinarywithnamesandtypes) +- [CustomSeparatedWithNames](../../interfaces/formats.md/#customseparatedwithnames) +- [CustomSeparatedWithNamesAndTypes](../../interfaces/formats.md/#customseparatedwithnamesandtypes) + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: 1. 
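As a sketch of the recommendation above, the check can be disabled per insert. The table `events` and its columns are hypothetical, and the CSV data is assumed to follow the query (for example, piped through `clickhouse-client`):

```sql
-- Hypothetical target table whose column order matches the incoming CSV exactly.
CREATE TABLE events (id UInt64, message String) ENGINE = MergeTree ORDER BY id;

-- The header row is still consumed, but columns are matched by position
-- instead of by name.
INSERT INTO events
SETTINGS input_format_with_names_use_header = 0
FORMAT CSVWithNames
```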
+ +## input_format_with_types_use_header {#input_format_with_types_use_header} + +Controls whether format parser should check if data types from the input data match data types from the target table. + +Supported formats: + +- [CSVWithNamesAndTypes](../../interfaces/formats.md/#csvwithnamesandtypes) +- [TabSeparatedWithNamesAndTypes](../../interfaces/formats.md/#tabseparatedwithnamesandtypes) +- [JSONCompactEachRowWithNamesAndTypes](../../interfaces/formats.md/#jsoncompacteachrowwithnamesandtypes) +- [JSONCompactStringsEachRowWithNamesAndTypes](../../interfaces/formats.md/#jsoncompactstringseachrowwithnamesandtypes) +- [RowBinaryWithNamesAndTypes](../../interfaces/formats.md/#rowbinarywithnamesandtypes-rowbinarywithnamesandtypes) +- [CustomSeparatedWithNamesAndTypes](../../interfaces/formats.md/#customseparatedwithnamesandtypes) + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: 1. + +## input_format_defaults_for_omitted_fields {#input_format_defaults_for_omitted_fields} + +When performing `INSERT` queries, replace omitted input column values with default values of the respective columns. This option only applies to [JSONEachRow](../../interfaces/formats.md/#jsoneachrow), [CSV](../../interfaces/formats.md/#csv), [TabSeparated](../../interfaces/formats.md/#tabseparated) formats and formats with `WithNames`/`WithNamesAndTypes` suffixes. + +:::note +When this option is enabled, extended table metadata are sent from server to client. It consumes additional computing resources on the server and can reduce performance. +::: + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: 1. + +## input_format_null_as_default {#input_format_null_as_default} + +Enables or disables the initialization of [NULL](../../sql-reference/syntax.md/#null-literal) fields with [default values](../../sql-reference/statements/create/table.md/#create-default-values), if data type of these fields is not [nullable](../../sql-reference/data-types/nullable.md/#data_type-nullable). +If column type is not nullable and this setting is disabled, then inserting `NULL` causes an exception. If column type is nullable, then `NULL` values are inserted as is, regardless of this setting. + +This setting is applicable to [INSERT ... VALUES](../../sql-reference/statements/insert-into.md) queries for text input formats. + +Possible values: + +- 0 — Inserting `NULL` into a not nullable column causes an exception. +- 1 — `NULL` fields are initialized with default column values. + +Default value: `1`. + +## input_format_allow_seeks {#input_format_allow_seeks} + +Allow seeks while reading in ORC/Parquet/Arrow input formats. + +Enabled by default. + +## input_format_max_rows_to_read_for_schema_inference {#input_format_max_rows_to_read_for_schema_inference} + +The maximum rows of data to read for automatic schema inference. + +Default value: `25'000`. + +## column_names_for_schema_inference {#column_names_for_schema_inference} + +The list of column names to use in schema inference for formats without column names. The format: 'column1,column2,column3,...' + +## schema_inference_hints {#schema_inference_hints} + +The list of column names and types to use as hints in schema inference for formats without schema. 
+ +Example: + +Query: +```sql +desc format(JSONEachRow, '{"x" : 1, "y" : "String", "z" : "0.0.0.0" }') settings schema_inference_hints='x UInt8, z IPv4'; +``` + +Result: +```sql +x UInt8 +y Nullable(String) +z IPv4 +``` + +## schema_inference_make_columns_nullable {#schema_inference_make_columns_nullable} + +Controls making inferred types `Nullable` in schema inference for formats without information about nullability. +If the setting is enabled, the inferred type will be `Nullable` only if column contains `NULL` in a sample that is parsed during schema inference. + +Default value: `true`. + +## input_format_try_infer_integers {#input_format_try_infer_integers} + +If enabled, ClickHouse will try to infer integers instead of floats in schema inference for text formats. If all numbers in the column from input data are integers, the result type will be `Int64`, if at least one number is float, the result type will be `Float64`. + +Enabled by default. + +## input_format_try_infer_dates {#input_format_try_infer_dates} + +If enabled, ClickHouse will try to infer type `Date` from string fields in schema inference for text formats. If all fields from a column in input data were successfully parsed as dates, the result type will be `Date`, if at least one field was not parsed as date, the result type will be `String`. + +Enabled by default. + +## input_format_try_infer_datetimes {#input_format_try_infer_datetimes} + +If enabled, ClickHouse will try to infer type `DateTime64` from string fields in schema inference for text formats. If all fields from a column in input data were successfully parsed as datetimes, the result type will be `DateTime64`, if at least one field was not parsed as datetime, the result type will be `String`. + +Enabled by default. + +## date_time_input_format {#date_time_input_format} + +Allows choosing a parser of the text representation of date and time. + +The setting does not apply to [date and time functions](../../sql-reference/functions/date-time-functions.md). + +Possible values: + +- `'best_effort'` — Enables extended parsing. + + ClickHouse can parse the basic `YYYY-MM-DD HH:MM:SS` format and all [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) date and time formats. For example, `'2018-06-08T01:02:03.000Z'`. + +- `'basic'` — Use basic parser. + + ClickHouse can parse only the basic `YYYY-MM-DD HH:MM:SS` or `YYYY-MM-DD` format. For example, `2019-08-20 10:18:56` or `2019-08-20`. + +Default value: `'basic'`. + +See also: + +- [DateTime data type.](../../sql-reference/data-types/datetime.md) +- [Functions for working with dates and times.](../../sql-reference/functions/date-time-functions.md) + +## date_time_output_format {#date_time_output_format} + +Allows choosing different output formats of the text representation of date and time. + +Possible values: + +- `simple` - Simple output format. + + ClickHouse output date and time `YYYY-MM-DD hh:mm:ss` format. For example, `2019-08-20 10:18:56`. The calculation is performed according to the data type's time zone (if present) or server time zone. + +- `iso` - ISO output format. + + ClickHouse output date and time in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `YYYY-MM-DDThh:mm:ssZ` format. For example, `2019-08-20T10:18:56Z`. Note that output is in UTC (`Z` means UTC). + +- `unix_timestamp` - Unix timestamp output format. + + ClickHouse output date and time in [Unix timestamp](https://en.wikipedia.org/wiki/Unix_time) format. For example `1566285536`. + +Default value: `simple`. 
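+
+For example, a minimal sketch of the `iso` output format (the `Z` suffix indicates UTC):
+
+```sql
+SET date_time_output_format = 'iso';
+SELECT toDateTime('2019-08-20 10:18:56', 'UTC') FORMAT TSV;
+-- 2019-08-20T10:18:56Z
+```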
+ +See also: + +- [DateTime data type.](../../sql-reference/data-types/datetime.md) +- [Functions for working with dates and times.](../../sql-reference/functions/date-time-functions.md) + +## input_format_ipv4_default_on_conversion_error {#input_format_ipv4_default_on_conversion_error} + +Deserialization of IPv4 will use default values instead of throwing exception on conversion error. + +Disabled by default. + +## input_format_ipv6_default_on_conversion_error {#input_format_ipv6_default_on_conversion_error} + +Deserialization of IPV6 will use default values instead of throwing exception on conversion error. + +Disabled by default. + +## bool_true_representation {#bool_true_representation} + +Text to represent true bool value in TSV/CSV/Vertical/Pretty formats. + +Default value: `true` + +## bool_false_representation {#bool_false_representation} + +Text to represent false bool value in TSV/CSV/Vertical/Pretty formats. + +Default value: `false` + +## output_format_decimal_trailing_zeros {#output_format_decimal_trailing_zeros} + +Output trailing zeros when printing Decimal values. E.g. 1.230000 instead of 1.23. + +Disabled by default. + +## input_format_allow_errors_num {#input_format_allow_errors_num} + +Sets the maximum number of acceptable errors when reading from text formats (CSV, TSV, etc.). + +The default value is 0. + +Always pair it with `input_format_allow_errors_ratio`. + +If an error occurred while reading rows but the error counter is still less than `input_format_allow_errors_num`, ClickHouse ignores the row and moves on to the next one. + +If both `input_format_allow_errors_num` and `input_format_allow_errors_ratio` are exceeded, ClickHouse throws an exception. + +## input_format_allow_errors_ratio {#input_format_allow_errors_ratio} + +Sets the maximum percentage of errors allowed when reading from text formats (CSV, TSV, etc.). +The percentage of errors is set as a floating-point number between 0 and 1. + +The default value is 0. + +Always pair it with `input_format_allow_errors_num`. + +If an error occurred while reading rows but the error counter is still less than `input_format_allow_errors_ratio`, ClickHouse ignores the row and moves on to the next one. + +If both `input_format_allow_errors_num` and `input_format_allow_errors_ratio` are exceeded, ClickHouse throws an exception. + +## format_schema {#format-schema} + +This parameter is useful when you are using formats that require a schema definition, such as [Cap’n Proto](https://capnproto.org/) or [Protobuf](https://developers.google.com/protocol-buffers/). The value depends on the format. + +## output_format_enable_streaming {#output_format_enable_streaming} + +Enable streaming in output formats that support it. + +Disabled by default. + +## output_format_write_statistics {#output_format_write_statistics} + +Write statistics about read rows, bytes, time elapsed in suitable output formats. + +Enabled by default + +## insert_distributed_one_random_shard {#insert_distributed_one_random_shard} + +Enables or disables random shard insertion into a [Distributed](../../engines/table-engines/special/distributed.md/#distributed) table when there is no distributed key. + +By default, when inserting data into a `Distributed` table with more than one shard, the ClickHouse server will reject any insertion request if there is no distributed key. When `insert_distributed_one_random_shard = 1`, insertions are allowed and data is forwarded randomly among all shards. 
+ +Possible values: + +- 0 — Insertion is rejected if there are multiple shards and no distributed key is given. +- 1 — Insertion is done randomly among all available shards when no distributed key is given. + +Default value: `0`. + +## JSON formats settings {#json-formats-settings} + +## input_format_import_nested_json {#input_format_import_nested_json} + +Enables or disables the insertion of JSON data with nested objects. + +Supported formats: + +- [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: 0. + +See also: + +- [Usage of Nested Structures](../../interfaces/formats.md/#jsoneachrow-nested) with the `JSONEachRow` format. + +## input_format_json_read_bools_as_numbers {#input_format_json_read_bools_as_numbers} + +Allow parsing bools as numbers in JSON input formats. + +Enabled by default. + +## input_format_json_read_numbers_as_strings {#input_format_json_read_numbers_as_strings} + +Allow parsing numbers as strings in JSON input formats. + +Disabled by default. + +## input_format_json_read_objects_as_strings {#input_format_json_read_objects_as_strings} + +Allow parsing JSON objects as strings in JSON input formats. + +Example: + +```sql +SET input_format_json_read_objects_as_strings = 1; +CREATE TABLE test (id UInt64, obj String, date Date) ENGINE=Memory(); +INSERT INTO test FORMAT JSONEachRow {"id" : 1, "obj" : {"a" : 1, "b" : "Hello"}, "date" : "2020-01-01"}; +SELECT * FROM test; +``` + +Result: + +``` +┌─id─┬─obj──────────────────────┬───────date─┐ +│ 1 │ {"a" : 1, "b" : "Hello"} │ 2020-01-01 │ +└────┴──────────────────────────┴────────────┘ +``` + +Disabled by default. + +## input_format_json_validate_types_from_metadata {#input_format_json_validate_types_from_metadata} + +For JSON/JSONCompact/JSONColumnsWithMetadata input formats, if this setting is set to 1, +the types from metadata in input data will be compared with the types of the corresponding columns from the table. + +Enabled by default. + +## output_format_json_quote_64bit_integers {#output_format_json_quote_64bit_integers} + +Controls quoting of 64-bit or bigger [integers](../../sql-reference/data-types/int-uint.md) (like `UInt64` or `Int128`) when they are output in a [JSON](../../interfaces/formats.md/#json) format. +Such integers are enclosed in quotes by default. This behavior is compatible with most JavaScript implementations. + +Possible values: + +- 0 — Integers are output without quotes. +- 1 — Integers are enclosed in quotes. + +Default value: 1. + +## output_format_json_quote_64bit_floats {#output_format_json_quote_64bit_floats} + +Controls quoting of 64-bit [floats](../../sql-reference/data-types/float.md) when they are output in JSON* formats. + +Disabled by default. + +## output_format_json_quote_denormals {#output_format_json_quote_denormals} + +Enables `+nan`, `-nan`, `+inf`, `-inf` outputs in [JSON](../../interfaces/formats.md/#json) output format. + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: 0. 
+ +**Example** + +Consider the following table `account_orders`: + +```text +┌─id─┬─name───┬─duration─┬─period─┬─area─┐ +│ 1 │ Andrew │ 20 │ 0 │ 400 │ +│ 2 │ John │ 40 │ 0 │ 0 │ +│ 3 │ Bob │ 15 │ 0 │ -100 │ +└────┴────────┴──────────┴────────┴──────┘ +``` + +When `output_format_json_quote_denormals = 0`, the query returns `null` values in output: + +```sql +SELECT area/period FROM account_orders FORMAT JSON; +``` + +```json +{ + "meta": + [ + { + "name": "divide(area, period)", + "type": "Float64" + } + ], + + "data": + [ + { + "divide(area, period)": null + }, + { + "divide(area, period)": null + }, + { + "divide(area, period)": null + } + ], + + "rows": 3, + + "statistics": + { + "elapsed": 0.003648093, + "rows_read": 3, + "bytes_read": 24 + } +} +``` + +When `output_format_json_quote_denormals = 1`, the query returns: + +```json +{ + "meta": + [ + { + "name": "divide(area, period)", + "type": "Float64" + } + ], + + "data": + [ + { + "divide(area, period)": "inf" + }, + { + "divide(area, period)": "-nan" + }, + { + "divide(area, period)": "-inf" + } + ], + + "rows": 3, + + "statistics": + { + "elapsed": 0.000070241, + "rows_read": 3, + "bytes_read": 24 + } +} +``` + +## output_format_json_quote_decimals {#output_format_json_quote_decimals} + +Controls quoting of decimals in JSON output formats. + +Disabled by default. + +## output_format_json_escape_forward_slashes {#output_format_json_escape_forward_slashes} + +Controls escaping forward slashes for string outputs in JSON output format. This is intended for compatibility with JavaScript. Don't confuse with backslashes that are always escaped. + +Enabled by default. + +## output_format_json_named_tuples_as_objects {#output_format_json_named_tuples_as_objects} + +Serialize named tuple columns as JSON objects. + +Enabled by default. + +## input_format_json_named_tuples_as_objects {#input_format_json_named_tuples_as_objects} + +Parse named tuple columns as JSON objects. + +Enabled by default. + +## input_format_json_defaults_for_missing_elements_in_named_tuple {#input_format_json_defaults_for_missing_elements_in_named_tuple} + +Insert default values for missing elements in JSON object while parsing named tuple. +This setting works only when setting `input_format_json_named_tuples_as_objects` is enabled. + +Enabled by default. + +## output_format_json_array_of_rows {#output_format_json_array_of_rows} + +Enables the ability to output all rows as a JSON array in the [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) format. + +Possible values: + +- 1 — ClickHouse outputs all rows as an array, each row in the `JSONEachRow` format. +- 0 — ClickHouse outputs each row separately in the `JSONEachRow` format. + +Default value: `0`. + +**Example of a query with the enabled setting** + +Query: + +```sql +SET output_format_json_array_of_rows = 1; +SELECT number FROM numbers(3) FORMAT JSONEachRow; +``` + +Result: + +```text +[ +{"number":"0"}, +{"number":"1"}, +{"number":"2"} +] +``` + +**Example of a query with the disabled setting** + +Query: + +```sql +SET output_format_json_array_of_rows = 0; +SELECT number FROM numbers(3) FORMAT JSONEachRow; +``` + +Result: + +```text +{"number":"0"} +{"number":"1"} +{"number":"2"} +``` + +## output_format_json_validate_utf8 {#output_format_json_validate_utf8} + +Controls validation of UTF-8 sequences in JSON output formats, doesn't impact formats JSON/JSONCompact/JSONColumnsWithMetadata, they always validate UTF-8. + +Disabled by default. 
+ +## format_json_object_each_row_column_for_object_name {#format_json_object_each_row_column_for_object_name} + +The name of column that will be used for storing/writing object names in [JSONObjectEachRow](../../interfaces/formats.md/#jsonobjecteachrow) format. +Column type should be String. If value is empty, default names `row_{i}`will be used for object names. + +Default value: ''. + +## TSV format settings {#tsv-format-settings} + +### input_format_tsv_empty_as_default {#input_format_tsv_empty_as_default} + +When enabled, replace empty input fields in TSV with default values. For complex default expressions `input_format_defaults_for_omitted_fields` must be enabled too. + +Disabled by default. + +### input_format_tsv_enum_as_number {#input_format_tsv_enum_as_number} + +When enabled, always treat enum values as enum ids for TSV input format. It's recommended to enable this setting if data contains only enum ids to optimize enum parsing. + +Possible values: + +- 0 — Enum values are parsed as values or as enum IDs. +- 1 — Enum values are parsed only as enum IDs. + +Default value: 0. + +**Example** + +Consider the table: + +```sql +CREATE TABLE table_with_enum_column_for_tsv_insert (Id Int32,Value Enum('first' = 1, 'second' = 2)) ENGINE=Memory(); +``` + +When the `input_format_tsv_enum_as_number` setting is enabled: + +Query: + +```sql +SET input_format_tsv_enum_as_number = 1; +INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2; +SELECT * FROM table_with_enum_column_for_tsv_insert; +``` + +Result: + +```text +┌──Id─┬─Value──┐ +│ 102 │ second │ +└─────┴────────┘ +``` + +Query: + +```sql +SET input_format_tsv_enum_as_number = 1; +INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 103 'first'; +``` + +throws an exception. + +When the `input_format_tsv_enum_as_number` setting is disabled: + +Query: + +```sql +SET input_format_tsv_enum_as_number = 0; +INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2; +INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 103 'first'; +SELECT * FROM table_with_enum_column_for_tsv_insert; +``` + +Result: + +```text +┌──Id─┬─Value──┐ +│ 102 │ second │ +└─────┴────────┘ +┌──Id─┬─Value──┐ +│ 103 │ first │ +└─────┴────────┘ +``` + +### input_format_tsv_use_best_effort_in_schema_inference {#input_format_tsv_use_best_effort_in_schema_inference} + +Use some tweaks and heuristics to infer schema in TSV format. If disabled, all fields will be treated as String. + +Enabled by default. + +### input_format_tsv_skip_first_lines {#input_format_tsv_skip_first_lines} + +The number of lines to skip at the beginning of data in TSV input format. + +Default value: `0`. + +### output_format_tsv_crlf_end_of_line {#output_format_tsv_crlf_end_of_line} + +Use DOC/Windows-style line separator (CRLF) in TSV instead of Unix style (LF). + +Disabled by default. + +### format_tsv_null_representation {#format_tsv_null_representation} + +Defines the representation of `NULL` for [TSV](../../interfaces/formats.md/#tabseparated) output and input formats. User can set any string as a value, for example, `My NULL`. + +Default value: `\N`. 
+ +**Examples** + +Query + +```sql +SELECT * FROM tsv_custom_null FORMAT TSV; +``` + +Result + +```text +788 +\N +\N +``` + +Query + +```sql +SET format_tsv_null_representation = 'My NULL'; +SELECT * FROM tsv_custom_null FORMAT TSV; +``` + +Result + +```text +788 +My NULL +My NULL +``` + +## CSV format settings {#csv-format-settings} + +### format_csv_delimiter {#format_csv_delimiter} + +The character is interpreted as a delimiter in the CSV data. + +Default value: `,`. + +### format_csv_allow_single_quotes {#format_csv_allow_single_quotes} + +If it is set to true, allow strings in single quotes. + +Enabled by default. + +### format_csv_allow_double_quotes {#format_csv_allow_double_quotes} + +If it is set to true, allow strings in double quotes. + +Enabled by default. + +### output_format_csv_crlf_end_of_line {#output_format_csv_crlf_end_of_line} + +Use DOS/Windows-style line separator (CRLF) in CSV instead of Unix style (LF). + +Disabled by default. + +### input_format_csv_enum_as_number {#input_format_csv_enum_as_number} + +When enabled, always treat enum values as enum ids for CSV input format. It's recommended to enable this setting if data contains only enum ids to optimize enum parsing. + +Possible values: + +- 0 — Enum values are parsed as values or as enum IDs. +- 1 — Enum values are parsed only as enum IDs. + +Default value: 0. + +**Examples** + +Consider the table: + +```sql +CREATE TABLE table_with_enum_column_for_csv_insert (Id Int32,Value Enum('first' = 1, 'second' = 2)) ENGINE=Memory(); +``` + +When the `input_format_csv_enum_as_number` setting is enabled: + +Query: + +```sql +SET input_format_csv_enum_as_number = 1; +INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2 +``` + +Result: + +```text +┌──Id─┬─Value──┐ +│ 102 │ second │ +└─────┴────────┘ +``` + +Query: + +```sql +SET input_format_csv_enum_as_number = 1; +INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 103,'first' +``` + +throws an exception. + +When the `input_format_csv_enum_as_number` setting is disabled: + +Query: + +```sql +SET input_format_csv_enum_as_number = 0; +INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2 +INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 103,'first' +SELECT * FROM table_with_enum_column_for_csv_insert; +``` + +Result: + +```text +┌──Id─┬─Value──┐ +│ 102 │ second │ +└─────┴────────┘ +┌──Id─┬─Value─┐ +│ 103 │ first │ +└─────┴───────┘ +``` + +### input_format_csv_arrays_as_nested_csv {#input_format_csv_arrays_as_nested_csv} + +When reading Array from CSV, expect that its elements were serialized in nested CSV and then put into string. Example: "[""Hello"", ""world"", ""42"""" TV""]". Braces around array can be omitted. + +Disabled by default. + +### input_format_csv_empty_as_default {#input_format_csv_empty_as_default} + +When enabled, replace empty input fields in CSV with default values. For complex default expressions `input_format_defaults_for_omitted_fields` must be enabled too. + +Enabled by default. + +### input_format_csv_use_best_effort_in_schema_inference {#input_format_csv_use_best_effort_in_schema_inference} + +Use some tweaks and heuristics to infer schema in CSV format. If disabled, all fields will be treated as String. + +Enabled by default. + +### input_format_csv_skip_first_lines {#input_format_csv_skip_first_lines} + +The number of lines to skip at the beginning of data in CSV input format. + +Default value: `0`. 
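+
+For example, a minimal sketch that reads a hypothetical `data_with_preamble.csv` file whose first two lines are a comment and a header that should both be ignored:
+
+```sql
+SET input_format_csv_skip_first_lines = 2;
+SELECT * FROM file('data_with_preamble.csv', 'CSV', 'id UInt32, name String');
+```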
+ +### format_csv_null_representation {#format_csv_null_representation} + +Defines the representation of `NULL` for [CSV](../../interfaces/formats.md/#csv) output and input formats. User can set any string as a value, for example, `My NULL`. + +Default value: `\N`. + +**Examples** + +Query + +```sql +SELECT * from csv_custom_null FORMAT CSV; +``` + +Result + +```text +788 +\N +\N +``` + +Query + +```sql +SET format_csv_null_representation = 'My NULL'; +SELECT * FROM csv_custom_null FORMAT CSV; +``` + +Result + +```text +788 +My NULL +My NULL +``` + +## Values format settings {#values-format-settings} + +### input_format_values_interpret_expressions {#input_format_values_interpret_expressions} + +Enables or disables the full SQL parser if the fast stream parser can’t parse the data. This setting is used only for the [Values](../../interfaces/formats.md/#data-format-values) format at the data insertion. For more information about syntax parsing, see the [Syntax](../../sql-reference/syntax.md) section. + +Possible values: + +- 0 — Disabled. + + In this case, you must provide formatted data. See the [Formats](../../interfaces/formats.md) section. + +- 1 — Enabled. + + In this case, you can use an SQL expression as a value, but data insertion is much slower this way. If you insert only formatted data, then ClickHouse behaves as if the setting value is 0. + +Default value: 1. + +Example of Use + +Insert the [DateTime](../../sql-reference/data-types/datetime.md) type value with the different settings. + +``` sql +SET input_format_values_interpret_expressions = 0; +INSERT INTO datetime_t VALUES (now()) +``` + +``` text +Exception on client: +Code: 27. DB::Exception: Cannot parse input: expected ) before: now()): (at row 1) +``` + +``` sql +SET input_format_values_interpret_expressions = 1; +INSERT INTO datetime_t VALUES (now()) +``` + +``` text +Ok. +``` + +The last query is equivalent to the following: + +``` sql +SET input_format_values_interpret_expressions = 0; +INSERT INTO datetime_t SELECT now() +``` + +``` text +Ok. +``` + +### input_format_values_deduce_templates_of_expressions {#input_format_values_deduce_templates_of_expressions} + +Enables or disables template deduction for SQL expressions in [Values](../../interfaces/formats.md/#data-format-values) format. It allows parsing and interpreting expressions in `Values` much faster if expressions in consecutive rows have the same structure. ClickHouse tries to deduce the template of an expression, parse the following rows using this template and evaluate the expression on a batch of successfully parsed rows. + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: 1. + +For the following query: + +``` sql +INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), (upper('Values')), ... +``` + +- If `input_format_values_interpret_expressions=1` and `format_values_deduce_templates_of_expressions=0`, expressions are interpreted separately for each row (this is very slow for large number of rows). +- If `input_format_values_interpret_expressions=0` and `format_values_deduce_templates_of_expressions=1`, expressions in the first, second and third rows are parsed using template `lower(String)` and interpreted together, expression in the forth row is parsed with another template (`upper(String)`). 
+- If `input_format_values_interpret_expressions=1` and `format_values_deduce_templates_of_expressions=1`, the same as in previous case, but also allows fallback to interpreting expressions separately if it’s not possible to deduce template. + +### input_format_values_accurate_types_of_literals {#input_format_values_accurate_types_of_literals} + +This setting is used only when `input_format_values_deduce_templates_of_expressions = 1`. Expressions for some column may have the same structure, but contain numeric literals of different types, e.g. + +``` sql +(..., abs(0), ...), -- UInt64 literal +(..., abs(3.141592654), ...), -- Float64 literal +(..., abs(-1), ...), -- Int64 literal +``` + +Possible values: + +- 0 — Disabled. + + In this case, ClickHouse may use a more general type for some literals (e.g., `Float64` or `Int64` instead of `UInt64` for `42`), but it may cause overflow and precision issues. + +- 1 — Enabled. + + In this case, ClickHouse checks the actual type of literal and uses an expression template of the corresponding type. In some cases, it may significantly slow down expression evaluation in `Values`. + +Default value: 1. + +## Arrow format settings {#arrow-format-settings} + +### input_format_arrow_import_nested {#input_format_arrow_import_nested} + +Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [Arrow](../../interfaces/formats.md/#data_types-matching-arrow) input format. + +Possible values: + +- 0 — Data can not be inserted into `Nested` columns as an array of structs. +- 1 — Data can be inserted into `Nested` columns as an array of structs. + +Default value: `0`. + +### input_format_arrow_case_insensitive_column_matching {#input_format_arrow_case_insensitive_column_matching} + +Ignore case when matching Arrow column names with ClickHouse column names. + +Disabled by default. + +### input_format_arrow_allow_missing_columns {#input_format_arrow_allow_missing_columns} + +While importing data, when column is not found in schema default value will be used instead of error. + +Disabled by default. + +### input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference {#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference} + +Allow skipping columns with unsupported types while schema inference for format Arrow. + +Disabled by default. + +### output_format_arrow_low_cardinality_as_dictionary {#output_format_arrow_low_cardinality_as_dictionary} + +Allows to convert the [LowCardinality](../../sql-reference/data-types/lowcardinality.md) type to the `DICTIONARY` type of the [Arrow](../../interfaces/formats.md/#data-format-arrow) format for `SELECT` queries. + +Possible values: + +- 0 — The `LowCardinality` type is not converted to the `DICTIONARY` type. +- 1 — The `LowCardinality` type is converted to the `DICTIONARY` type. + +Default value: `0`. + +### output_format_arrow_string_as_string {#output_format_arrow_string_as_string} + +Use Arrow String type instead of Binary for String columns. + +Disabled by default. + +## ORC format settings {#orc-format-settings} + +### input_format_orc_import_nested {#input_format_orc_import_nested} + +Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [ORC](../../interfaces/formats.md/#data-format-orc) input format. 
+ +Possible values: + +- 0 — Data can not be inserted into `Nested` columns as an array of structs. +- 1 — Data can be inserted into `Nested` columns as an array of structs. + +Default value: `0`. + +### input_format_orc_row_batch_size {#input_format_orc_row_batch_size} + +Batch size when reading ORC stripes. + +Default value: `100'000` + +### input_format_orc_case_insensitive_column_matching {#input_format_orc_case_insensitive_column_matching} + +Ignore case when matching ORC column names with ClickHouse column names. + +Disabled by default. + +### input_format_orc_allow_missing_columns {#input_format_orc_allow_missing_columns} + +While importing data, when column is not found in schema default value will be used instead of error. + +Disabled by default. + +### input_format_orc_skip_columns_with_unsupported_types_in_schema_inference {#input_format_orc_skip_columns_with_unsupported_types_in_schema_inference} + +Allow skipping columns with unsupported types while schema inference for format Arrow. + +Disabled by default. + +### output_format_orc_string_as_string {#output_format_orc_string_as_string} + +Use ORC String type instead of Binary for String columns. + +Disabled by default. + +## Parquet format settings {#parquet-format-settings} + +### input_format_parquet_import_nested {#input_format_parquet_import_nested} + +Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [Parquet](../../interfaces/formats.md/#data-format-parquet) input format. + +Possible values: + +- 0 — Data can not be inserted into `Nested` columns as an array of structs. +- 1 — Data can be inserted into `Nested` columns as an array of structs. + +Default value: `0`. + +### input_format_parquet_case_insensitive_column_matching {#input_format_parquet_case_insensitive_column_matching} + +Ignore case when matching Parquet column names with ClickHouse column names. + +Disabled by default. + +### output_format_parquet_row_group_size {#output_format_parquet_row_group_size} + +Row group size in rows. + +Default value: `1'000'000`. + +### input_format_parquet_allow_missing_columns {#input_format_parquet_allow_missing_columns} + +While importing data, when column is not found in schema default value will be used instead of error. + +Disabled by default. + +### input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference {#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference} + +Allow skipping columns with unsupported types while schema inference for format Parquet. + +Disabled by default. + +### output_format_parquet_string_as_string {#output_format_parquet_string_as_string} + +Use Parquet String type instead of Binary for String columns. + +Disabled by default. + +## Hive format settings {#hive-format-settings} + +### input_format_hive_text_fields_delimiter {#input_format_hive_text_fields_delimiter} + +Delimiter between fields in Hive Text File. + +Default value: `\x01`. + +### input_format_hive_text_collection_items_delimiter {#input_format_hive_text_collection_items_delimiter} + +Delimiter between collection(array or map) items in Hive Text File. + +Default value: `\x02`. + +### input_format_hive_text_map_keys_delimiter {#input_format_hive_text_map_keys_delimiter} + +Delimiter between a pair of map key/values in Hive Text File. + +Default value: `\x03`. 
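+
+A minimal sketch, assuming a hypothetical `hive_logs` table backed by the Hive table engine whose underlying text files use `|` between fields instead of the default `\x01`:
+
+```sql
+-- hive_logs is assumed to be created elsewhere with ENGINE = Hive(...)
+SET input_format_hive_text_fields_delimiter = '|';
+SELECT * FROM hive_logs LIMIT 10;
+```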
+ +## MsgPack format settings {#msgpack-format-settings} + +### input_format_msgpack_number_of_columns {#input_format_msgpack_number_of_columns} + +The number of columns in inserted MsgPack data. Used for automatic schema inference from data. + +Default value: `0`. + +### output_format_msgpack_uuid_representation {#output_format_msgpack_uuid_representation} + +The way how to output UUID in MsgPack format. +Possible values: + +- `bin` - as 16-bytes binary. +- `str` - as a string of 36 bytes. +- `ext` - as extention with ExtType = 2. + +Default value: `ext`. + + +## Protobuf format settings {#protobuf-format-settings} + +### input_format_protobuf_flatten_google_wrappers {#input_format_protobuf_flatten_google_wrappers} + +Enable Google wrappers for regular non-nested columns, e.g. google.protobuf.StringValue 'str' for String column 'str'. For Nullable columns empty wrappers are recognized as defaults, and missing as nulls. + +Disabled by default. + +### output_format_protobuf_nullables_with_google_wrappers {#output_format_protobuf_nullables_with_google_wrappers} + +When serializing Nullable columns with Google wrappers, serialize default values as empty wrappers. If turned off, default and null values are not serialized. + +Disabled by default. + +## Avro format settings {#avro-format-settings} + +### input_format_avro_allow_missing_fields {#input_format_avro_allow_missing_fields} + +Enables using fields that are not specified in [Avro](../../interfaces/formats.md/#data-format-avro) or [AvroConfluent](../../interfaces/formats.md/#data-format-avro-confluent) format schema. When a field is not found in the schema, ClickHouse uses the default value instead of throwing an exception. + +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: 0. + +### format_avro_schema_registry_url {#format_avro_schema_registry_url} + +Sets [Confluent Schema Registry](https://docs.confluent.io/current/schema-registry/index.html) URL to use with [AvroConfluent](../../interfaces/formats.md/#data-format-avro-confluent) format. + +Default value: `Empty`. + +### output_format_avro_codec {#output_format_avro_codec} + +Sets the compression codec used for output Avro file. + +Type: string + +Possible values: + +- `null` — No compression +- `deflate` — Compress with Deflate (zlib) +- `snappy` — Compress with [Snappy](https://google.github.io/snappy/) + +Default value: `snappy` (if available) or `deflate`. + +### output_format_avro_sync_interval {#output_format_avro_sync_interval} + +Sets minimum data size (in bytes) between synchronization markers for output Avro file. + +Type: unsigned int + +Possible values: 32 (32 bytes) - 1073741824 (1 GiB) + +Default value: 32768 (32 KiB) + +### output_format_avro_string_column_pattern {#output_format_avro_string_column_pattern} + +Regexp of column names of type String to output as Avro `string` (default is `bytes`). +RE2 syntax is supported. + +Type: string + +### output_format_avro_rows_in_file {#output_format_avro_rows_in_file} + +Max rows in a file (if permitted by storage). + +Default value: `1`. + +## Pretty formats settings {#pretty-formats-settings} + +### output_format_pretty_max_rows {#output_format_pretty_max_rows} + +Rows limit for Pretty formats. + +Default value: `10'000`. + +### output_format_pretty_max_column_pad_width {#output_format_pretty_max_column_pad_width} + +Maximum width to pad all values in a column in Pretty formats. + +Default value: `250`. 
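+
+For example, a minimal sketch of `output_format_pretty_max_rows`: with the limit lowered, only the first rows of the result set are rendered.
+
+```sql
+SET output_format_pretty_max_rows = 5;
+SELECT number FROM system.numbers LIMIT 100 FORMAT PrettyCompact;
+```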
+ +### output_format_pretty_max_value_width {#output_format_pretty_max_value_width} + +Limits the width of value displayed in [Pretty](../../interfaces/formats.md/#pretty) formats. If the value width exceeds the limit, the value is cut. + +Possible values: + +- Positive integer. +- 0 — The value is cut completely. + +Default value: `10000` symbols. + +**Examples** + +Query: +```sql +SET output_format_pretty_max_value_width = 10; +SELECT range(number) FROM system.numbers LIMIT 10 FORMAT PrettyCompactNoEscapes; +``` +Result: +```text +┌─range(number)─┐ +│ [] │ +│ [0] │ +│ [0,1] │ +│ [0,1,2] │ +│ [0,1,2,3] │ +│ [0,1,2,3,4⋯ │ +│ [0,1,2,3,4⋯ │ +│ [0,1,2,3,4⋯ │ +│ [0,1,2,3,4⋯ │ +│ [0,1,2,3,4⋯ │ +└───────────────┘ +``` + +Query with zero width: +```sql +SET output_format_pretty_max_value_width = 0; +SELECT range(number) FROM system.numbers LIMIT 5 FORMAT PrettyCompactNoEscapes; +``` +Result: +```text +┌─range(number)─┐ +│ ⋯ │ +│ ⋯ │ +│ ⋯ │ +│ ⋯ │ +│ ⋯ │ +└───────────────┘ +``` + +### output_format_pretty_color {#output_format_pretty_color} + +Use ANSI escape sequences to paint colors in Pretty formats. + +Enabled by default. + +### output_format_pretty_grid_charset {#output_format_pretty_grid_charset} + +Allows changing a charset which is used for printing grids borders. Available charsets are UTF-8, ASCII. + +**Example** + +``` text +SET output_format_pretty_grid_charset = 'UTF-8'; +SELECT * FROM a; +┌─a─┐ +│ 1 │ +└───┘ + +SET output_format_pretty_grid_charset = 'ASCII'; +SELECT * FROM a; ++-a-+ +| 1 | ++---+ +``` + +### output_format_pretty_row_numbers {#output_format_pretty_row_numbers} + +Adds row numbers to output in the [Pretty](../../interfaces/formats.md/#pretty) format. + +Possible values: + +- 0 — Output without row numbers. +- 1 — Output with row numbers. + +Default value: `0`. + +**Example** + +Query: + +```sql +SET output_format_pretty_row_numbers = 1; +SELECT TOP 3 name, value FROM system.settings; +``` + +Result: +```text + ┌─name────────────────────┬─value───┐ +1. │ min_compress_block_size │ 65536 │ +2. │ max_compress_block_size │ 1048576 │ +3. │ max_block_size │ 65505 │ + └─────────────────────────┴─────────┘ +``` + +## Template format settings {#template-format-settings} + +### format_template_resultset {#format_template_resultset} + +Path to file which contains format string for result set (for Template format). + +### format_template_row {#format_template_row} + +Path to file which contains format string for rows (for Template format). + +### format_template_rows_between_delimiter {#format_template_rows_between_delimiter} + +Delimiter between rows (for Template format). + +## CustomSeparated format settings {custom-separated-format-settings} + +### format_custom_escaping_rule {#format_custom_escaping_rule} + +Sets the field escaping rule for [CustomSeparated](../../interfaces/formats.md/#format-customseparated) data format. + +Possible values: + +- `'Escaped'` — Similarly to [TSV](../../interfaces/formats.md/#tabseparated). +- `'Quoted'` — Similarly to [Values](../../interfaces/formats.md/#data-format-values). +- `'CSV'` — Similarly to [CSV](../../interfaces/formats.md/#csv). +- `'JSON'` — Similarly to [JSONEachRow](../../interfaces/formats.md/#jsoneachrow). +- `'XML'` — Similarly to [XML](../../interfaces/formats.md/#xml). +- `'Raw'` — Extracts subpatterns as a whole, no escaping rules, similarly to [TSVRaw](../../interfaces/formats.md/#tabseparatedraw). + +Default value: `'Escaped'`. 
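+
+A minimal sketch: with the `CSV` escaping rule, string fields in `CustomSeparated` output are quoted and escaped as they would be in CSV.
+
+```sql
+SET format_custom_escaping_rule = 'CSV';
+SELECT 'a "quoted" value' AS s, 42 AS n FORMAT CustomSeparated;
+```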
+ +### format_custom_field_delimiter {#format_custom_field_delimiter} + +Sets the character that is interpreted as a delimiter between the fields for [CustomSeparated](../../interfaces/formats.md/#format-customseparated) data format. + +Default value: `'\t'`. + +### format_custom_row_before_delimiter {#format_custom_row_before_delimiter} + +Sets the character that is interpreted as a delimiter before the field of the first column for [CustomSeparated](../../interfaces/formats.md/#format-customseparated) data format. + +Default value: `''`. + +### format_custom_row_after_delimiter {#format_custom_row_after_delimiter} + +Sets the character that is interpreted as a delimiter after the field of the last column for [CustomSeparated](../../interfaces/formats.md/#format-customseparated) data format. + +Default value: `'\n'`. + +### format_custom_row_between_delimiter {#format_custom_row_between_delimiter} + +Sets the character that is interpreted as a delimiter between the rows for [CustomSeparated](../../interfaces/formats.md/#format-customseparated) data format. + +Default value: `''`. + +### format_custom_result_before_delimiter {#format_custom_result_before_delimiter} + +Sets the character that is interpreted as a prefix before the result set for [CustomSeparated](../../interfaces/formats.md/#format-customseparated) data format. + +Default value: `''`. + +### format_custom_result_after_delimiter {#format_custom_result_after_delimiter} + +Sets the character that is interpreted as a suffix after the result set for [CustomSeparated](../../interfaces/formats.md/#format-customseparated) data format. + +Default value: `''`. + +## Regexp format settings {#regexp-format-settings} + +### format_regexp_escaping_rule {#format_regexp_escaping_rule} + +Field escaping rule. + +Possible values: + +- `'Escaped'` — Similarly to [TSV](../../interfaces/formats.md/#tabseparated). +- `'Quoted'` — Similarly to [Values](../../interfaces/formats.md/#data-format-values). +- `'CSV'` — Similarly to [CSV](../../interfaces/formats.md/#csv). +- `'JSON'` — Similarly to [JSONEachRow](../../interfaces/formats.md/#jsoneachrow). +- `'XML'` — Similarly to [XML](../../interfaces/formats.md/#xml). +- `'Raw'` — Extracts subpatterns as a whole, no escaping rules, similarly to [TSVRaw](../../interfaces/formats.md/#tabseparatedraw). + +Default value: `Raw`. + +### format_regexp_skip_unmatched {#format_regexp_skip_unmatched} + +Skip lines unmatched by regular expression. + +Disabled by default. + +## CapnProto format settings {#capn-proto-format-settings} + +### format_capn_proto_enum_comparising_mode {#format_capn_proto_enum_comparising_mode} + +Determines how to map ClickHouse `Enum` data type and [CapnProto](../../interfaces/formats.md/#capnproto) `Enum` data type from schema. + +Possible values: + +- `'by_values'` — Values in enums should be the same, names can be different. +- `'by_names'` — Names in enums should be the same, values can be different. +- `'by_name_case_insensitive'` — Names in enums should be the same case-insensitive, values can be different. + +Default value: `'by_values'`. + +## MySQLDump format settings {#musqldump-format-settings} + +### input_format_mysql_dump_table_name (#input_format_mysql_dump_table_name) + +The name of the table from which to read data from in MySQLDump input format. + +### input_format_mysql_dump_map_columns (#input_format_mysql_dump_map_columns) + +Enables matching columns from table in MySQL dump and columns from ClickHouse table by names in MySQLDump input format. 
+ +Possible values: + +- 0 — Disabled. +- 1 — Enabled. + +Default value: 1. + +## SQLInsert format settings {#sqlinsert-format-settings} + +### output_format_sql_insert_max_batch_size {#output_format_sql_insert_max_batch_size} + +The maximum number of rows in one INSERT statement. + +Default value: `65505`. + +### output_format_sql_insert_table_name {#output_format_sql_insert_table_name} + +The name of table that will be used in the output INSERT statement. + +Default value: `'table''`. + +### output_format_sql_insert_include_column_names {#output_format_sql_insert_include_column_names} + +Include column names in INSERT statement. + +Default value: `true`. + +### output_format_sql_insert_use_replace {#output_format_sql_insert_use_replace} + +Use REPLACE keyword instead of INSERT. + +Default value: `false`. + +### output_format_sql_insert_quote_names {#output_format_sql_insert_quote_names} + +Quote column names with "`" characters + +Default value: `true`. + +## BSONEachRow format settings {#bson-each-row-format-settings} + +### output_format_bson_string_as_string {#output_format_bson_string_as_string} + +Use BSON String type instead of Binary for String columns. + +Disabled by default. + +### input_format_bson_skip_fields_with_unsupported_types_in_schema_inference {#input_format_bson_skip_fields_with_unsupported_types_in_schema_inference} + +Allow skipping columns with unsupported types while schema inference for format BSONEachRow. + +Disabled by default. + +## RowBinary format settings {#row-binary-format-settings} + +### format_binary_max_string_size {#format_binary_max_string_size} + +The maximum allowed size for String in RowBinary format. It prevents allocating large amount of memory in case of corrupted data. 0 means there is no limit. + +Default value: `1GiB` diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 3be31946a36..9c64824d5ea 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1,10 +1,11 @@ --- -sidebar_label: Settings -sidebar_position: 52 +sidebar_label: Core Settings +sidebar_position: 2 slug: /en/operations/settings/settings +toc_max_heading_level: 2 --- -# Settings +# Core Settings ## additional_table_filters @@ -1300,6 +1301,81 @@ Possible values: Default value: `3`. +## enable_experimental_query_result_cache {#enable-experimental-query-result-cache} + +If turned on, results of SELECT queries are stored in and (if available) retrieved from the [query result cache](../query-result-cache.md). + +Possible values: + +- 0 - Disabled +- 1 - Enabled + +Default value: `0`. + +## enable_experimental_query_result_cache_passive_usage {#enable-experimental-query-result-cache-passive-usage} + +If turned on, results of SELECT queries are (if available) retrieved from the [query result cache](../query-result-cache.md). + +Possible values: + +- 0 - Disabled +- 1 - Enabled + +Default value: `0`. + +## query_result_cache_store_results_of_queries_with_nondeterministic_functions {#query-result-cache-store-results-of-queries-with-nondeterministic-functions} + +If turned on, then results of SELECT queries with non-deterministic functions (e.g. `rand()`, `now()`) can be cached in the [query result cache](../query-result-cache.md). + +Possible values: + +- 0 - Disabled +- 1 - Enabled + +Default value: `0`. 
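+
+A minimal usage sketch of the experimental query result cache settings above: enable the cache for the session and run the same query twice; the second run may then be answered from the cache.
+
+```sql
+SET enable_experimental_query_result_cache = 1;
+SELECT sum(number) FROM numbers(1000000);
+-- Running the identical statement again may return the cached result
+SELECT sum(number) FROM numbers(1000000);
+```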
+ +## query_result_cache_min_query_runs {#query-result-cache-min-query-runs} + +Minimum number of times a SELECT query must run before its result is stored in the [query result cache](../query-result-cache.md). + +Possible values: + +- Positive integer >= 0. + +Default value: `0` + +## query_result_cache_min_query_duration {#query-result-cache-min-query-duration} + +Minimum duration in milliseconds a query needs to run for its result to be stored in the [query result cache](../query-result-cache.md). + +Possible values: + +- Positive integer >= 0. + +Default value: `0` + +## query_result_cache_ttl {#query-result-cache-ttl} + +After this time in seconds entries in the [query result cache](../query-result-cache.md) become stale. + +Possible values: + +- Positive integer >= 0. + +Default value: `60` + +## query_result_cache_share_between_users {#query-result-cache-share-between-users} + +If turned on, the result of SELECT queries cached in the [query result cache](../query-result-cache.md) can be read by other users. +It is not recommended to enable this setting due to security reasons. + +Possible values: + +- 0 - Disabled +- 1 - Enabled + +Default value: `0`. + ## insert_quorum {#settings-insert_quorum} Enables the quorum writes. @@ -1394,7 +1470,90 @@ By default, blocks inserted into replicated tables by the `INSERT` statement are For the replicated tables by default the only 100 of the most recent blocks for each partition are deduplicated (see [replicated_deduplication_window](merge-tree-settings.md/#replicated-deduplication-window), [replicated_deduplication_window_seconds](merge-tree-settings.md/#replicated-deduplication-window-seconds)). For not replicated tables see [non_replicated_deduplication_window](merge-tree-settings.md/#non-replicated-deduplication-window). -## async_insert_deduplicate {#settings-async-insert-deduplicate} +## Asynchronous Insert settings +### async_insert {#async-insert} + +Enables or disables asynchronous inserts. This makes sense only for insertion over HTTP protocol. Note that deduplication isn't working for such inserts. + +If enabled, the data is combined into batches before the insertion into tables, so it is possible to do small and frequent insertions into ClickHouse (up to 15000 queries per second) without buffer tables. + +The data is inserted either after the [async_insert_max_data_size](#async-insert-max-data-size) is exceeded or after [async_insert_busy_timeout_ms](#async-insert-busy-timeout-ms) milliseconds since the first `INSERT` query. If the [async_insert_stale_timeout_ms](#async-insert-stale-timeout-ms) is set to a non-zero value, the data is inserted after `async_insert_stale_timeout_ms` milliseconds since the last query. + +If [wait_for_async_insert](#wait-for-async-insert) is enabled, every client will wait for the data to be processed and flushed to the table. Otherwise, the query would be processed almost instantly, even if the data is not inserted. + +Possible values: + +- 0 — Insertions are made synchronously, one after another. +- 1 — Multiple asynchronous insertions enabled. + +Default value: `0`. + +### async_insert_threads {#async-insert-threads} + +The maximum number of threads for background data parsing and insertion. + +Possible values: + +- Positive integer. +- 0 — Asynchronous insertions are disabled. + +Default value: `16`. + +### wait_for_async_insert {#wait-for-async-insert} + +Enables or disables waiting for processing of asynchronous insertion. If enabled, server will return `OK` only after the data is inserted. 
Otherwise, it will return `OK` even if the data wasn't inserted. + +Possible values: + +- 0 — Server returns `OK` even if the data is not yet inserted. +- 1 — Server returns `OK` only after the data is inserted. + +Default value: `1`. + +### wait_for_async_insert_timeout {#wait-for-async-insert-timeout} + +The timeout in seconds for waiting for processing of asynchronous insertion. + +Possible values: + +- Positive integer. +- 0 — Disabled. + +Default value: [lock_acquire_timeout](#lock_acquire_timeout). + +### async_insert_max_data_size {#async-insert-max-data-size} + +The maximum size of the unparsed data in bytes collected per query before being inserted. + +Possible values: + +- Positive integer. +- 0 — Asynchronous insertions are disabled. + +Default value: `100000`. + +### async_insert_busy_timeout_ms {#async-insert-busy-timeout-ms} + +The maximum timeout in milliseconds since the first `INSERT` query before inserting collected data. + +Possible values: + +- Positive integer. +- 0 — Timeout disabled. + +Default value: `200`. + +### async_insert_stale_timeout_ms {#async-insert-stale-timeout-ms} + +The maximum timeout in milliseconds since the last `INSERT` query before dumping collected data. If enabled, the settings prolongs the [async_insert_busy_timeout_ms](#async-insert-busy-timeout-ms) with every `INSERT` query as long as [async_insert_max_data_size](#async-insert-max-data-size) is not exceeded. + +Possible values: + +- Positive integer. +- 0 — Timeout disabled. + +Default value: `0`. +### async_insert_deduplicate {#settings-async-insert-deduplicate} Enables or disables insert deduplication of `ASYNC INSERT` (for Replicated\* tables). @@ -1434,7 +1593,7 @@ The setting allows a user to provide own deduplication semantic in MergeTree/Rep For example, by providing a unique value for the setting in each INSERT statement, user can avoid the same inserted data being deduplicated. -Possilbe values: +Possible values: - Any string @@ -2791,7 +2950,63 @@ Enables or disables truncate before insert in [File](../../engines/table-engines Possible values: - 0 — `INSERT` query appends new data to the end of the file. -- 1 — `INSERT` replaces existing content of the file with the new data. +- 1 — `INSERT` query replaces existing content of the file with the new data. + +Default value: `0`. + +## s3_truncate_on_insert + +Enables or disables truncate before inserts in s3 engine tables. If disabled, an exception will be thrown on insert attempts if an S3 object already exists. + +Possible values: +- 0 — `INSERT` query appends new data to the end of the file. +- 1 — `INSERT` query replaces existing content of the file with the new data. + +Default value: `0`. + +## hdfs_truncate_on_insert + +Enables or disables truncation before an insert in hdfs engine tables. If disabled, an exception will be thrown on an attempt to insert if a file in HDFS already exists. + +Possible values: +- 0 — `INSERT` query appends new data to the end of the file. +- 1 — `INSERT` query replaces existing content of the file with the new data. + +Default value: `0`. + +## engine_file_allow_create_multiple_files + +Enables or disables creating a new file on each insert in file engine tables if the format has the suffix (`JSON`, `ORC`, `Parquet`, etc.). If enabled, on each insert a new file will be created with a name following this pattern: + +`data.Parquet` -> `data.1.Parquet` -> `data.2.Parquet`, etc. + +Possible values: +- 0 — `INSERT` query appends new data to the end of the file. 
+- 1 — `INSERT` query replaces existing content of the file with the new data. + +Default value: `0`. + +## s3_create_new_file_on_insert + +Enables or disables creating a new file on each insert in s3 engine tables. If enabled, on each insert a new S3 object will be created with the key, similar to this pattern: + +initial: `data.Parquet.gz` -> `data.1.Parquet.gz` -> `data.2.Parquet.gz`, etc. + +Possible values: +- 0 — `INSERT` query appends new data to the end of the file. +- 1 — `INSERT` query replaces existing content of the file with the new data. + +Default value: `0`. + +## hdfs_create_new_file_on_insert + +Enables or disables creating a new file on each insert in HDFS engine tables. If enabled, on each insert a new HDFS file will be created with the name, similar to this pattern: + +initial: `data.Parquet.gz` -> `data.1.Parquet.gz` -> `data.2.Parquet.gz`, etc. + +Possible values: +- 0 — `INSERT` query appends new data to the end of the file. +- 1 — `INSERT` query replaces existing content of the file with the new data. Default value: `0`. @@ -3419,88 +3634,6 @@ Default value: `0`. See an example for the [DESCRIBE](../../sql-reference/statements/describe-table.md) statement. -## async_insert {#async-insert} - -Enables or disables asynchronous inserts. This makes sense only for insertion over HTTP protocol. Note that deduplication isn't working for such inserts. - -If enabled, the data is combined into batches before the insertion into tables, so it is possible to do small and frequent insertions into ClickHouse (up to 15000 queries per second) without buffer tables. - -The data is inserted either after the [async_insert_max_data_size](#async-insert-max-data-size) is exceeded or after [async_insert_busy_timeout_ms](#async-insert-busy-timeout-ms) milliseconds since the first `INSERT` query. If the [async_insert_stale_timeout_ms](#async-insert-stale-timeout-ms) is set to a non-zero value, the data is inserted after `async_insert_stale_timeout_ms` milliseconds since the last query. - -If [wait_for_async_insert](#wait-for-async-insert) is enabled, every client will wait for the data to be processed and flushed to the table. Otherwise, the query would be processed almost instantly, even if the data is not inserted. - -Possible values: - -- 0 — Insertions are made synchronously, one after another. -- 1 — Multiple asynchronous insertions enabled. - -Default value: `0`. - -## async_insert_threads {#async-insert-threads} - -The maximum number of threads for background data parsing and insertion. - -Possible values: - -- Positive integer. -- 0 — Asynchronous insertions are disabled. - -Default value: `16`. - -## wait_for_async_insert {#wait-for-async-insert} - -Enables or disables waiting for processing of asynchronous insertion. If enabled, server will return `OK` only after the data is inserted. Otherwise, it will return `OK` even if the data wasn't inserted. - -Possible values: - -- 0 — Server returns `OK` even if the data is not yet inserted. -- 1 — Server returns `OK` only after the data is inserted. - -Default value: `1`. - -## wait_for_async_insert_timeout {#wait-for-async-insert-timeout} - -The timeout in seconds for waiting for processing of asynchronous insertion. - -Possible values: - -- Positive integer. -- 0 — Disabled. - -Default value: [lock_acquire_timeout](#lock_acquire_timeout). - -## async_insert_max_data_size {#async-insert-max-data-size} - -The maximum size of the unparsed data in bytes collected per query before being inserted. - -Possible values: - -- Positive integer. 
-- 0 — Asynchronous insertions are disabled. - -Default value: `100000`. - -## async_insert_busy_timeout_ms {#async-insert-busy-timeout-ms} - -The maximum timeout in milliseconds since the first `INSERT` query before inserting collected data. - -Possible values: - -- Positive integer. -- 0 — Timeout disabled. - -Default value: `200`. - -## async_insert_stale_timeout_ms {#async-insert-stale-timeout-ms} - -The maximum timeout in milliseconds since the last `INSERT` query before dumping collected data. If enabled, the settings prolongs the [async_insert_busy_timeout_ms](#async-insert-busy-timeout-ms) with every `INSERT` query as long as [async_insert_max_data_size](#async-insert-max-data-size) is not exceeded. - -Possible values: - -- Positive integer. -- 0 — Timeout disabled. - -Default value: `0`. ## alter_partition_verbose_result {#alter-partition-verbose-result} @@ -3591,37 +3724,39 @@ Read more about [memory overcommit](memory-overcommit.md). Default value: `1GiB`. -## schema_inference_use_cache_for_file {schema_inference_use_cache_for_file} +## Schema Inference settings + +### schema_inference_use_cache_for_file {schema_inference_use_cache_for_file} Enable schemas cache for schema inference in `file` table function. Default value: `true`. -## schema_inference_use_cache_for_s3 {schema_inference_use_cache_for_s3} +### schema_inference_use_cache_for_s3 {schema_inference_use_cache_for_s3} Enable schemas cache for schema inference in `s3` table function. Default value: `true`. -## schema_inference_use_cache_for_url {schema_inference_use_cache_for_url} +### schema_inference_use_cache_for_url {schema_inference_use_cache_for_url} Enable schemas cache for schema inference in `url` table function. Default value: `true`. -## schema_inference_use_cache_for_hdfs {schema_inference_use_cache_for_hdfs} +### schema_inference_use_cache_for_hdfs {schema_inference_use_cache_for_hdfs} Enable schemas cache for schema inference in `hdfs` table function. Default value: `true`. -## schema_inference_cache_require_modification_time_for_url {#schema_inference_cache_require_modification_time_for_url} +### schema_inference_cache_require_modification_time_for_url {#schema_inference_cache_require_modification_time_for_url} Use schema from cache for URL with last modification time validation (for urls with Last-Modified header). If this setting is enabled and URL doesn't have Last-Modified header, schema from cache won't be used. Default value: `true`. -## use_structure_from_insertion_table_in_table_functions {use_structure_from_insertion_table_in_table_functions} +### use_structure_from_insertion_table_in_table_functions {use_structure_from_insertion_table_in_table_functions} Use structure from insertion table instead of schema inference from data. @@ -3670,1485 +3805,6 @@ Possible values: Default value: `0`. -!!! note "Warning" - Use this setting only for backward compatibility if your use cases depend on old syntax. - -# Format settings {#format-settings} - -## input_format_skip_unknown_fields {#input_format_skip_unknown_fields} - -Enables or disables skipping insertion of extra data. - -When writing data, ClickHouse throws an exception if input data contain columns that do not exist in the target table. If skipping is enabled, ClickHouse does not insert extra data and does not throw an exception. 
- -Supported formats: - -- [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) -- [TSKV](../../interfaces/formats.md/#tskv) -- All formats with suffixes WithNames/WithNamesAndTypes -- [JSONColumns](../../interfaces/formats.md/#jsoncolumns) -- [MySQLDump](../../interfaces/formats.md/#mysqldump) - -Possible values: - -- 0 — Disabled. -- 1 — Enabled. - -Default value: 1. - -## input_format_with_names_use_header {#input_format_with_names_use_header} - -Enables or disables checking the column order when inserting data. - -To improve insert performance, we recommend disabling this check if you are sure that the column order of the input data is the same as in the target table. - -Supported formats: - -- [CSVWithNames](../../interfaces/formats.md/#csvwithnames) -- [CSVWithNamesAndTypes](../../interfaces/formats.md/#csvwithnamesandtypes) -- [TabSeparatedWithNames](../../interfaces/formats.md/#tabseparatedwithnames) -- [TabSeparatedWithNamesAndTypes](../../interfaces/formats.md/#tabseparatedwithnamesandtypes) -- [JSONCompactEachRowWithNames](../../interfaces/formats.md/#jsoncompacteachrowwithnames) -- [JSONCompactEachRowWithNamesAndTypes](../../interfaces/formats.md/#jsoncompacteachrowwithnamesandtypes) -- [JSONCompactStringsEachRowWithNames](../../interfaces/formats.md/#jsoncompactstringseachrowwithnames) -- [JSONCompactStringsEachRowWithNamesAndTypes](../../interfaces/formats.md/#jsoncompactstringseachrowwithnamesandtypes) -- [RowBinaryWithNames](../../interfaces/formats.md/#rowbinarywithnames) -- [RowBinaryWithNamesAndTypes](../../interfaces/formats.md/#rowbinarywithnamesandtypes) -- [CustomSeparatedWithNames](../../interfaces/formats.md/#customseparatedwithnames) -- [CustomSeparatedWithNamesAndTypes](../../interfaces/formats.md/#customseparatedwithnamesandtypes) - -Possible values: - -- 0 — Disabled. -- 1 — Enabled. - -Default value: 1. - -## input_format_with_types_use_header {#input_format_with_types_use_header} - -Controls whether format parser should check if data types from the input data match data types from the target table. - -Supported formats: - -- [CSVWithNamesAndTypes](../../interfaces/formats.md/#csvwithnamesandtypes) -- [TabSeparatedWithNamesAndTypes](../../interfaces/formats.md/#tabseparatedwithnamesandtypes) -- [JSONCompactEachRowWithNamesAndTypes](../../interfaces/formats.md/#jsoncompacteachrowwithnamesandtypes) -- [JSONCompactStringsEachRowWithNamesAndTypes](../../interfaces/formats.md/#jsoncompactstringseachrowwithnamesandtypes) -- [RowBinaryWithNamesAndTypes](../../interfaces/formats.md/#rowbinarywithnamesandtypes-rowbinarywithnamesandtypes) -- [CustomSeparatedWithNamesAndTypes](../../interfaces/formats.md/#customseparatedwithnamesandtypes) - -Possible values: - -- 0 — Disabled. -- 1 — Enabled. - -Default value: 1. - -## input_format_defaults_for_omitted_fields {#input_format_defaults_for_omitted_fields} - -When performing `INSERT` queries, replace omitted input column values with default values of the respective columns. This option only applies to [JSONEachRow](../../interfaces/formats.md/#jsoneachrow), [CSV](../../interfaces/formats.md/#csv), [TabSeparated](../../interfaces/formats.md/#tabseparated) formats and formats with `WithNames`/`WithNamesAndTypes` suffixes. - :::note -When this option is enabled, extended table metadata are sent from server to client. It consumes additional computing resources on the server and can reduce performance. +Use this setting only for backward compatibility if your use cases depend on old syntax. 
::: - -Possible values: - -- 0 — Disabled. -- 1 — Enabled. - -Default value: 1. - -## input_format_null_as_default {#input_format_null_as_default} - -Enables or disables the initialization of [NULL](../../sql-reference/syntax.md/#null-literal) fields with [default values](../../sql-reference/statements/create/table.md/#create-default-values), if data type of these fields is not [nullable](../../sql-reference/data-types/nullable.md/#data_type-nullable). -If column type is not nullable and this setting is disabled, then inserting `NULL` causes an exception. If column type is nullable, then `NULL` values are inserted as is, regardless of this setting. - -This setting is applicable to [INSERT ... VALUES](../../sql-reference/statements/insert-into.md) queries for text input formats. - -Possible values: - -- 0 — Inserting `NULL` into a not nullable column causes an exception. -- 1 — `NULL` fields are initialized with default column values. - -Default value: `1`. - -## input_format_allow_seeks {#input_format_allow_seeks} - -Allow seeks while reading in ORC/Parquet/Arrow input formats. - -Enabled by default. - -## input_format_max_rows_to_read_for_schema_inference {#input_format_max_rows_to_read_for_schema_inference} - -The maximum rows of data to read for automatic schema inference. - -Default value: `25'000`. - -## column_names_for_schema_inference {#column_names_for_schema_inference} - -The list of column names to use in schema inference for formats without column names. The format: 'column1,column2,column3,...' - -## schema_inference_hints {#schema_inference_hints} - -The list of column names and types to use as hints in schema inference for formats without schema. - -Example: - -Query: -```sql -desc format(JSONEachRow, '{"x" : 1, "y" : "String", "z" : "0.0.0.0" }') settings schema_inference_hints='x UInt8, z IPv4'; -``` - -Result: -```sql -x UInt8 -y Nullable(String) -z IPv4 -``` - -## schema_inference_make_columns_nullable {#schema_inference_make_columns_nullable} - -Controls making inferred types `Nullable` in schema inference for formats without information about nullability. -If the setting is enabled, the inferred type will be `Nullable` only if column contains `NULL` in a sample that is parsed during schema inference. - -Default value: `true`. - -## input_format_try_infer_integers {#input_format_try_infer_integers} - -If enabled, ClickHouse will try to infer integers instead of floats in schema inference for text formats. If all numbers in the column from input data are integers, the result type will be `Int64`, if at least one number is float, the result type will be `Float64`. - -Enabled by default. - -## input_format_try_infer_dates {#input_format_try_infer_dates} - -If enabled, ClickHouse will try to infer type `Date` from string fields in schema inference for text formats. If all fields from a column in input data were successfully parsed as dates, the result type will be `Date`, if at least one field was not parsed as date, the result type will be `String`. - -Enabled by default. - -## input_format_try_infer_datetimes {#input_format_try_infer_datetimes} - -If enabled, ClickHouse will try to infer type `DateTime64` from string fields in schema inference for text formats. If all fields from a column in input data were successfully parsed as datetimes, the result type will be `DateTime64`, if at least one field was not parsed as datetime, the result type will be `String`. - -Enabled by default. 
- -## date_time_input_format {#date_time_input_format} - -Allows choosing a parser of the text representation of date and time. - -The setting does not apply to [date and time functions](../../sql-reference/functions/date-time-functions.md). - -Possible values: - -- `'best_effort'` — Enables extended parsing. - - ClickHouse can parse the basic `YYYY-MM-DD HH:MM:SS` format and all [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) date and time formats. For example, `'2018-06-08T01:02:03.000Z'`. - -- `'basic'` — Use basic parser. - - ClickHouse can parse only the basic `YYYY-MM-DD HH:MM:SS` or `YYYY-MM-DD` format. For example, `2019-08-20 10:18:56` or `2019-08-20`. - -Default value: `'basic'`. - -See also: - -- [DateTime data type.](../../sql-reference/data-types/datetime.md) -- [Functions for working with dates and times.](../../sql-reference/functions/date-time-functions.md) - -## date_time_output_format {#date_time_output_format} - -Allows choosing different output formats of the text representation of date and time. - -Possible values: - -- `simple` - Simple output format. - - ClickHouse output date and time `YYYY-MM-DD hh:mm:ss` format. For example, `2019-08-20 10:18:56`. The calculation is performed according to the data type's time zone (if present) or server time zone. - -- `iso` - ISO output format. - - ClickHouse output date and time in [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) `YYYY-MM-DDThh:mm:ssZ` format. For example, `2019-08-20T10:18:56Z`. Note that output is in UTC (`Z` means UTC). - -- `unix_timestamp` - Unix timestamp output format. - - ClickHouse output date and time in [Unix timestamp](https://en.wikipedia.org/wiki/Unix_time) format. For example `1566285536`. - -Default value: `simple`. - -See also: - -- [DateTime data type.](../../sql-reference/data-types/datetime.md) -- [Functions for working with dates and times.](../../sql-reference/functions/date-time-functions.md) - -## input_format_ipv4_default_on_conversion_error {#input_format_ipv4_default_on_conversion_error} - -Deserialization of IPv4 will use default values instead of throwing exception on conversion error. - -Disabled by default. - -## input_format_ipv6_default_on_conversion_error {#input_format_ipv6_default_on_conversion_error} - -Deserialization of IPV6 will use default values instead of throwing exception on conversion error. - -Disabled by default. - -## bool_true_representation {#bool_true_representation} - -Text to represent true bool value in TSV/CSV/Vertical/Pretty formats. - -Default value: `true` - -## bool_false_representation {#bool_false_representation} - -Text to represent false bool value in TSV/CSV/Vertical/Pretty formats. - -Default value: `false` - -## output_format_decimal_trailing_zeros {#output_format_decimal_trailing_zeros} - -Output trailing zeros when printing Decimal values. E.g. 1.230000 instead of 1.23. - -Disabled by default. - -## input_format_allow_errors_num {#input_format_allow_errors_num} - -Sets the maximum number of acceptable errors when reading from text formats (CSV, TSV, etc.). - -The default value is 0. - -Always pair it with `input_format_allow_errors_ratio`. - -If an error occurred while reading rows but the error counter is still less than `input_format_allow_errors_num`, ClickHouse ignores the row and moves on to the next one. - -If both `input_format_allow_errors_num` and `input_format_allow_errors_ratio` are exceeded, ClickHouse throws an exception. 
- -## input_format_allow_errors_ratio {#input_format_allow_errors_ratio} - -Sets the maximum percentage of errors allowed when reading from text formats (CSV, TSV, etc.). -The percentage of errors is set as a floating-point number between 0 and 1. - -The default value is 0. - -Always pair it with `input_format_allow_errors_num`. - -If an error occurred while reading rows but the error counter is still less than `input_format_allow_errors_ratio`, ClickHouse ignores the row and moves on to the next one. - -If both `input_format_allow_errors_num` and `input_format_allow_errors_ratio` are exceeded, ClickHouse throws an exception. - -## format_schema {#format-schema} - -This parameter is useful when you are using formats that require a schema definition, such as [Cap’n Proto](https://capnproto.org/) or [Protobuf](https://developers.google.com/protocol-buffers/). The value depends on the format. - -## output_format_enable_streaming {#output_format_enable_streaming} - -Enable streaming in output formats that support it. - -Disabled by default. - -## output_format_write_statistics {#output_format_write_statistics} - -Write statistics about read rows, bytes, time elapsed in suitable output formats. - -Enabled by default - -## insert_distributed_one_random_shard {#insert_distributed_one_random_shard} - -Enables or disables random shard insertion into a [Distributed](../../engines/table-engines/special/distributed.md/#distributed) table when there is no distributed key. - -By default, when inserting data into a `Distributed` table with more than one shard, the ClickHouse server will reject any insertion request if there is no distributed key. When `insert_distributed_one_random_shard = 1`, insertions are allowed and data is forwarded randomly among all shards. - -Possible values: - -- 0 — Insertion is rejected if there are multiple shards and no distributed key is given. -- 1 — Insertion is done randomly among all available shards when no distributed key is given. - -Default value: `0`. - -## JSON formats settings {#json-formats-settings} - -### input_format_import_nested_json {#input_format_import_nested_json} - -Enables or disables the insertion of JSON data with nested objects. - -Supported formats: - -- [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) - -Possible values: - -- 0 — Disabled. -- 1 — Enabled. - -Default value: 0. - -See also: - -- [Usage of Nested Structures](../../interfaces/formats.md/#jsoneachrow-nested) with the `JSONEachRow` format. - -### input_format_json_read_bools_as_numbers {#input_format_json_read_bools_as_numbers} - -Allow parsing bools as numbers in JSON input formats. - -Enabled by default. - -### input_format_json_read_numbers_as_strings {#input_format_json_read_numbers_as_strings} - -Allow parsing numbers as strings in JSON input formats. - -Disabled by default. - -### input_format_json_read_objects_as_strings {#input_format_json_read_objects_as_strings} - -Allow parsing JSON objects as strings in JSON input formats. - -Example: - -```sql -SET input_format_json_read_objects_as_strings = 1; -CREATE TABLE test (id UInt64, obj String, date Date) ENGINE=Memory(); -INSERT INTO test FORMAT JSONEachRow {"id" : 1, "obj" : {"a" : 1, "b" : "Hello"}, "date" : "2020-01-01"}; -SELECT * FROM test; -``` - -Result: - -``` -┌─id─┬─obj──────────────────────┬───────date─┐ -│ 1 │ {"a" : 1, "b" : "Hello"} │ 2020-01-01 │ -└────┴──────────────────────────┴────────────┘ -``` - -Disabled by default. 
- -### input_format_json_validate_types_from_metadata {#input_format_json_validate_types_from_metadata} - -For JSON/JSONCompact/JSONColumnsWithMetadata input formats, if this setting is set to 1, -the types from metadata in input data will be compared with the types of the corresponding columns from the table. - -Enabled by default. - -### output_format_json_quote_64bit_integers {#output_format_json_quote_64bit_integers} - -Controls quoting of 64-bit or bigger [integers](../../sql-reference/data-types/int-uint.md) (like `UInt64` or `Int128`) when they are output in a [JSON](../../interfaces/formats.md/#json) format. -Such integers are enclosed in quotes by default. This behavior is compatible with most JavaScript implementations. - -Possible values: - -- 0 — Integers are output without quotes. -- 1 — Integers are enclosed in quotes. - -Default value: 1. - -### output_format_json_quote_64bit_floats {#output_format_json_quote_64bit_floats} - -Controls quoting of 64-bit [floats](../../sql-reference/data-types/float.md) when they are output in JSON* formats. - -Disabled by default. - -### output_format_json_quote_denormals {#output_format_json_quote_denormals} - -Enables `+nan`, `-nan`, `+inf`, `-inf` outputs in [JSON](../../interfaces/formats.md/#json) output format. - -Possible values: - -- 0 — Disabled. -- 1 — Enabled. - -Default value: 0. - -**Example** - -Consider the following table `account_orders`: - -```text -┌─id─┬─name───┬─duration─┬─period─┬─area─┐ -│ 1 │ Andrew │ 20 │ 0 │ 400 │ -│ 2 │ John │ 40 │ 0 │ 0 │ -│ 3 │ Bob │ 15 │ 0 │ -100 │ -└────┴────────┴──────────┴────────┴──────┘ -``` - -When `output_format_json_quote_denormals = 0`, the query returns `null` values in output: - -```sql -SELECT area/period FROM account_orders FORMAT JSON; -``` - -```json -{ - "meta": - [ - { - "name": "divide(area, period)", - "type": "Float64" - } - ], - - "data": - [ - { - "divide(area, period)": null - }, - { - "divide(area, period)": null - }, - { - "divide(area, period)": null - } - ], - - "rows": 3, - - "statistics": - { - "elapsed": 0.003648093, - "rows_read": 3, - "bytes_read": 24 - } -} -``` - -When `output_format_json_quote_denormals = 1`, the query returns: - -```json -{ - "meta": - [ - { - "name": "divide(area, period)", - "type": "Float64" - } - ], - - "data": - [ - { - "divide(area, period)": "inf" - }, - { - "divide(area, period)": "-nan" - }, - { - "divide(area, period)": "-inf" - } - ], - - "rows": 3, - - "statistics": - { - "elapsed": 0.000070241, - "rows_read": 3, - "bytes_read": 24 - } -} -``` - -### output_format_json_quote_decimals {#output_format_json_quote_decimals} - -Controls quoting of decimals in JSON output formats. - -Disabled by default. - -### output_format_json_escape_forward_slashes {#output_format_json_escape_forward_slashes} - -Controls escaping forward slashes for string outputs in JSON output format. This is intended for compatibility with JavaScript. Don't confuse with backslashes that are always escaped. - -Enabled by default. - -### output_format_json_named_tuples_as_objects {#output_format_json_named_tuples_as_objects} - -Serialize named tuple columns as JSON objects. - -Enabled by default. - -### input_format_json_named_tuples_as_objects {#input_format_json_named_tuples_as_objects} - -Parse named tuple columns as JSON objects. - -Enabled by default. 
- -### input_format_json_defaults_for_missing_elements_in_named_tuple {#input_format_json_defaults_for_missing_elements_in_named_tuple} - -Insert default values for missing elements in JSON object while parsing named tuple. -This setting works only when setting `input_format_json_named_tuples_as_objects` is enabled. - -Enabled by default. - -### output_format_json_array_of_rows {#output_format_json_array_of_rows} - -Enables the ability to output all rows as a JSON array in the [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) format. - -Possible values: - -- 1 — ClickHouse outputs all rows as an array, each row in the `JSONEachRow` format. -- 0 — ClickHouse outputs each row separately in the `JSONEachRow` format. - -Default value: `0`. - -**Example of a query with the enabled setting** - -Query: - -```sql -SET output_format_json_array_of_rows = 1; -SELECT number FROM numbers(3) FORMAT JSONEachRow; -``` - -Result: - -```text -[ -{"number":"0"}, -{"number":"1"}, -{"number":"2"} -] -``` - -**Example of a query with the disabled setting** - -Query: - -```sql -SET output_format_json_array_of_rows = 0; -SELECT number FROM numbers(3) FORMAT JSONEachRow; -``` - -Result: - -```text -{"number":"0"} -{"number":"1"} -{"number":"2"} -``` - -### output_format_json_validate_utf8 {#output_format_json_validate_utf8} - -Controls validation of UTF-8 sequences in JSON output formats, doesn't impact formats JSON/JSONCompact/JSONColumnsWithMetadata, they always validate UTF-8. - -Disabled by default. - -### format_json_object_each_row_column_for_object_name {#format_json_object_each_row_column_for_object_name} - -The name of column that will be used for storing/writing object names in [JSONObjectEachRow](../../interfaces/formats.md/#jsonobjecteachrow) format. -Column type should be String. If value is empty, default names `row_{i}`will be used for object names. - -Default value: ''. - -## TSV format settings {#tsv-format-settings} - -### input_format_tsv_empty_as_default {#input_format_tsv_empty_as_default} - -When enabled, replace empty input fields in TSV with default values. For complex default expressions `input_format_defaults_for_omitted_fields` must be enabled too. - -Disabled by default. - -### input_format_tsv_enum_as_number {#input_format_tsv_enum_as_number} - -When enabled, always treat enum values as enum ids for TSV input format. It's recommended to enable this setting if data contains only enum ids to optimize enum parsing. - -Possible values: - -- 0 — Enum values are parsed as values or as enum IDs. -- 1 — Enum values are parsed only as enum IDs. - -Default value: 0. - -**Example** - -Consider the table: - -```sql -CREATE TABLE table_with_enum_column_for_tsv_insert (Id Int32,Value Enum('first' = 1, 'second' = 2)) ENGINE=Memory(); -``` - -When the `input_format_tsv_enum_as_number` setting is enabled: - -Query: - -```sql -SET input_format_tsv_enum_as_number = 1; -INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2; -SELECT * FROM table_with_enum_column_for_tsv_insert; -``` - -Result: - -```text -┌──Id─┬─Value──┐ -│ 102 │ second │ -└─────┴────────┘ -``` - -Query: - -```sql -SET input_format_tsv_enum_as_number = 1; -INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 103 'first'; -``` - -throws an exception. 
- -When the `input_format_tsv_enum_as_number` setting is disabled: - -Query: - -```sql -SET input_format_tsv_enum_as_number = 0; -INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 102 2; -INSERT INTO table_with_enum_column_for_tsv_insert FORMAT TSV 103 'first'; -SELECT * FROM table_with_enum_column_for_tsv_insert; -``` - -Result: - -```text -┌──Id─┬─Value──┐ -│ 102 │ second │ -└─────┴────────┘ -┌──Id─┬─Value──┐ -│ 103 │ first │ -└─────┴────────┘ -``` - -### input_format_tsv_use_best_effort_in_schema_inference {#input_format_tsv_use_best_effort_in_schema_inference} - -Use some tweaks and heuristics to infer schema in TSV format. If disabled, all fields will be treated as String. - -Enabled by default. - -### input_format_tsv_skip_first_lines {#input_format_tsv_skip_first_lines} - -The number of lines to skip at the beginning of data in TSV input format. - -Default value: `0`. - -### output_format_tsv_crlf_end_of_line {#output_format_tsv_crlf_end_of_line} - -Use DOC/Windows-style line separator (CRLF) in TSV instead of Unix style (LF). - -Disabled by default. - -### format_tsv_null_representation {#format_tsv_null_representation} - -Defines the representation of `NULL` for [TSV](../../interfaces/formats.md/#tabseparated) output and input formats. User can set any string as a value, for example, `My NULL`. - -Default value: `\N`. - -**Examples** - -Query - -```sql -SELECT * FROM tsv_custom_null FORMAT TSV; -``` - -Result - -```text -788 -\N -\N -``` - -Query - -```sql -SET format_tsv_null_representation = 'My NULL'; -SELECT * FROM tsv_custom_null FORMAT TSV; -``` - -Result - -```text -788 -My NULL -My NULL -``` - -## CSV format settings {#csv-format-settings} - -### format_csv_delimiter {#format_csv_delimiter} - -The character is interpreted as a delimiter in the CSV data. - -Default value: `,`. - -### format_csv_allow_single_quotes {#format_csv_allow_single_quotes} - -If it is set to true, allow strings in single quotes. - -Enabled by default. - -### format_csv_allow_double_quotes {#format_csv_allow_double_quotes} - -If it is set to true, allow strings in double quotes. - -Enabled by default. - -### output_format_csv_crlf_end_of_line {#output_format_csv_crlf_end_of_line} - -Use DOS/Windows-style line separator (CRLF) in CSV instead of Unix style (LF). - -Disabled by default. - -### input_format_csv_enum_as_number {#input_format_csv_enum_as_number} - -When enabled, always treat enum values as enum ids for CSV input format. It's recommended to enable this setting if data contains only enum ids to optimize enum parsing. - -Possible values: - -- 0 — Enum values are parsed as values or as enum IDs. -- 1 — Enum values are parsed only as enum IDs. - -Default value: 0. - -**Examples** - -Consider the table: - -```sql -CREATE TABLE table_with_enum_column_for_csv_insert (Id Int32,Value Enum('first' = 1, 'second' = 2)) ENGINE=Memory(); -``` - -When the `input_format_csv_enum_as_number` setting is enabled: - -Query: - -```sql -SET input_format_csv_enum_as_number = 1; -INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2 -``` - -Result: - -```text -┌──Id─┬─Value──┐ -│ 102 │ second │ -└─────┴────────┘ -``` - -Query: - -```sql -SET input_format_csv_enum_as_number = 1; -INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 103,'first' -``` - -throws an exception. 
- -When the `input_format_csv_enum_as_number` setting is disabled: - -Query: - -```sql -SET input_format_csv_enum_as_number = 0; -INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 102,2 -INSERT INTO table_with_enum_column_for_csv_insert FORMAT CSV 103,'first' -SELECT * FROM table_with_enum_column_for_csv_insert; -``` - -Result: - -```text -┌──Id─┬─Value──┐ -│ 102 │ second │ -└─────┴────────┘ -┌──Id─┬─Value─┐ -│ 103 │ first │ -└─────┴───────┘ -``` - -### input_format_csv_arrays_as_nested_csv {#input_format_csv_arrays_as_nested_csv} - -When reading Array from CSV, expect that its elements were serialized in nested CSV and then put into string. Example: "[""Hello"", ""world"", ""42"""" TV""]". Braces around array can be omitted. - -Disabled by default. - -### input_format_csv_empty_as_default {#input_format_csv_empty_as_default} - -When enabled, replace empty input fields in CSV with default values. For complex default expressions `input_format_defaults_for_omitted_fields` must be enabled too. - -Enabled by default. - -### input_format_csv_use_best_effort_in_schema_inference {#input_format_csv_use_best_effort_in_schema_inference} - -Use some tweaks and heuristics to infer schema in CSV format. If disabled, all fields will be treated as String. - -Enabled by default. - -### input_format_csv_skip_first_lines {#input_format_csv_skip_first_lines} - -The number of lines to skip at the beginning of data in CSV input format. - -Default value: `0`. - -### format_csv_null_representation {#format_csv_null_representation} - -Defines the representation of `NULL` for [CSV](../../interfaces/formats.md/#csv) output and input formats. User can set any string as a value, for example, `My NULL`. - -Default value: `\N`. - -**Examples** - -Query - -```sql -SELECT * from csv_custom_null FORMAT CSV; -``` - -Result - -```text -788 -\N -\N -``` - -Query - -```sql -SET format_csv_null_representation = 'My NULL'; -SELECT * FROM csv_custom_null FORMAT CSV; -``` - -Result - -```text -788 -My NULL -My NULL -``` - -## Values format settings {#values-format-settings} - -### input_format_values_interpret_expressions {#input_format_values_interpret_expressions} - -Enables or disables the full SQL parser if the fast stream parser can’t parse the data. This setting is used only for the [Values](../../interfaces/formats.md/#data-format-values) format at the data insertion. For more information about syntax parsing, see the [Syntax](../../sql-reference/syntax.md) section. - -Possible values: - -- 0 — Disabled. - - In this case, you must provide formatted data. See the [Formats](../../interfaces/formats.md) section. - -- 1 — Enabled. - - In this case, you can use an SQL expression as a value, but data insertion is much slower this way. If you insert only formatted data, then ClickHouse behaves as if the setting value is 0. - -Default value: 1. - -Example of Use - -Insert the [DateTime](../../sql-reference/data-types/datetime.md) type value with the different settings. - -``` sql -SET input_format_values_interpret_expressions = 0; -INSERT INTO datetime_t VALUES (now()) -``` - -``` text -Exception on client: -Code: 27. DB::Exception: Cannot parse input: expected ) before: now()): (at row 1) -``` - -``` sql -SET input_format_values_interpret_expressions = 1; -INSERT INTO datetime_t VALUES (now()) -``` - -``` text -Ok. -``` - -The last query is equivalent to the following: - -``` sql -SET input_format_values_interpret_expressions = 0; -INSERT INTO datetime_t SELECT now() -``` - -``` text -Ok. 
-``` - -### input_format_values_deduce_templates_of_expressions {#input_format_values_deduce_templates_of_expressions} - -Enables or disables template deduction for SQL expressions in [Values](../../interfaces/formats.md/#data-format-values) format. It allows parsing and interpreting expressions in `Values` much faster if expressions in consecutive rows have the same structure. ClickHouse tries to deduce the template of an expression, parse the following rows using this template and evaluate the expression on a batch of successfully parsed rows. - -Possible values: - -- 0 — Disabled. -- 1 — Enabled. - -Default value: 1. - -For the following query: - -``` sql -INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), (upper('Values')), ... -``` - -- If `input_format_values_interpret_expressions=1` and `format_values_deduce_templates_of_expressions=0`, expressions are interpreted separately for each row (this is very slow for large number of rows). -- If `input_format_values_interpret_expressions=0` and `format_values_deduce_templates_of_expressions=1`, expressions in the first, second and third rows are parsed using template `lower(String)` and interpreted together, expression in the forth row is parsed with another template (`upper(String)`). -- If `input_format_values_interpret_expressions=1` and `format_values_deduce_templates_of_expressions=1`, the same as in previous case, but also allows fallback to interpreting expressions separately if it’s not possible to deduce template. - -### input_format_values_accurate_types_of_literals {#input_format_values_accurate_types_of_literals} - -This setting is used only when `input_format_values_deduce_templates_of_expressions = 1`. Expressions for some column may have the same structure, but contain numeric literals of different types, e.g. - -``` sql -(..., abs(0), ...), -- UInt64 literal -(..., abs(3.141592654), ...), -- Float64 literal -(..., abs(-1), ...), -- Int64 literal -``` - -Possible values: - -- 0 — Disabled. - - In this case, ClickHouse may use a more general type for some literals (e.g., `Float64` or `Int64` instead of `UInt64` for `42`), but it may cause overflow and precision issues. - -- 1 — Enabled. - - In this case, ClickHouse checks the actual type of literal and uses an expression template of the corresponding type. In some cases, it may significantly slow down expression evaluation in `Values`. - -Default value: 1. - -## Arrow format settings {#arrow-format-settings} - -### input_format_arrow_import_nested {#input_format_arrow_import_nested} - -Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [Arrow](../../interfaces/formats.md/#data_types-matching-arrow) input format. - -Possible values: - -- 0 — Data can not be inserted into `Nested` columns as an array of structs. -- 1 — Data can be inserted into `Nested` columns as an array of structs. - -Default value: `0`. - -### input_format_arrow_case_insensitive_column_matching {#input_format_arrow_case_insensitive_column_matching} - -Ignore case when matching Arrow column names with ClickHouse column names. - -Disabled by default. - -### input_format_arrow_allow_missing_columns {#input_format_arrow_allow_missing_columns} - -While importing data, when column is not found in schema default value will be used instead of error. - -Disabled by default. 
- -### input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference {#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference} - -Allow skipping columns with unsupported types while schema inference for format Arrow. - -Disabled by default. - -### output_format_arrow_low_cardinality_as_dictionary {#output_format_arrow_low_cardinality_as_dictionary} - -Allows to convert the [LowCardinality](../../sql-reference/data-types/lowcardinality.md) type to the `DICTIONARY` type of the [Arrow](../../interfaces/formats.md/#data-format-arrow) format for `SELECT` queries. - -Possible values: - -- 0 — The `LowCardinality` type is not converted to the `DICTIONARY` type. -- 1 — The `LowCardinality` type is converted to the `DICTIONARY` type. - -Default value: `0`. - -### output_format_arrow_string_as_string {#output_format_arrow_string_as_string} - -Use Arrow String type instead of Binary for String columns. - -Disabled by default. - -## ORC format settings {#orc-format-settings} - -### input_format_orc_import_nested {#input_format_orc_import_nested} - -Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [ORC](../../interfaces/formats.md/#data-format-orc) input format. - -Possible values: - -- 0 — Data can not be inserted into `Nested` columns as an array of structs. -- 1 — Data can be inserted into `Nested` columns as an array of structs. - -Default value: `0`. - -### input_format_orc_row_batch_size {#input_format_orc_row_batch_size} - -Batch size when reading ORC stripes. - -Default value: `100'000` - -### input_format_orc_case_insensitive_column_matching {#input_format_orc_case_insensitive_column_matching} - -Ignore case when matching ORC column names with ClickHouse column names. - -Disabled by default. - -### input_format_orc_allow_missing_columns {#input_format_orc_allow_missing_columns} - -While importing data, when column is not found in schema default value will be used instead of error. - -Disabled by default. - -### input_format_orc_skip_columns_with_unsupported_types_in_schema_inference {#input_format_orc_skip_columns_with_unsupported_types_in_schema_inference} - -Allow skipping columns with unsupported types while schema inference for format Arrow. - -Disabled by default. - -### output_format_orc_string_as_string {#output_format_orc_string_as_string} - -Use ORC String type instead of Binary for String columns. - -Disabled by default. - -## Parquet format settings {#parquet-format-settings} - -## input_format_parquet_import_nested {#input_format_parquet_import_nested} - -Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [Parquet](../../interfaces/formats.md/#data-format-parquet) input format. - -Possible values: - -- 0 — Data can not be inserted into `Nested` columns as an array of structs. -- 1 — Data can be inserted into `Nested` columns as an array of structs. - -Default value: `0`. - -### input_format_parquet_case_insensitive_column_matching {#input_format_parquet_case_insensitive_column_matching} - -Ignore case when matching Parquet column names with ClickHouse column names. - -Disabled by default. - -### output_format_parquet_row_group_size {#output_format_parquet_row_group_size} - -Row group size in rows. - -Default value: `1'000'000`. 
- -### input_format_parquet_allow_missing_columns {#input_format_parquet_allow_missing_columns} - -While importing data, when column is not found in schema default value will be used instead of error. - -Disabled by default. - -### input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference {#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference} - -Allow skipping columns with unsupported types while schema inference for format Parquet. - -Disabled by default. - -### output_format_parquet_string_as_string {#output_format_parquet_string_as_string} - -Use Parquet String type instead of Binary for String columns. - -Disabled by default. - -## Hive format settings {#hive-format-settings} - -### input_format_hive_text_fields_delimiter {#input_format_hive_text_fields_delimiter} - -Delimiter between fields in Hive Text File. - -Default value: `\x01`. - -### input_format_hive_text_collection_items_delimiter {#input_format_hive_text_collection_items_delimiter} - -Delimiter between collection(array or map) items in Hive Text File. - -Default value: `\x02`. - -### input_format_hive_text_map_keys_delimiter {#input_format_hive_text_map_keys_delimiter} - -Delimiter between a pair of map key/values in Hive Text File. - -Default value: `\x03`. - -## MsgPack format settings {#msgpack-format-settings} - -### input_format_msgpack_number_of_columns {#input_format_msgpack_number_of_columns} - -The number of columns in inserted MsgPack data. Used for automatic schema inference from data. - -Default value: `0`. - -### output_format_msgpack_uuid_representation {#output_format_msgpack_uuid_representation} - -The way how to output UUID in MsgPack format. -Possible values: - -- `bin` - as 16-bytes binary. -- `str` - as a string of 36 bytes. -- `ext` - as extention with ExtType = 2. - -Default value: `ext`. - - -## Protobuf format settings {#protobuf-format-settings} - -### input_format_protobuf_flatten_google_wrappers {#input_format_protobuf_flatten_google_wrappers} - -Enable Google wrappers for regular non-nested columns, e.g. google.protobuf.StringValue 'str' for String column 'str'. For Nullable columns empty wrappers are recognized as defaults, and missing as nulls. - -Disabled by default. - -### output_format_protobuf_nullables_with_google_wrappers {#output_format_protobuf_nullables_with_google_wrappers} - -When serializing Nullable columns with Google wrappers, serialize default values as empty wrappers. If turned off, default and null values are not serialized. - -Disabled by default. - -## Avro format settings {#avro-format-settings} - -### input_format_avro_allow_missing_fields {#input_format_avro_allow_missing_fields} - -Enables using fields that are not specified in [Avro](../../interfaces/formats.md/#data-format-avro) or [AvroConfluent](../../interfaces/formats.md/#data-format-avro-confluent) format schema. When a field is not found in the schema, ClickHouse uses the default value instead of throwing an exception. - -Possible values: - -- 0 — Disabled. -- 1 — Enabled. - -Default value: 0. - -### format_avro_schema_registry_url {#format_avro_schema_registry_url} - -Sets [Confluent Schema Registry](https://docs.confluent.io/current/schema-registry/index.html) URL to use with [AvroConfluent](../../interfaces/formats.md/#data-format-avro-confluent) format. - -Default value: `Empty`. - -### output_format_avro_codec {#output_format_avro_codec} - -Sets the compression codec used for output Avro file. 
- -Type: string - -Possible values: - -- `null` — No compression -- `deflate` — Compress with Deflate (zlib) -- `snappy` — Compress with [Snappy](https://google.github.io/snappy/) - -Default value: `snappy` (if available) or `deflate`. - -### output_format_avro_sync_interval {#output_format_avro_sync_interval} - -Sets minimum data size (in bytes) between synchronization markers for output Avro file. - -Type: unsigned int - -Possible values: 32 (32 bytes) - 1073741824 (1 GiB) - -Default value: 32768 (32 KiB) - -### output_format_avro_string_column_pattern {#output_format_avro_string_column_pattern} - -Regexp of column names of type String to output as Avro `string` (default is `bytes`). -RE2 syntax is supported. - -Type: string - -### output_format_avro_rows_in_file {#output_format_avro_rows_in_file} - -Max rows in a file (if permitted by storage). - -Default value: `1`. - -## Pretty formats settings {#pretty-formats-settings} - -### output_format_pretty_max_rows {#output_format_pretty_max_rows} - -Rows limit for Pretty formats. - -Default value: `10'000`. - -### output_format_pretty_max_column_pad_width {#output_format_pretty_max_column_pad_width} - -Maximum width to pad all values in a column in Pretty formats. - -Default value: `250`. - -### output_format_pretty_max_value_width {#output_format_pretty_max_value_width} - -Limits the width of value displayed in [Pretty](../../interfaces/formats.md/#pretty) formats. If the value width exceeds the limit, the value is cut. - -Possible values: - -- Positive integer. -- 0 — The value is cut completely. - -Default value: `10000` symbols. - -**Examples** - -Query: -```sql -SET output_format_pretty_max_value_width = 10; -SELECT range(number) FROM system.numbers LIMIT 10 FORMAT PrettyCompactNoEscapes; -``` -Result: -```text -┌─range(number)─┐ -│ [] │ -│ [0] │ -│ [0,1] │ -│ [0,1,2] │ -│ [0,1,2,3] │ -│ [0,1,2,3,4⋯ │ -│ [0,1,2,3,4⋯ │ -│ [0,1,2,3,4⋯ │ -│ [0,1,2,3,4⋯ │ -│ [0,1,2,3,4⋯ │ -└───────────────┘ -``` - -Query with zero width: -```sql -SET output_format_pretty_max_value_width = 0; -SELECT range(number) FROM system.numbers LIMIT 5 FORMAT PrettyCompactNoEscapes; -``` -Result: -```text -┌─range(number)─┐ -│ ⋯ │ -│ ⋯ │ -│ ⋯ │ -│ ⋯ │ -│ ⋯ │ -└───────────────┘ -``` - -### output_format_pretty_color {#output_format_pretty_color} - -Use ANSI escape sequences to paint colors in Pretty formats. - -Enabled by default. - -### output_format_pretty_grid_charset {#output_format_pretty_grid_charset} - -Allows changing a charset which is used for printing grids borders. Available charsets are UTF-8, ASCII. - -**Example** - -``` text -SET output_format_pretty_grid_charset = 'UTF-8'; -SELECT * FROM a; -┌─a─┐ -│ 1 │ -└───┘ - -SET output_format_pretty_grid_charset = 'ASCII'; -SELECT * FROM a; -+-a-+ -| 1 | -+---+ -``` - -### output_format_pretty_row_numbers {#output_format_pretty_row_numbers} - -Adds row numbers to output in the [Pretty](../../interfaces/formats.md/#pretty) format. - -Possible values: - -- 0 — Output without row numbers. -- 1 — Output with row numbers. - -Default value: `0`. - -**Example** - -Query: - -```sql -SET output_format_pretty_row_numbers = 1; -SELECT TOP 3 name, value FROM system.settings; -``` - -Result: -```text - ┌─name────────────────────┬─value───┐ -1. │ min_compress_block_size │ 65536 │ -2. │ max_compress_block_size │ 1048576 │ -3. 
│ max_block_size │ 65505 │ - └─────────────────────────┴─────────┘ -``` - -## Template format settings {#template-format-settings} - -### format_template_resultset {#format_template_resultset} - -Path to file which contains format string for result set (for Template format). - -### format_template_row {#format_template_row} - -Path to file which contains format string for rows (for Template format). - -### format_template_rows_between_delimiter {#format_template_rows_between_delimiter} - -Delimiter between rows (for Template format). - -## CustomSeparated format settings {custom-separated-format-settings} - -### format_custom_escaping_rule {#format_custom_escaping_rule} - -Sets the field escaping rule for [CustomSeparated](../../interfaces/formats.md/#format-customseparated) data format. - -Possible values: - -- `'Escaped'` — Similarly to [TSV](../../interfaces/formats.md/#tabseparated). -- `'Quoted'` — Similarly to [Values](../../interfaces/formats.md/#data-format-values). -- `'CSV'` — Similarly to [CSV](../../interfaces/formats.md/#csv). -- `'JSON'` — Similarly to [JSONEachRow](../../interfaces/formats.md/#jsoneachrow). -- `'XML'` — Similarly to [XML](../../interfaces/formats.md/#xml). -- `'Raw'` — Extracts subpatterns as a whole, no escaping rules, similarly to [TSVRaw](../../interfaces/formats.md/#tabseparatedraw). - -Default value: `'Escaped'`. - -### format_custom_field_delimiter {#format_custom_field_delimiter} - -Sets the character that is interpreted as a delimiter between the fields for [CustomSeparated](../../interfaces/formats.md/#format-customseparated) data format. - -Default value: `'\t'`. - -### format_custom_row_before_delimiter {#format_custom_row_before_delimiter} - -Sets the character that is interpreted as a delimiter before the field of the first column for [CustomSeparated](../../interfaces/formats.md/#format-customseparated) data format. - -Default value: `''`. - -### format_custom_row_after_delimiter {#format_custom_row_after_delimiter} - -Sets the character that is interpreted as a delimiter after the field of the last column for [CustomSeparated](../../interfaces/formats.md/#format-customseparated) data format. - -Default value: `'\n'`. - -### format_custom_row_between_delimiter {#format_custom_row_between_delimiter} - -Sets the character that is interpreted as a delimiter between the rows for [CustomSeparated](../../interfaces/formats.md/#format-customseparated) data format. - -Default value: `''`. - -### format_custom_result_before_delimiter {#format_custom_result_before_delimiter} - -Sets the character that is interpreted as a prefix before the result set for [CustomSeparated](../../interfaces/formats.md/#format-customseparated) data format. - -Default value: `''`. - -### format_custom_result_after_delimiter {#format_custom_result_after_delimiter} - -Sets the character that is interpreted as a suffix after the result set for [CustomSeparated](../../interfaces/formats.md/#format-customseparated) data format. - -Default value: `''`. - -## Regexp format settings {#regexp-format-settings} - -### format_regexp_escaping_rule {#format_regexp_escaping_rule} - -Field escaping rule. - -Possible values: - -- `'Escaped'` — Similarly to [TSV](../../interfaces/formats.md/#tabseparated). -- `'Quoted'` — Similarly to [Values](../../interfaces/formats.md/#data-format-values). -- `'CSV'` — Similarly to [CSV](../../interfaces/formats.md/#csv). -- `'JSON'` — Similarly to [JSONEachRow](../../interfaces/formats.md/#jsoneachrow). 
-- `'XML'` — Similarly to [XML](../../interfaces/formats.md/#xml). -- `'Raw'` — Extracts subpatterns as a whole, no escaping rules, similarly to [TSVRaw](../../interfaces/formats.md/#tabseparatedraw). - -Default value: `Raw`. - -### format_regexp_skip_unmatched {#format_regexp_skip_unmatched} - -Skip lines unmatched by regular expression. - -Disabled by default. - -## CapnProto format settings {#capn-proto-format-settings} - -### format_capn_proto_enum_comparising_mode {#format_capn_proto_enum_comparising_mode} - -Determines how to map ClickHouse `Enum` data type and [CapnProto](../../interfaces/formats.md/#capnproto) `Enum` data type from schema. - -Possible values: - -- `'by_values'` — Values in enums should be the same, names can be different. -- `'by_names'` — Names in enums should be the same, values can be different. -- `'by_name_case_insensitive'` — Names in enums should be the same case-insensitive, values can be different. - -Default value: `'by_values'`. - -## MySQLDump format settings {#musqldump-format-settings} - -### input_format_mysql_dump_table_name (#input_format_mysql_dump_table_name) - -The name of the table from which to read data from in MySQLDump input format. - -### input_format_mysql_dump_map_columns (#input_format_mysql_dump_map_columns) - -Enables matching columns from table in MySQL dump and columns from ClickHouse table by names in MySQLDump input format. - -Possible values: - -- 0 — Disabled. -- 1 — Enabled. - -Default value: 1. - -## SQLInsert format settings {#sqlinsert-format-settings} - -### output_format_sql_insert_max_batch_size {#output_format_sql_insert_max_batch_size} - -The maximum number of rows in one INSERT statement. - -Default value: `65505`. - -### output_format_sql_insert_table_name {#output_format_sql_insert_table_name} - -The name of table that will be used in the output INSERT statement. - -Default value: `'table''`. - -### output_format_sql_insert_include_column_names {#output_format_sql_insert_include_column_names} - -Include column names in INSERT statement. - -Default value: `true`. - -### output_format_sql_insert_use_replace {#output_format_sql_insert_use_replace} - -Use REPLACE keyword instead of INSERT. - -Default value: `false`. - -### output_format_sql_insert_quote_names {#output_format_sql_insert_quote_names} - -Quote column names with "`" characters - -Default value: `true`. - -## BSONEachRow format settings {#bson-each-row-format-settings} - -### output_format_bson_string_as_string {#output_format_bson_string_as_string} - -Use BSON String type instead of Binary for String columns. - -Disabled by default. - -### input_format_bson_skip_fields_with_unsupported_types_in_schema_inference {#input_format_bson_skip_fields_with_unsupported_types_in_schema_inference} - -Allow skipping columns with unsupported types while schema inference for format BSONEachRow. - -Disabled by default. - -## RowBinary format settings {#row-binary-format-settings} - -### format_binary_max_string_size {#format_binary_max_string_size} - -The maximum allowed size for String in RowBinary format. It prevents allocating large amount of memory in case of corrupted data. 0 means there is no limit. 
- -Default value: `1GiB` diff --git a/docs/en/operations/system-tables/asynchronous_metrics.md b/docs/en/operations/system-tables/asynchronous_metrics.md index 19ff6e33142..551aa771ec9 100644 --- a/docs/en/operations/system-tables/asynchronous_metrics.md +++ b/docs/en/operations/system-tables/asynchronous_metrics.md @@ -9,6 +9,7 @@ Columns: - `metric` ([String](../../sql-reference/data-types/string.md)) — Metric name. - `value` ([Float64](../../sql-reference/data-types/float.md)) — Metric value. +- `description` ([String](../../sql-reference/data-types/string.md) - Metric description) **Example** @@ -17,18 +18,18 @@ SELECT * FROM system.asynchronous_metrics LIMIT 10 ``` ``` text -┌─metric──────────────────────────────────┬──────value─┐ -│ jemalloc.background_thread.run_interval │ 0 │ -│ jemalloc.background_thread.num_runs │ 0 │ -│ jemalloc.background_thread.num_threads │ 0 │ -│ jemalloc.retained │ 422551552 │ -│ jemalloc.mapped │ 1682989056 │ -│ jemalloc.resident │ 1656446976 │ -│ jemalloc.metadata_thp │ 0 │ -│ jemalloc.metadata │ 10226856 │ -│ UncompressedCacheCells │ 0 │ -│ MarkCacheFiles │ 0 │ -└─────────────────────────────────────────┴────────────┘ +┌─metric──────────────────────────────────┬──────value─┬─description────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐ +│ AsynchronousMetricsCalculationTimeSpent │ 0.00179053 │ Time in seconds spent for calculation of asynchronous metrics (this is the overhead of asynchronous metrics). │ +│ NumberOfDetachedByUserParts │ 0 │ The total number of parts detached from MergeTree tables by users with the `ALTER TABLE DETACH` query (as opposed to unexpected, broken or ignored parts). The server does not care about detached parts and they can be removed. │ +│ NumberOfDetachedParts │ 0 │ The total number of parts detached from MergeTree tables. A part can be detached by a user with the `ALTER TABLE DETACH` query or by the server itself it the part is broken, unexpected or unneeded. The server does not care about detached parts and they can be removed. │ +│ TotalRowsOfMergeTreeTables │ 2781309 │ Total amount of rows (records) stored in all tables of MergeTree family. │ +│ TotalBytesOfMergeTreeTables │ 7741926 │ Total amount of bytes (compressed, including data and indices) stored in all tables of MergeTree family. │ +│ NumberOfTables │ 93 │ Total number of tables summed across the databases on the server, excluding the databases that cannot contain MergeTree tables. The excluded database engines are those who generate the set of tables on the fly, like `Lazy`, `MySQL`, `PostgreSQL`, `SQlite`. │ +│ NumberOfDatabases │ 6 │ Total number of databases on the server. │ +│ MaxPartCountForPartition │ 6 │ Maximum number of parts per partition across all partitions of all tables of MergeTree family. Values larger than 300 indicates misconfiguration, overload, or massive data loading. │ +│ ReplicasSumMergesInQueue │ 0 │ Sum of merge operations in the queue (still to be applied) across Replicated tables. │ +│ ReplicasSumInsertsInQueue │ 0 │ Sum of INSERT operations in the queue (still to be replicated) across Replicated tables. 
│ +└─────────────────────────────────────────┴────────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ ``` **See Also** diff --git a/docs/en/operations/system-tables/index.md b/docs/en/operations/system-tables/index.md index 14a6b5ea786..284ba866cc8 100644 --- a/docs/en/operations/system-tables/index.md +++ b/docs/en/operations/system-tables/index.md @@ -72,3 +72,10 @@ If procfs is supported and enabled on the system, ClickHouse server collects the - `OSWriteChars` - `OSReadBytes` - `OSWriteBytes` + +## Related content + +- Blog: [System Tables and a window into the internals of ClickHouse](https://clickhouse.com/blog/clickhouse-debugging-issues-with-system-tables) +- Blog: [Essential monitoring queries - part 1 - INSERT queries](https://clickhouse.com/blog/monitoring-troubleshooting-insert-queries-clickhouse) +- Blog: [Essential monitoring queries - part 2 - SELECT queries](https://clickhouse.com/blog/monitoring-troubleshooting-select-queries-clickhouse) + diff --git a/docs/en/sql-reference/data-types/lowcardinality.md b/docs/en/sql-reference/data-types/lowcardinality.md index 6410de8199a..58a99baa09e 100644 --- a/docs/en/sql-reference/data-types/lowcardinality.md +++ b/docs/en/sql-reference/data-types/lowcardinality.md @@ -54,7 +54,9 @@ Functions: - [toLowCardinality](../../sql-reference/functions/type-conversion-functions.md#tolowcardinality) -## See Also +## Related content -- [Reducing ClickHouse Storage Cost with the Low Cardinality Type – Lessons from an Instana Engineer](https://www.instana.com/blog/reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer/). -- [String Optimization (video presentation in Russian)](https://youtu.be/rqf-ILRgBdY?list=PL0Z2YDlm0b3iwXCpEFiOOYmwXzVmjJfEt). [Slides in English](https://github.com/ClickHouse/clickhouse-presentations/raw/master/meetup19/string_optimization.pdf). +- [Reducing ClickHouse Storage Cost with the Low Cardinality Type – Lessons from an Instana Engineer](https://www.instana.com/blog/reducing-clickhouse-storage-cost-with-the-low-cardinality-type-lessons-from-an-instana-engineer/) +- [String Optimization (video presentation in Russian)](https://youtu.be/rqf-ILRgBdY?list=PL0Z2YDlm0b3iwXCpEFiOOYmwXzVmjJfEt). [Slides in English](https://github.com/ClickHouse/clickhouse-presentations/raw/master/meetup19/string_optimization.pdf) +- Blog: [Optimizing ClickHouse with Schemas and Codecs](https://clickhouse.com/blog/optimize-clickhouse-codecs-compression-schema) +- Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 225f2b162ab..c8ac19afe01 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -529,6 +529,7 @@ SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(d Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 second. E.g. the difference between `2021-12-29` and `2022-01-01` is 3 days for `day` unit, 0 months for `month` unit, 0 years for `year` unit. 
+For an alternative to `age`, see function `date\_diff`. **Syntax** @@ -600,8 +601,12 @@ Result: ## date\_diff -Returns the count of the specified `unit` boundaries crossed between the `startdate` and `enddate`. -The difference is calculated using relative units, e.g. the difference between `2021-12-29` and `2022-01-01` is 3 days for day unit (see [toRelativeDayNum](#torelativedaynum)), 1 month for month unit (see [toRelativeMonthNum](#torelativemonthnum)), 1 year for year unit (see [toRelativeYearNum](#torelativeyearnum)). +Returns the count of the specified `unit` boundaries crossed between the `startdate` and the `enddate`. +The difference is calculated using relative units, e.g. the difference between `2021-12-29` and `2022-01-01` is 3 days for unit `day` (see [toRelativeDayNum](#torelativedaynum)), 1 month for unit `month` (see [toRelativeMonthNum](#torelativemonthnum)) and 1 year for unit `year` (see [toRelativeYearNum](#torelativeyearnum)). + +If unit `week` was specified, `date\_diff` assumes that weeks start on Monday. Note that this behavior is different from that of function `toWeek()` in which weeks start by default on Sunday. + +For an alternative to `date\_diff`, see function `age`. **Syntax** @@ -1582,3 +1587,8 @@ Result: │ 2020-01-01 │ └────────────────────────────────────┘ ``` + +## Related content + +- Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse) + diff --git a/docs/en/sql-reference/functions/random-functions.md b/docs/en/sql-reference/functions/random-functions.md index c30893032b3..eb6866d28ea 100644 --- a/docs/en/sql-reference/functions/random-functions.md +++ b/docs/en/sql-reference/functions/random-functions.md @@ -588,3 +588,6 @@ Result: │ aeca2A │ └───────────────────────────────────────┘ ``` + +## Related content +- Blog: [Generating random data in ClickHouse](https://clickhouse.com/blog/generating-random-test-distribution-data-for-clickhouse) diff --git a/docs/en/sql-reference/functions/time-window-functions.md b/docs/en/sql-reference/functions/time-window-functions.md index f03a206da07..c0eed01cccd 100644 --- a/docs/en/sql-reference/functions/time-window-functions.md +++ b/docs/en/sql-reference/functions/time-window-functions.md @@ -115,3 +115,7 @@ Returns the exclusive upper bound of the corresponding hopping window. hopEnd(bounds_tuple); hopEnd(time_attr, hop_interval, window_interval [, timezone]); ``` + +## Related content + +- Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse) \ No newline at end of file diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index bb72ab7cfc3..e587e56b20e 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -1402,6 +1402,8 @@ The output value is a timestamp in UTC, not in the timezone of `DateTime64`. 
```sql toUnixTimestamp64Milli(value) +toUnixTimestamp64Micro(value) +toUnixTimestamp64Nano(value) ``` **Arguments** @@ -1455,7 +1457,9 @@ Converts an `Int64` to a `DateTime64` value with fixed sub-second precision and **Syntax** ``` sql -fromUnixTimestamp64Milli(value [, ti]) +fromUnixTimestamp64Milli(value [, timezone]) +fromUnixTimestamp64Micro(value [, timezone]) +fromUnixTimestamp64Nano(value [, timezone]) ``` **Arguments** diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md index ae8671ffa9d..d580efa4992 100644 --- a/docs/en/sql-reference/statements/alter/column.md +++ b/docs/en/sql-reference/statements/alter/column.md @@ -158,8 +158,6 @@ For examples of columns TTL modifying, see [Column TTL](/docs/en/engines/table-e If the `IF EXISTS` clause is specified, the query won’t return an error if the column does not exist. -The query also can change the order of the columns using `FIRST | AFTER` clause, see [ADD COLUMN](#alter_add-column) description. - When changing the type, values are converted as if the [toType](/docs/en/sql-reference/functions/type-conversion-functions.md) functions were applied to them. If only the default expression is changed, the query does not do anything complex, and is completed almost instantly. Example: @@ -170,6 +168,40 @@ ALTER TABLE visits MODIFY COLUMN browser Array(String) Changing the column type is the only complex action – it changes the contents of files with data. For large tables, this may take a long time. +The query also can change the order of the columns using `FIRST | AFTER` clause, see [ADD COLUMN](#alter_add-column) description, but column type is mandatory in this case. + +Example: + +```sql +CREATE TABLE users ( + c1 Int16, + c2 String +) ENGINE = MergeTree +ORDER BY c1; + +DESCRIBE users; +┌─name─┬─type───┬ +│ c1 │ Int16 │ +│ c2 │ String │ +└──────┴────────┴ + +ALTER TABLE users MODIFY COLUMN c2 String FIRST; + +DESCRIBE users; +┌─name─┬─type───┬ +│ c2 │ String │ +│ c1 │ Int16 │ +└──────┴────────┴ + +ALTER TABLE users ALTER COLUMN c2 TYPE String AFTER c1; + +DESCRIBE users; +┌─name─┬─type───┬ +│ c1 │ Int16 │ +│ c2 │ String │ +└──────┴────────┴ +``` + The `ALTER` query is atomic. For MergeTree tables it is also lock-free. The `ALTER` query for changing columns is replicated. The instructions are saved in ZooKeeper, then each replica applies them. All `ALTER` queries are run in the same order. The query waits for the appropriate actions to be completed on the other replicas. However, a query to change columns in a replicated table can be interrupted, and all actions will be performed asynchronously. 
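To complement the `FIRST | AFTER` example above, here is a minimal sketch of the type-change behavior described earlier, reusing the `users` table from that example (the conversion semantics are the ones stated above: values are rewritten as if the corresponding `toType` function had been applied):

```sql
-- Change c1 from Int16 to String; existing values are converted
-- as if toString(c1) had been applied to every row.
ALTER TABLE users MODIFY COLUMN c1 String;

DESCRIBE users;
┌─name─┬─type───┬
│ c1   │ String │
│ c2   │ String │
└──────┴────────┴
```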
diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md index 68fb968c609..bb90a9f3d42 100644 --- a/docs/en/sql-reference/statements/create/table.md +++ b/docs/en/sql-reference/statements/create/table.md @@ -502,3 +502,9 @@ Result: │ t1 │ The temporary table │ └──────┴─────────────────────┘ ``` + + +## Related content + +- Blog: [Optimizing ClickHouse with Schemas and Codecs](https://clickhouse.com/blog/optimize-clickhouse-codecs-compression-schema) +- Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse) diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index 91f542be285..527b31b36a4 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -350,3 +350,7 @@ The window view is useful in the following scenarios: * **Monitoring**: Aggregate and calculate the metrics logs by time, and output the results to a target table. The dashboard can use the target table as a source table. * **Analyzing**: Automatically aggregate and preprocess data in the time window. This can be useful when analyzing a large number of logs. The preprocessing eliminates repeated calculations in multiple queries and reduces query latency. + +## Related Content + +- Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse) diff --git a/docs/en/sql-reference/statements/explain.md b/docs/en/sql-reference/statements/explain.md index 59af48b79ab..5081abf2fb8 100644 --- a/docs/en/sql-reference/statements/explain.md +++ b/docs/en/sql-reference/statements/explain.md @@ -276,14 +276,12 @@ EXPLAIN json = 1, description = 0, header = 1 SELECT 1, 2 + dummy; With `indexes` = 1, the `Indexes` key is added. It contains an array of used indexes. Each index is described as JSON with `Type` key (a string `MinMax`, `Partition`, `PrimaryKey` or `Skip`) and optional keys: -- `Name` — An index name (for now, is used only for `Skip` index). -- `Keys` — An array of columns used by the index. -- `Condition` — A string with condition used. -- `Description` — An index (for now, is used only for `Skip` index). -- `Initial Parts` — A number of parts before the index is applied. -- `Selected Parts` — A number of parts after the index is applied. -- `Initial Granules` — A number of granules before the index is applied. -- `Selected Granulesis` — A number of granules after the index is applied. +- `Name` — The index name (currently only used for `Skip` indexes). +- `Keys` — The array of columns used by the index. +- `Condition` — The used condition. +- `Description` — The index description (currently only used for `Skip` indexes). +- `Parts` — The number of parts before/after the index is applied. +- `Granules` — The number of granules before/after the index is applied. 
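For orientation, the `Indexes` output shown in the example that follows can be produced with an `EXPLAIN` query of roughly this form (a sketch only; the table name, its indexes and the filter conditions are assumptions chosen to resemble the output below):

```sql
-- Hypothetical table with a partition key, a primary key and two skip indexes.
EXPLAIN json = 1, indexes = 1, description = 0
SELECT * FROM tbl
WHERE x >= 11 AND y >= 1
FORMAT TSVRaw;
```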
Example: @@ -294,46 +292,36 @@ Example: "Type": "MinMax", "Keys": ["y"], "Condition": "(y in [1, +inf))", - "Initial Parts": 5, - "Selected Parts": 4, - "Initial Granules": 12, - "Selected Granules": 11 + "Parts": 5/4, + "Granules": 12/11 }, { "Type": "Partition", "Keys": ["y", "bitAnd(z, 3)"], "Condition": "and((bitAnd(z, 3) not in [1, 1]), and((y in [1, +inf)), (bitAnd(z, 3) not in [1, 1])))", - "Initial Parts": 4, - "Selected Parts": 3, - "Initial Granules": 11, - "Selected Granules": 10 + "Parts": 4/3, + "Granules": 11/10 }, { "Type": "PrimaryKey", "Keys": ["x", "y"], "Condition": "and((x in [11, +inf)), (y in [1, +inf)))", - "Initial Parts": 3, - "Selected Parts": 2, - "Initial Granules": 10, - "Selected Granules": 6 + "Parts": 3/2, + "Granules": 10/6 }, { "Type": "Skip", "Name": "t_minmax", "Description": "minmax GRANULARITY 2", - "Initial Parts": 2, - "Selected Parts": 1, - "Initial Granules": 6, - "Selected Granules": 2 + "Parts": 2/1, + "Granules": 6/2 }, { "Type": "Skip", "Name": "t_set", "Description": "set GRANULARITY 2", - "Initial Parts": 1, - "Selected Parts": 1, - "Initial Granules": 2, - "Selected Granules": 1 + "Parts": 1/1, + "Granules": 2/1 } ] ``` diff --git a/docs/en/sql-reference/statements/select/array-join.md b/docs/en/sql-reference/statements/select/array-join.md index 4bed43a3301..a1b5e0cdb36 100644 --- a/docs/en/sql-reference/statements/select/array-join.md +++ b/docs/en/sql-reference/statements/select/array-join.md @@ -299,3 +299,8 @@ ARRAY JOIN nest AS n, arrayEnumerate(`nest.x`) AS num; ## Implementation Details The query execution order is optimized when running `ARRAY JOIN`. Although `ARRAY JOIN` must always be specified before the [WHERE](../../../sql-reference/statements/select/where.md)/[PREWHERE](../../../sql-reference/statements/select/prewhere.md) clause in a query, technically they can be performed in any order, unless result of `ARRAY JOIN` is used for filtering. The processing order is controlled by the query optimizer. + + +## Related content + +- Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse) diff --git a/docs/en/sql-reference/statements/select/except.md b/docs/en/sql-reference/statements/select/except.md index 83bf0879213..f73cbff9819 100644 --- a/docs/en/sql-reference/statements/select/except.md +++ b/docs/en/sql-reference/statements/select/except.md @@ -23,7 +23,9 @@ FROM table2 ``` The condition could be any expression based on your requirements. -**Examples** +## Examples + +Here is a simple example that returns the numbers 1 to 10 that are _not_ a part of the numbers 3 to 8: Query: @@ -33,7 +35,7 @@ SELECT number FROM numbers(1,10) EXCEPT SELECT number FROM numbers(3,6); Result: -``` text +```response ┌─number─┐ │ 1 │ │ 2 │ @@ -42,28 +44,109 @@ Result: └────────┘ ``` -Query: +`EXCEPT` and `INTERSECT` can often be used interchangeably with different Boolean logic, and they are both useful if you have two tables that share a common column (or columns).
For example, suppose we have a few million rows of historical cryptocurrency data that contains trade prices and volume: -``` sql -CREATE TABLE t1(one String, two String, three String) ENGINE=Memory(); -CREATE TABLE t2(four String, five String, six String) ENGINE=Memory(); +```sql +CREATE TABLE crypto_prices +( + trade_date Date, + crypto_name String, + volume Float32, + price Float32, + market_cap Float32, + change_1_day Float32 +) +ENGINE = MergeTree +PRIMARY KEY (crypto_name, trade_date); -INSERT INTO t1 VALUES ('q', 'm', 'b'), ('s', 'd', 'f'), ('l', 'p', 'o'), ('s', 'd', 'f'), ('s', 'd', 'f'), ('k', 't', 'd'), ('l', 'p', 'o'); -INSERT INTO t2 VALUES ('q', 'm', 'b'), ('b', 'd', 'k'), ('s', 'y', 't'), ('s', 'd', 'f'), ('m', 'f', 'o'), ('k', 'k', 'd'); +INSERT INTO crypto_prices + SELECT * + FROM s3( + 'https://learn-clickhouse.s3.us-east-2.amazonaws.com/crypto_prices.csv', + 'CSVWithNames' +); -SELECT * FROM t1 EXCEPT SELECT * FROM t2; +SELECT * FROM crypto_prices +WHERE crypto_name = 'Bitcoin' +ORDER BY trade_date DESC +LIMIT 10; +``` + +```response +┌─trade_date─┬─crypto_name─┬──────volume─┬────price─┬───market_cap─┬──change_1_day─┐ +│ 2020-11-02 │ Bitcoin │ 30771456000 │ 13550.49 │ 251119860000 │ -0.013585099 │ +│ 2020-11-01 │ Bitcoin │ 24453857000 │ 13737.11 │ 254569760000 │ -0.0031840964 │ +│ 2020-10-31 │ Bitcoin │ 30306464000 │ 13780.99 │ 255372070000 │ 0.017308505 │ +│ 2020-10-30 │ Bitcoin │ 30581486000 │ 13546.52 │ 251018150000 │ 0.008084608 │ +│ 2020-10-29 │ Bitcoin │ 56499500000 │ 13437.88 │ 248995320000 │ 0.012552661 │ +│ 2020-10-28 │ Bitcoin │ 35867320000 │ 13271.29 │ 245899820000 │ -0.02804481 │ +│ 2020-10-27 │ Bitcoin │ 33749879000 │ 13654.22 │ 252985950000 │ 0.04427984 │ +│ 2020-10-26 │ Bitcoin │ 29461459000 │ 13075.25 │ 242251000000 │ 0.0033826586 │ +│ 2020-10-25 │ Bitcoin │ 24406921000 │ 13031.17 │ 241425220000 │ -0.0058658565 │ +│ 2020-10-24 │ Bitcoin │ 24542319000 │ 13108.06 │ 242839880000 │ 0.013650347 │ +└────────────┴─────────────┴─────────────┴──────────┴──────────────┴───────────────┘ +``` + +Now suppose we have a table named `holdings` that contains a list of cryptocurrencies that we own, along with the number of coins: + +```sql +CREATE TABLE holdings +( + crypto_name String, + quantity UInt64 +) +ENGINE = MergeTree +PRIMARY KEY (crypto_name); + +INSERT INTO holdings VALUES + ('Bitcoin', 1000), + ('Bitcoin', 200), + ('Ethereum', 250), + ('Ethereum', 5000), + ('DOGEFI', 10), + ('Bitcoin Diamond', 5000); +``` + +We can use `EXCEPT` to answer a question like **"Which coins that we own have never traded below $10?"**: + +```sql +SELECT crypto_name FROM holdings +EXCEPT +SELECT crypto_name FROM crypto_prices +WHERE price < 10; ``` Result: -``` text -┌─one─┬─two─┬─three─┐ -│ l │ p │ o │ -│ k │ t │ d │ -│ l │ p │ o │ -└─────┴─────┴───────┘ +```response +┌─crypto_name─┐ +│ Bitcoin │ +│ Bitcoin │ +└─────────────┘ ``` +This means that, of the four cryptocurrencies we own, only Bitcoin has never dropped below $10 (based on the limited data we have here in this example). + +## EXCEPT DISTINCT + +Notice in the previous query we had multiple Bitcoin holdings in the result.
You can add `DISTINCT` to `EXCEPT` to eliminate duplicate rows from the result: + +```sql +SELECT crypto_name FROM holdings +EXCEPT DISTINCT +SELECT crypto_name FROM crypto_prices +WHERE price < 10; +``` + +Result: + +```response +┌─crypto_name─┐ +│ Bitcoin │ +└─────────────┘ +``` + + **See Also** - [UNION](union.md#union-clause) diff --git a/docs/en/sql-reference/statements/select/intersect.md b/docs/en/sql-reference/statements/select/intersect.md index f1eb4738543..ea7a39421a5 100644 --- a/docs/en/sql-reference/statements/select/intersect.md +++ b/docs/en/sql-reference/statements/select/intersect.md @@ -24,17 +24,17 @@ FROM table2 ``` The condition could be any expression based on your requirements. -**Examples** +## Examples -Query: +Here is a simple example that intersects the numbers 1 to 10 with the numbers 3 to 8: -``` sql +```sql SELECT number FROM numbers(1,10) INTERSECT SELECT number FROM numbers(3,6); ``` Result: -``` text +```response ┌─number─┐ │ 3 │ │ 4 │ @@ -45,29 +45,112 @@ Result: └────────┘ ``` -Query: +`INTERSECT` is useful if you have two tables that share a common column (or columns). You can intersect the results of two queries, as long as the results contain the same columns. For example, suppose we have a few million rows of historical cryptocurrency data that contains trade prices and volume: -``` sql -CREATE TABLE t1(one String, two String, three String) ENGINE=Memory(); -CREATE TABLE t2(four String, five String, six String) ENGINE=Memory(); +```sql +CREATE TABLE crypto_prices +( + trade_date Date, + crypto_name String, + volume Float32, + price Float32, + market_cap Float32, + change_1_day Float32 +) +ENGINE = MergeTree +PRIMARY KEY (crypto_name, trade_date); -INSERT INTO t1 VALUES ('q', 'm', 'b'), ('s', 'd', 'f'), ('l', 'p', 'o'), ('s', 'd', 'f'), ('s', 'd', 'f'), ('k', 't', 'd'), ('l', 'p', 'o'); -INSERT INTO t2 VALUES ('q', 'm', 'b'), ('b', 'd', 'k'), ('s', 'y', 't'), ('s', 'd', 'f'), ('m', 'f', 'o'), ('k', 'k', 'd'); +INSERT INTO crypto_prices + SELECT * + FROM s3( + 'https://learn-clickhouse.s3.us-east-2.amazonaws.com/crypto_prices.csv', + 'CSVWithNames' +); -SELECT * FROM t1 INTERSECT SELECT * FROM t2; +SELECT * FROM crypto_prices +WHERE crypto_name = 'Bitcoin' +ORDER BY trade_date DESC +LIMIT 10; +``` + +```response +┌─trade_date─┬─crypto_name─┬──────volume─┬────price─┬───market_cap─┬──change_1_day─┐ +│ 2020-11-02 │ Bitcoin │ 30771456000 │ 13550.49 │ 251119860000 │ -0.013585099 │ +│ 2020-11-01 │ Bitcoin │ 24453857000 │ 13737.11 │ 254569760000 │ -0.0031840964 │ +│ 2020-10-31 │ Bitcoin │ 30306464000 │ 13780.99 │ 255372070000 │ 0.017308505 │ +│ 2020-10-30 │ Bitcoin │ 30581486000 │ 13546.52 │ 251018150000 │ 0.008084608 │ +│ 2020-10-29 │ Bitcoin │ 56499500000 │ 13437.88 │ 248995320000 │ 0.012552661 │ +│ 2020-10-28 │ Bitcoin │ 35867320000 │ 13271.29 │ 245899820000 │ -0.02804481 │ +│ 2020-10-27 │ Bitcoin │ 33749879000 │ 13654.22 │ 252985950000 │ 0.04427984 │ +│ 2020-10-26 │ Bitcoin │ 29461459000 │ 13075.25 │ 242251000000 │ 0.0033826586 │ +│ 2020-10-25 │ Bitcoin │ 24406921000 │ 13031.17 │ 241425220000 │ -0.0058658565 │ +│ 2020-10-24 │ Bitcoin │ 24542319000 │ 13108.06 │ 242839880000 │ 0.013650347 │ +└────────────┴─────────────┴─────────────┴──────────┴──────────────┴───────────────┘ +``` + +Now suppose we have a table named `holdings` that contains a list of cryptocurrencies that we own, along with the number of coins: + +```sql +CREATE TABLE holdings +( + crypto_name String, + quantity UInt64 +) +ENGINE = MergeTree +PRIMARY KEY (crypto_name); + +INSERT INTO 
holdings VALUES + ('Bitcoin', 1000), + ('Bitcoin', 200), + ('Ethereum', 250), + ('Ethereum', 5000), + ('DOGEFI', 10), + ('Bitcoin Diamond', 5000); +``` + +We can use `INTERSECT` to answer questions like **"Which coins that we own have traded at a price greater than $100?"**: + +```sql +SELECT crypto_name FROM holdings +INTERSECT +SELECT crypto_name FROM crypto_prices +WHERE price > 100; ``` Result: -``` text -┌─one─┬─two─┬─three─┐ -│ q │ m │ b │ -│ s │ d │ f │ -│ s │ d │ f │ -│ s │ d │ f │ -└─────┴─────┴───────┘ +```response +┌─crypto_name─┐ +│ Bitcoin │ +│ Bitcoin │ +│ Ethereum │ +│ Ethereum │ +└─────────────┘ ``` +This means that, at some point in time, Bitcoin and Ethereum traded above $100, and DOGEFI and Bitcoin Diamond have never traded above $100 (at least using the data we have here in this example). + +## INTERSECT DISTINCT + +Notice in the previous query we had multiple Bitcoin and Ethereum holdings that traded above $100. It might be nice to remove duplicate rows (since they only repeat what we already know). You can add `DISTINCT` to `INTERSECT` to eliminate duplicate rows from the result: + +```sql +SELECT crypto_name FROM holdings +INTERSECT DISTINCT +SELECT crypto_name FROM crypto_prices +WHERE price > 100; +``` + +Result: + +```response +┌─crypto_name─┐ +│ Bitcoin │ +│ Ethereum │ +└─────────────┘ +``` + + **See Also** - [UNION](union.md#union-clause) diff --git a/docs/en/sql-reference/statements/select/order-by.md b/docs/en/sql-reference/statements/select/order-by.md index 41370a38b16..e231a1cc72c 100644 --- a/docs/en/sql-reference/statements/select/order-by.md +++ b/docs/en/sql-reference/statements/select/order-by.md @@ -543,3 +543,7 @@ Result: │ 7 │ original │ 7 │ └─────┴──────────┴───────┘ ``` + +## Related content + +- Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse) diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md index a82d1447453..6c62229f0d5 100644 --- a/docs/en/sql-reference/statements/system.md +++ b/docs/en/sql-reference/statements/system.md @@ -72,7 +72,7 @@ For more convenient (automatic) cache management, see disable_internal_dns_cache ## DROP MARK CACHE -Resets the mark cache. Used in development of ClickHouse and performance tests. +Resets the mark cache. ## DROP REPLICA @@ -94,13 +94,18 @@ The fourth one is useful to remove metadata of dead replica when all other repli ## DROP UNCOMPRESSED CACHE -Reset the uncompressed data cache. Used in development of ClickHouse and performance tests. -For manage uncompressed data cache parameters use following server level settings [uncompressed_cache_size](../../operations/server-configuration-parameters/settings.md#server-settings-uncompressed_cache_size) and query/user/profile level settings [use_uncompressed_cache](../../operations/settings/settings.md#setting-use_uncompressed_cache) +Reset the uncompressed data cache. +The uncompressed data cache is enabled/disabled with the query/user/profile-level setting [use_uncompressed_cache](../../operations/settings/settings.md#setting-use_uncompressed_cache). +Its size can be configured using the server-level setting [uncompressed_cache_size](../../operations/server-configuration-parameters/settings.md#server-settings-uncompressed_cache_size). ## DROP COMPILED EXPRESSION CACHE -Reset the compiled expression cache. Used in development of ClickHouse and performance tests.
-Compiled expression cache used when query/user/profile enable option [compile-expressions](../../operations/settings/settings.md#compile-expressions) +Reset the compiled expression cache. +The compiled expression cache is enabled/disabled with the query/user/profile-level setting [compile_expressions](../../operations/settings/settings.md#compile-expressions). + +## DROP QUERY RESULT CACHE + +Resets the [query result cache](../../operations/query-result-cache.md). ## FLUSH LOGS diff --git a/docs/en/sql-reference/syntax.md b/docs/en/sql-reference/syntax.md index 045c9777ad7..47a3ef16ba2 100644 --- a/docs/en/sql-reference/syntax.md +++ b/docs/en/sql-reference/syntax.md @@ -14,7 +14,7 @@ The `INSERT` query uses both parsers: INSERT INTO t VALUES (1, 'Hello, world'), (2, 'abc'), (3, 'def') ``` -The `INSERT INTO t VALUES` fragment is parsed by the full parser, and the data `(1, 'Hello, world'), (2, 'abc'), (3, 'def')` is parsed by the fast stream parser. You can also turn on the full parser for the data by using the [input_format_values_interpret_expressions](../operations/settings/settings.md#settings-input_format_values_interpret_expressions) setting. When `input_format_values_interpret_expressions = 1`, ClickHouse first tries to parse values with the fast stream parser. If it fails, ClickHouse tries to use the full parser for the data, treating it like an SQL [expression](#syntax-expressions). +The `INSERT INTO t VALUES` fragment is parsed by the full parser, and the data `(1, 'Hello, world'), (2, 'abc'), (3, 'def')` is parsed by the fast stream parser. You can also turn on the full parser for the data by using the [input_format_values_interpret_expressions](../operations/settings/settings-formats.md#settings-input_format_values_interpret_expressions) setting. When `input_format_values_interpret_expressions = 1`, ClickHouse first tries to parse values with the fast stream parser. If it fails, ClickHouse tries to use the full parser for the data, treating it like an SQL [expression](#syntax-expressions). Data can have any format. When a query is received, the server calculates no more than [max_query_size](../operations/settings/settings.md#settings-max_query_size) bytes of the request in RAM (by default, 1 MB), and the rest is stream parsed. It allows for avoiding issues with large `INSERT` queries. 
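As a small illustration of the fallback described above (a minimal sketch; the table definition and values are hypothetical, only the setting name comes from the text):

```sql
-- Hypothetical table; the point is the expression inside VALUES.
CREATE TABLE t (i UInt8, s String) ENGINE = Memory;

-- The fast stream parser accepts only plain literals in VALUES.
-- With input_format_values_interpret_expressions = 1, ClickHouse falls back
-- to the full parser for rows it cannot stream-parse, so an expression
-- such as concat('Hello', ', world') is accepted here.
SET input_format_values_interpret_expressions = 1;
INSERT INTO t VALUES (1, concat('Hello', ', world'));
```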
diff --git a/docs/en/sql-reference/table-functions/generate.md b/docs/en/sql-reference/table-functions/generate.md index 380c8364090..90d81e4f74e 100644 --- a/docs/en/sql-reference/table-functions/generate.md +++ b/docs/en/sql-reference/table-functions/generate.md @@ -51,4 +51,7 @@ SELECT * FROM random; │ [] │ 68091.8197 │ ('2037-10-02 12:44:23.368','039ecab7-81c2-45ee-208c-844e5c6c5652') │ │ [8,-83,0,-22,65,9,-30,28,64] │ -186233.4909 │ ('2062-01-11 00:06:04.124','69563ea1-5ad1-f870-16d8-67061da0df25') │ └──────────────────────────────┴──────────────┴────────────────────────────────────────────────────────────────────┘ -``` \ No newline at end of file +``` + +## Related content +- Blog: [Generating random data in ClickHouse](https://clickhouse.com/blog/generating-random-test-distribution-data-for-clickhouse) diff --git a/docs/en/sql-reference/table-functions/mongodb.md b/docs/en/sql-reference/table-functions/mongodb.md new file mode 100644 index 00000000000..dd063ae1796 --- /dev/null +++ b/docs/en/sql-reference/table-functions/mongodb.md @@ -0,0 +1,74 @@ +--- +slug: /en/sql-reference/table-functions/mongodb +sidebar_position: 42 +sidebar_label: mongodb +--- + +# mongodb + +Allows `SELECT` queries to be performed on data that is stored on a remote MongoDB server. + +**Syntax** + +``` sql +mongodb(host:port, database, collection, user, password, structure [, options]) +``` + +**Arguments** + +- `host:port` — MongoDB server address. + +- `database` — Remote database name. + +- `collection` — Remote collection name. + +- `user` — MongoDB user. + +- `password` — User password. + +- `structure` - The schema for the ClickHouse table returned from this function. + +- `options` - MongoDB connection string options (optional parameter). + + +**Returned Value** + +A table object with the same columns as the original MongoDB table. 
+ + +**Examples** + +Suppose we have a collection named `my_collection` defined in a MongoDB database named `test`, and we insert a couple of documents: + +```sql +db.createUser({user:"test_user",pwd:"password",roles:[{role:"readWrite",db:"test"}]}) + +db.createCollection("my_collection") + +db.my_collection.insertOne( + { log_type: "event", host: "120.5.33.9", command: "check-cpu-usage -w 75 -c 90" } +) + +db.my_collection.insertOne( + { log_type: "event", host: "120.5.33.4", command: "system-check"} +) +``` + +Let's query the collection using the `mongodb` table function: + +```sql +SELECT * FROM mongodb( + '127.0.0.1:27017', + 'test', + 'my_collection', + 'test_user', + 'password', + 'log_type String, host String, command String', + 'connectTimeoutMS=10000' +) +``` + +**See Also** + +- [The `MongoDB` table engine](../../engines/table-engines/integrations/mongodb.md) +- [Using MongoDB as a dictionary source](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources/#mongodb) diff --git a/docs/en/sql-reference/table-functions/postgresql.md b/docs/en/sql-reference/table-functions/postgresql.md index 3c1352cd56c..87fc6ecb234 100644 --- a/docs/en/sql-reference/table-functions/postgresql.md +++ b/docs/en/sql-reference/table-functions/postgresql.md @@ -131,3 +131,6 @@ CREATE TABLE pg_table_schema_with_dots (a UInt32) - [The PostgreSQL table engine](../../engines/table-engines/integrations/postgresql.md) - [Using PostgreSQL as a dictionary source](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-postgresql) + +## Related content +- Blog: [ClickHouse and PostgreSQL - a match made in data heaven - part 1](https://clickhouse.com/blog/migrating-data-between-clickhouse-postgres) diff --git a/docs/en/sql-reference/window-functions/index.md b/docs/en/sql-reference/window-functions/index.md index a545fb630c9..f8107e3310e 100644 --- a/docs/en/sql-reference/window-functions/index.md +++ b/docs/en/sql-reference/window-functions/index.md @@ -590,5 +590,6 @@ ORDER BY ## Related Content -- [Window and array functions for Git commit sequences](https://clickhouse.com/blog/clickhouse-window-array-functions-git-commits) -- [Getting Data Into ClickHouse - Part 3 - Using S3](https://clickhouse.com/blog/getting-data-into-clickhouse-part-3-s3) +- Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse) +- Blog: [Window and array functions for Git commit sequences](https://clickhouse.com/blog/clickhouse-window-array-functions-git-commits) +- Blog: [Getting Data Into ClickHouse - Part 3 - Using S3](https://clickhouse.com/blog/getting-data-into-clickhouse-part-3-s3) diff --git a/docs/ru/sql-reference/statements/explain.md b/docs/ru/sql-reference/statements/explain.md index 4d5fa70e098..0179c840df6 100644 --- a/docs/ru/sql-reference/statements/explain.md +++ b/docs/ru/sql-reference/statements/explain.md @@ -248,10 +248,8 @@ EXPLAIN json = 1, description = 0, header = 1 SELECT 1, 2 + dummy; - `Keys` — массив столбцов, используемых индексом. - `Condition` — строка с используемым условием. - `Description` — индекс (на данный момент используется только для индекса `Skip`). -- `Initial Parts` — количество кусков до применения индекса. -- `Selected Parts` — количество кусков после применения индекса. -- `Initial Granules` — количество гранул до применения индекса. -- `Selected Granulesis` — количество гранул после применения индекса. 
+- `Parts` — количество кусков до/после применения индекса. +- `Granules` — количество гранул до/после применения индекса. Пример: @@ -262,46 +260,36 @@ EXPLAIN json = 1, description = 0, header = 1 SELECT 1, 2 + dummy; "Type": "MinMax", "Keys": ["y"], "Condition": "(y in [1, +inf))", - "Initial Parts": 5, - "Selected Parts": 4, - "Initial Granules": 12, - "Selected Granules": 11 + "Parts": 5/4, + "Granules": 12/11 }, { "Type": "Partition", "Keys": ["y", "bitAnd(z, 3)"], "Condition": "and((bitAnd(z, 3) not in [1, 1]), and((y in [1, +inf)), (bitAnd(z, 3) not in [1, 1])))", - "Initial Parts": 4, - "Selected Parts": 3, - "Initial Granules": 11, - "Selected Granules": 10 + "Parts": 4/3, + "Granules": 11/10 }, { "Type": "PrimaryKey", "Keys": ["x", "y"], "Condition": "and((x in [11, +inf)), (y in [1, +inf)))", - "Initial Parts": 3, - "Selected Parts": 2, - "Initial Granules": 10, - "Selected Granules": 6 + "Parts": 3/2, + "Granules": 10/6 }, { "Type": "Skip", "Name": "t_minmax", "Description": "minmax GRANULARITY 2", - "Initial Parts": 2, - "Selected Parts": 1, - "Initial Granules": 6, - "Selected Granules": 2 + "Parts": 2/1, + "Granules": 6/2 }, { "Type": "Skip", "Name": "t_set", "Description": "set GRANULARITY 2", - "Initial Parts": 1, - "Selected Parts": 1, - "Initial Granules": 2, - "Selected Granules": 1 + "": 1/1, + "Granules": 2/1 } ] ``` diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index baf04567f00..6bb01d215eb 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -973,7 +973,7 @@ void Client::processOptions(const OptionsDescription & options_description, if (external_tables.back().file == "-") ++number_of_external_tables_with_stdin_source; if (number_of_external_tables_with_stdin_source > 1) - throw Exception("Two or more external tables has stdin (-) set as --file field", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Two or more external tables has stdin (-) set as --file field"); } catch (const Exception & e) { @@ -1026,7 +1026,7 @@ void Client::processOptions(const OptionsDescription & options_description, } if (options.count("config-file") && options.count("config")) - throw Exception("Two or more configuration files referenced in arguments", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Two or more configuration files referenced in arguments"); if (options.count("config")) config().setString("config-file", options["config"].as()); @@ -1217,14 +1217,14 @@ void Client::readArguments( /// param_name value ++arg_num; if (arg_num >= argc) - throw Exception("Parameter requires value", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter requires value"); arg = argv[arg_num]; query_parameters.emplace(String(param_continuation), String(arg)); } else { if (equal_pos == 0) - throw Exception("Parameter name cannot be empty", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter name cannot be empty"); /// param_name=value query_parameters.emplace(param_continuation.substr(0, equal_pos), param_continuation.substr(equal_pos + 1)); @@ -1238,7 +1238,7 @@ void Client::readArguments( { ++arg_num; if (arg_num >= argc) - throw Exception("Host argument requires value", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Host argument requires value"); arg = argv[arg_num]; host_arg = "--host="; host_arg.append(arg); @@ -1270,7 +1270,7 @@ void Client::readArguments( port_arg.push_back('='); ++arg_num; if (arg_num >= 
argc) - throw Exception("Port argument requires value", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Port argument requires value"); arg = argv[arg_num]; port_arg.append(arg); } diff --git a/programs/compressor/Compressor.cpp b/programs/compressor/Compressor.cpp index fe8debcee27..b60138b5692 100644 --- a/programs/compressor/Compressor.cpp +++ b/programs/compressor/Compressor.cpp @@ -46,7 +46,7 @@ void checkAndWriteHeader(DB::ReadBuffer & in, DB::WriteBuffer & out) UInt32 size_compressed = unalignedLoad(&header[1]); if (size_compressed > DBMS_MAX_COMPRESSED_SIZE) - throw DB::Exception("Too large size_compressed. Most likely corrupted data.", DB::ErrorCodes::TOO_LARGE_SIZE_COMPRESSED); + throw DB::Exception(DB::ErrorCodes::TOO_LARGE_SIZE_COMPRESSED, "Too large size_compressed. Most likely corrupted data."); UInt32 size_decompressed = unalignedLoad(&header[5]); @@ -113,10 +113,10 @@ int mainEntryClickHouseCompressor(int argc, char ** argv) codecs = options["codec"].as>(); if ((use_lz4hc || use_zstd || use_deflate_qpl || use_none) && !codecs.empty()) - throw Exception("Wrong options, codec flags like --zstd and --codec options are mutually exclusive", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong options, codec flags like --zstd and --codec options are mutually exclusive"); if (!codecs.empty() && options.count("level")) - throw Exception("Wrong options, --level is not compatible with --codec list", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong options, --level is not compatible with --codec list"); std::string method_family = "LZ4"; diff --git a/programs/copier/ClusterCopier.cpp b/programs/copier/ClusterCopier.cpp index 7653b19f21c..816fa561a6a 100644 --- a/programs/copier/ClusterCopier.cpp +++ b/programs/copier/ClusterCopier.cpp @@ -77,7 +77,7 @@ decltype(auto) ClusterCopier::retry(T && func, UInt64 max_tries) std::exception_ptr exception; if (max_tries == 0) - throw Exception("Cannot perform zero retries", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot perform zero retries"); for (UInt64 try_number = 1; try_number <= max_tries; ++try_number) { @@ -123,7 +123,7 @@ void ClusterCopier::discoverShardPartitions(const ConnectionTimeouts & timeouts, } catch (Exception & e) { - throw Exception("Partition " + partition_text_quoted + " has incorrect format. " + e.displayText(), ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Partition {} has incorrect format. {}", partition_text_quoted, e.displayText()); } }; @@ -325,8 +325,8 @@ void ClusterCopier::process(const ConnectionTimeouts & timeouts) if (!table_is_done) { - throw Exception("Too many tries to process table " + task_table.table_id + ". Abort remaining execution", - ErrorCodes::UNFINISHED); + throw Exception(ErrorCodes::UNFINISHED, "Too many tries to process table {}. 
Abort remaining execution", + task_table.table_id); } } } @@ -666,7 +666,7 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t } if (inject_fault) - throw Exception("Copy fault injection is activated", ErrorCodes::UNFINISHED); + throw Exception(ErrorCodes::UNFINISHED, "Copy fault injection is activated"); } /// Create node to signal that we finished moving @@ -753,7 +753,7 @@ std::shared_ptr rewriteCreateQueryStorage(const ASTPtr & create_ auto res = std::make_shared(create); if (create.storage == nullptr || new_storage_ast == nullptr) - throw Exception("Storage is not specified", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Storage is not specified"); res->setDatabase(new_table.first); res->setTable(new_table.second); @@ -775,7 +775,7 @@ bool ClusterCopier::tryDropPartitionPiece( const CleanStateClock & clean_state_clock) { if (is_safe_mode) - throw Exception("DROP PARTITION is prohibited in safe mode", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "DROP PARTITION is prohibited in safe mode"); TaskTable & task_table = task_partition.task_shard.task_table; ShardPartitionPiece & partition_piece = task_partition.pieces[current_piece_number]; @@ -944,7 +944,7 @@ bool ClusterCopier::tryProcessTable(const ConnectionTimeouts & timeouts, TaskTab for (const String & partition_name : task_table.ordered_partition_names) { if (!task_table.cluster_partitions.contains(partition_name)) - throw Exception("There are no expected partition " + partition_name + ". It is a bug", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "There are no expected partition {}. It is a bug", partition_name); ClusterPartition & cluster_partition = task_table.cluster_partitions[partition_name]; @@ -1006,7 +1006,7 @@ bool ClusterCopier::tryProcessTable(const ConnectionTimeouts & timeouts, TaskTab /// Previously when we discovered that shard does not contain current partition, we skipped it. /// At this moment partition have to be present. if (it_shard_partition == shard->partition_tasks.end()) - throw Exception("There are no such partition in a shard. This is a bug.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "There are no such partition in a shard. This is a bug."); auto & partition = it_shard_partition->second; expected_shards.emplace_back(shard); @@ -1587,7 +1587,7 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl( auto cancel_check = [&] () { if (zookeeper->expired()) - throw Exception("ZooKeeper session is expired, cancel INSERT SELECT", ErrorCodes::UNFINISHED); + throw Exception(ErrorCodes::UNFINISHED, "ZooKeeper session is expired, cancel INSERT SELECT"); if (!future_is_dirty_checker.valid()) future_is_dirty_checker = zookeeper->asyncExists(piece_is_dirty_flag_path); @@ -1603,7 +1603,7 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl( LogicalClock dirt_discovery_epoch (status.stat.mzxid); if (dirt_discovery_epoch == clean_state_clock.discovery_zxid) return false; - throw Exception("Partition is dirty, cancel INSERT SELECT", ErrorCodes::UNFINISHED); + throw Exception(ErrorCodes::UNFINISHED, "Partition is dirty, cancel INSERT SELECT"); } } @@ -1646,7 +1646,7 @@ TaskStatus ClusterCopier::processPartitionPieceTaskImpl( future_is_dirty_checker.get(); if (inject_fault) - throw Exception("Copy fault injection is activated", ErrorCodes::UNFINISHED); + throw Exception(ErrorCodes::UNFINISHED, "Copy fault injection is activated"); } catch (...) 
{ diff --git a/programs/copier/Internals.cpp b/programs/copier/Internals.cpp index 128194b401c..56c4dfa1dba 100644 --- a/programs/copier/Internals.cpp +++ b/programs/copier/Internals.cpp @@ -90,9 +90,7 @@ ASTPtr extractPartitionKey(const ASTPtr & storage_ast) if (!endsWith(engine.name, "MergeTree")) { - throw Exception( - "Unsupported engine was specified in " + storage_str + ", only *MergeTree engines are supported", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported engine was specified in {}, only *MergeTree engines are supported", storage_str); } if (isExtendedDefinitionStorage(storage_ast)) @@ -109,14 +107,13 @@ ASTPtr extractPartitionKey(const ASTPtr & storage_ast) size_t min_args = is_replicated ? 3 : 1; if (!engine.arguments) - throw Exception("Expected arguments in " + storage_str, ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected arguments in {}", storage_str); ASTPtr arguments_ast = engine.arguments->clone(); ASTs & arguments = arguments_ast->children; if (arguments.size() < min_args) - throw Exception("Expected at least " + toString(min_args) + " arguments in " + storage_str, - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected at least {} arguments in {}", min_args, storage_str); ASTPtr & month_arg = is_replicated ? arguments[2] : arguments[1]; return makeASTFunction("toYYYYMM", month_arg->clone()); @@ -132,14 +129,12 @@ ASTPtr extractPrimaryKey(const ASTPtr & storage_ast) if (!endsWith(engine.name, "MergeTree")) { - throw Exception("Unsupported engine was specified in " + storage_str + ", only *MergeTree engines are supported", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported engine was specified in {}, only *MergeTree engines are supported", storage_str); } if (!isExtendedDefinitionStorage(storage_ast)) { - throw Exception("Is not extended deginition storage " + storage_str + " Will be fixed later.", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Is not extended deginition storage {} Will be fixed later.", storage_str); } if (storage.primary_key) @@ -158,20 +153,18 @@ ASTPtr extractOrderBy(const ASTPtr & storage_ast) if (!endsWith(engine.name, "MergeTree")) { - throw Exception("Unsupported engine was specified in " + storage_str + ", only *MergeTree engines are supported", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported engine was specified in {}, only *MergeTree engines are supported", storage_str); } if (!isExtendedDefinitionStorage(storage_ast)) { - throw Exception("Is not extended deginition storage " + storage_str + " Will be fixed later.", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Is not extended deginition storage {} Will be fixed later.", storage_str); } if (storage.order_by) return storage.order_by->clone(); - throw Exception("ORDER BY cannot be empty", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "ORDER BY cannot be empty"); } /// Wraps only identifiers with backticks. 
@@ -191,7 +184,7 @@ std::string wrapIdentifiersWithBackticks(const ASTPtr & root) return boost::algorithm::join(function_arguments, ", "); } - throw Exception("Primary key could be represented only as columns or functions from columns.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Primary key could be represented only as columns or functions from columns."); } @@ -210,9 +203,9 @@ Names extractPrimaryKeyColumnNames(const ASTPtr & storage_ast) size_t sorting_key_size = sorting_key_expr_list->children.size(); if (primary_key_size > sorting_key_size) - throw Exception("Primary key must be a prefix of the sorting key, but its length: " - + toString(primary_key_size) + " is greater than the sorting key length: " + toString(sorting_key_size), - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Primary key must be a prefix of the sorting key, but its length: " + "{} is greater than the sorting key length: {}", + primary_key_size, sorting_key_size); Names primary_key_columns; NameSet primary_key_columns_set; @@ -228,12 +221,12 @@ Names extractPrimaryKeyColumnNames(const ASTPtr & storage_ast) { String pk_column = primary_key_expr_list->children[i]->getColumnName(); if (pk_column != sorting_key_column) - throw Exception("Primary key must be a prefix of the sorting key, but the column in the position " - + toString(i) + " is " + sorting_key_column +", not " + pk_column, - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Primary key must be a prefix of the sorting key, " + "but the column in the position {} is {}, not {}", i, sorting_key_column, pk_column); if (!primary_key_columns_set.emplace(pk_column).second) - throw Exception("Primary key contains duplicate columns", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Primary key contains duplicate columns"); primary_key_columns.push_back(wrapIdentifiersWithBackticks(primary_key_expr_list->children[i])); } @@ -250,9 +243,7 @@ bool isReplicatedTableEngine(const ASTPtr & storage_ast) if (!endsWith(engine.name, "MergeTree")) { String storage_str = queryToString(storage_ast); - throw Exception( - "Unsupported engine was specified in " + storage_str + ", only *MergeTree engines are supported", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported engine was specified in {}, only *MergeTree engines are supported", storage_str); } return startsWith(engine.name, "Replicated"); diff --git a/programs/copier/Internals.h b/programs/copier/Internals.h index 64ab0019d05..b3c9936cd33 100644 --- a/programs/copier/Internals.h +++ b/programs/copier/Internals.h @@ -119,7 +119,7 @@ struct TaskStateWithOwner rb >> state >> "\n" >> escape >> res.owner; if (state >= static_cast(TaskState::Unknown)) - throw Exception("Unknown state " + data, ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown state {}", data); res.state = static_cast(state); return res; diff --git a/programs/copier/TaskCluster.cpp b/programs/copier/TaskCluster.cpp index 957c7d2120d..053ef39aa81 100644 --- a/programs/copier/TaskCluster.cpp +++ b/programs/copier/TaskCluster.cpp @@ -19,7 +19,7 @@ void DB::TaskCluster::loadTasks(const Poco::Util::AbstractConfiguration & config clusters_prefix = prefix + "remote_servers"; if (!config.has(clusters_prefix)) - throw Exception("You should specify list of clusters in " + clusters_prefix, ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "You should specify list of clusters in 
{}", clusters_prefix); Poco::Util::AbstractConfiguration::Keys tables_keys; config.keys(prefix + "tables", tables_keys); diff --git a/programs/copier/TaskTable.cpp b/programs/copier/TaskTable.cpp index 65eaf8b7108..451a33a1c02 100644 --- a/programs/copier/TaskTable.cpp +++ b/programs/copier/TaskTable.cpp @@ -102,7 +102,7 @@ TaskTable::TaskTable(TaskCluster & parent, const Poco::Util::AbstractConfigurati for (const String &key : keys) { if (!startsWith(key, "partition")) - throw Exception("Unknown key " + key + " in " + enabled_partitions_prefix, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG, "Unknown key {} in {}", key, enabled_partitions_prefix); enabled_partitions.emplace_back(config.getString(enabled_partitions_prefix + "." + key)); } @@ -213,8 +213,7 @@ ClusterPartition & TaskTable::getClusterPartition(const String & partition_name) { auto it = cluster_partitions.find(partition_name); if (it == cluster_partitions.end()) - throw Exception("There are no cluster partition " + partition_name + " in " + table_id, - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "There are no cluster partition {} in {}", partition_name, table_id); return it->second; } diff --git a/programs/disks/CommandCopy.cpp b/programs/disks/CommandCopy.cpp index 1e4a3ba6908..1cfce7fc022 100644 --- a/programs/disks/CommandCopy.cpp +++ b/programs/disks/CommandCopy.cpp @@ -44,7 +44,7 @@ public: if (command_arguments.size() != 2) { printHelpMessage(); - throw DB::Exception("Bad Arguments", DB::ErrorCodes::BAD_ARGUMENTS); + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); } String disk_name_from = config.getString("diskFrom", config.getString("disk", "default")); diff --git a/programs/disks/CommandLink.cpp b/programs/disks/CommandLink.cpp index 766d03a0b6b..0e94eb87c04 100644 --- a/programs/disks/CommandLink.cpp +++ b/programs/disks/CommandLink.cpp @@ -33,7 +33,7 @@ public: if (command_arguments.size() != 2) { printHelpMessage(); - throw DB::Exception("Bad Arguments", DB::ErrorCodes::BAD_ARGUMENTS); + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); } String disk_name = config.getString("disk", "default"); diff --git a/programs/disks/CommandList.cpp b/programs/disks/CommandList.cpp index a1d41316b9d..470784bff00 100644 --- a/programs/disks/CommandList.cpp +++ b/programs/disks/CommandList.cpp @@ -40,7 +40,7 @@ public: if (command_arguments.size() != 1) { printHelpMessage(); - throw DB::Exception("Bad Arguments", DB::ErrorCodes::BAD_ARGUMENTS); + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); } String disk_name = config.getString("disk", "default"); diff --git a/programs/disks/CommandListDisks.cpp b/programs/disks/CommandListDisks.cpp index a6b38f60a67..7b2fcd16107 100644 --- a/programs/disks/CommandListDisks.cpp +++ b/programs/disks/CommandListDisks.cpp @@ -32,7 +32,7 @@ public: if (!command_arguments.empty()) { printHelpMessage(); - throw DB::Exception("Bad Arguments", DB::ErrorCodes::BAD_ARGUMENTS); + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); } for (const auto & [disk_name, _] : global_context->getDisksMap()) diff --git a/programs/disks/CommandMkDir.cpp b/programs/disks/CommandMkDir.cpp index b4b08391663..c938cc52132 100644 --- a/programs/disks/CommandMkDir.cpp +++ b/programs/disks/CommandMkDir.cpp @@ -41,7 +41,7 @@ public: if (command_arguments.size() != 1) { printHelpMessage(); - throw DB::Exception("Bad Arguments", DB::ErrorCodes::BAD_ARGUMENTS); + throw 
DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); } String disk_name = config.getString("disk", "default"); diff --git a/programs/disks/CommandMove.cpp b/programs/disks/CommandMove.cpp index 085a0e2d5eb..3c564f3bcd3 100644 --- a/programs/disks/CommandMove.cpp +++ b/programs/disks/CommandMove.cpp @@ -32,7 +32,7 @@ public: if (command_arguments.size() != 2) { printHelpMessage(); - throw DB::Exception("Bad Arguments", DB::ErrorCodes::BAD_ARGUMENTS); + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); } String disk_name = config.getString("disk", "default"); diff --git a/programs/disks/CommandRead.cpp b/programs/disks/CommandRead.cpp index 2a04dd7a902..2dd5c191d10 100644 --- a/programs/disks/CommandRead.cpp +++ b/programs/disks/CommandRead.cpp @@ -43,7 +43,7 @@ public: if (command_arguments.size() != 1) { printHelpMessage(); - throw DB::Exception("Bad Arguments", DB::ErrorCodes::BAD_ARGUMENTS); + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); } String disk_name = config.getString("disk", "default"); diff --git a/programs/disks/CommandRemove.cpp b/programs/disks/CommandRemove.cpp index c742cdec042..ff8d4a1c6bb 100644 --- a/programs/disks/CommandRemove.cpp +++ b/programs/disks/CommandRemove.cpp @@ -32,7 +32,7 @@ public: if (command_arguments.size() != 1) { printHelpMessage(); - throw DB::Exception("Bad Arguments", DB::ErrorCodes::BAD_ARGUMENTS); + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); } String disk_name = config.getString("disk", "default"); diff --git a/programs/disks/CommandWrite.cpp b/programs/disks/CommandWrite.cpp index 152cb33c458..b055c6f9343 100644 --- a/programs/disks/CommandWrite.cpp +++ b/programs/disks/CommandWrite.cpp @@ -44,7 +44,7 @@ public: if (command_arguments.size() != 1) { printHelpMessage(); - throw DB::Exception("Bad Arguments", DB::ErrorCodes::BAD_ARGUMENTS); + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); } String disk_name = config.getString("disk", "default"); diff --git a/programs/disks/DisksApp.cpp b/programs/disks/DisksApp.cpp index 0e0e34f7d10..b81cd52f8c8 100644 --- a/programs/disks/DisksApp.cpp +++ b/programs/disks/DisksApp.cpp @@ -119,7 +119,7 @@ void DisksApp::init(std::vector & common_arguments) { std::cerr << "Unknown command name: " << command_name << "\n"; printHelpMessage(options_description); - throw DB::Exception("Bad Arguments", DB::ErrorCodes::BAD_ARGUMENTS); + throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Bad Arguments"); } processOptions(); diff --git a/programs/git-import/git-import.cpp b/programs/git-import/git-import.cpp index 9e464164da6..82eade8d27b 100644 --- a/programs/git-import/git-import.cpp +++ b/programs/git-import/git-import.cpp @@ -1160,7 +1160,7 @@ void processLog(const Options & options) /// Will run multiple processes in parallel size_t num_threads = options.threads; if (num_threads == 0) - throw Exception("num-threads cannot be zero", ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "num-threads cannot be zero"); std::vector> show_commands(num_threads); for (size_t i = 0; i < num_commits && i < num_threads; ++i) diff --git a/programs/keeper/Keeper.cpp b/programs/keeper/Keeper.cpp index 25452b808e2..f16dbc91344 100644 --- a/programs/keeper/Keeper.cpp +++ b/programs/keeper/Keeper.cpp @@ -484,8 +484,7 @@ try config().getUInt64("keeper_server.socket_send_timeout_sec", DBMS_DEFAULT_SEND_TIMEOUT_SEC), true), server_pool, socket)); #else UNUSED(port); - throw Exception{"SSL support for TCP protocol is 
disabled because Poco library was built without NetSSL support.", - ErrorCodes::SUPPORT_IS_DISABLED}; + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); #endif }); diff --git a/programs/library-bridge/CatBoostLibraryHandler.cpp b/programs/library-bridge/CatBoostLibraryHandler.cpp index 4fe539a53b2..f08b4ca49ee 100644 --- a/programs/library-bridge/CatBoostLibraryHandler.cpp +++ b/programs/library-bridge/CatBoostLibraryHandler.cpp @@ -169,7 +169,7 @@ std::vector> placeStringColumns(const ColumnRawPtrs & columns, si else if (const auto * column_fixed_string = typeid_cast(column)) data.push_back(placeFixedStringColumn(*column_fixed_string, buffer + i, size)); else - throw Exception("Cannot place string column.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot place string column."); } return data; diff --git a/programs/library-bridge/ExternalDictionaryLibraryHandler.cpp b/programs/library-bridge/ExternalDictionaryLibraryHandler.cpp index c60d4a4e5cc..2cc2df03a1d 100644 --- a/programs/library-bridge/ExternalDictionaryLibraryHandler.cpp +++ b/programs/library-bridge/ExternalDictionaryLibraryHandler.cpp @@ -32,7 +32,7 @@ ExternalDictionaryLibraryHandler::ExternalDictionaryLibraryHandler( if (lib_new) lib_data = lib_new(&settings_holder->strings, ExternalDictionaryLibraryAPI::log); else - throw Exception("Method extDict_libNew failed", ErrorCodes::EXTERNAL_LIBRARY_ERROR); + throw Exception(ErrorCodes::EXTERNAL_LIBRARY_ERROR, "Method extDict_libNew failed"); } @@ -173,22 +173,21 @@ Block ExternalDictionaryLibraryHandler::loadKeys(const Columns & key_columns) Block ExternalDictionaryLibraryHandler::dataToBlock(ExternalDictionaryLibraryAPI::RawClickHouseLibraryTable data) { if (!data) - throw Exception("LibraryDictionarySource: No data returned", ErrorCodes::EXTERNAL_LIBRARY_ERROR); + throw Exception(ErrorCodes::EXTERNAL_LIBRARY_ERROR, "LibraryDictionarySource: No data returned"); const auto * columns_received = static_cast(data); if (columns_received->error_code) - throw Exception( - "LibraryDictionarySource: Returned error: " + std::to_string(columns_received->error_code) + " " + (columns_received->error_string ? columns_received->error_string : ""), - ErrorCodes::EXTERNAL_LIBRARY_ERROR); + throw Exception(ErrorCodes::EXTERNAL_LIBRARY_ERROR, "LibraryDictionarySource: Returned error: {} {}", + std::to_string(columns_received->error_code), (columns_received->error_string ? 
columns_received->error_string : "")); MutableColumns columns = sample_block.cloneEmptyColumns(); for (size_t col_n = 0; col_n < columns_received->size; ++col_n) { if (columns.size() != columns_received->data[col_n].size) - throw Exception( - "LibraryDictionarySource: Returned unexpected number of columns: " + std::to_string(columns_received->data[col_n].size) + ", must be " + std::to_string(columns.size()), - ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); + throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "LibraryDictionarySource: " + "Returned unexpected number of columns: {}, must be {}", + columns_received->data[col_n].size, columns.size()); for (size_t row_n = 0; row_n < columns_received->data[col_n].size; ++row_n) { diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index b36d3a85bd3..2f0f98ae857 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -359,7 +359,7 @@ void LocalServer::setupUsers() if (users_config) global_context->setUsersConfig(users_config); else - throw Exception("Can't load config for users", ErrorCodes::CANNOT_LOAD_CONFIG); + throw Exception(ErrorCodes::CANNOT_LOAD_CONFIG, "Can't load config for users"); } void LocalServer::connect() @@ -489,7 +489,7 @@ void LocalServer::processConfig() if (is_interactive && !delayed_interactive) { if (config().has("query") && config().has("queries-file")) - throw Exception("Specify either `query` or `queries-file` option", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Specify either `query` or `queries-file` option"); if (config().has("multiquery")) is_multiquery = true; diff --git a/programs/obfuscator/Obfuscator.cpp b/programs/obfuscator/Obfuscator.cpp index b6952ad6cb0..274ad29a174 100644 --- a/programs/obfuscator/Obfuscator.cpp +++ b/programs/obfuscator/Obfuscator.cpp @@ -880,7 +880,7 @@ public: } if (!it) - throw Exception("Logical error in markov model", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error in markov model"); size_t offset_from_begin_of_string = pos - data; size_t determinator_sliding_window_size = params.determinator_sliding_window_size; @@ -1139,7 +1139,7 @@ public: if (const auto * type = typeid_cast(&data_type)) return std::make_unique(get(*type->getNestedType(), seed, markov_model_params)); - throw Exception("Unsupported data type", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported data type"); } }; @@ -1384,7 +1384,7 @@ try UInt8 version = 0; readBinary(version, model_in); if (version != 0) - throw Exception("Unknown version of the model file", ErrorCodes::UNKNOWN_FORMAT_VERSION); + throw Exception(ErrorCodes::UNKNOWN_FORMAT_VERSION, "Unknown version of the model file"); readBinary(source_rows, model_in); @@ -1392,14 +1392,14 @@ try size_t header_size = 0; readBinary(header_size, model_in); if (header_size != data_types.size()) - throw Exception("The saved model was created for different number of columns", ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS); + throw Exception(ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS, "The saved model was created for different number of columns"); for (size_t i = 0; i < header_size; ++i) { String type; readBinary(type, model_in); if (type != data_types[i]) - throw Exception("The saved model was created for different types of columns", ErrorCodes::TYPE_MISMATCH); + throw Exception(ErrorCodes::TYPE_MISMATCH, "The saved model was created for different types of columns"); } obfuscator.deserialize(model_in); diff --git 
a/programs/odbc-bridge/ColumnInfoHandler.cpp b/programs/odbc-bridge/ColumnInfoHandler.cpp index bf11947d436..6e93246e59a 100644 --- a/programs/odbc-bridge/ColumnInfoHandler.cpp +++ b/programs/odbc-bridge/ColumnInfoHandler.cpp @@ -181,7 +181,7 @@ void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServ } if (columns.empty()) - throw Exception("Columns definition was not returned", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Columns definition was not returned"); WriteBufferFromHTTPServerResponse out( response, diff --git a/programs/odbc-bridge/ODBCBlockInputStream.cpp b/programs/odbc-bridge/ODBCBlockInputStream.cpp index 5bbc39dc559..3aa3d9a652b 100644 --- a/programs/odbc-bridge/ODBCBlockInputStream.cpp +++ b/programs/odbc-bridge/ODBCBlockInputStream.cpp @@ -163,7 +163,7 @@ void ODBCSource::insertValue( break; } default: - throw Exception("Unsupported value type", ErrorCodes::UNKNOWN_TYPE); + throw Exception(ErrorCodes::UNKNOWN_TYPE, "Unsupported value type"); } } diff --git a/programs/odbc-bridge/ODBCPooledConnectionFactory.h b/programs/odbc-bridge/ODBCPooledConnectionFactory.h index f6185bffd1d..e425dea47f7 100644 --- a/programs/odbc-bridge/ODBCPooledConnectionFactory.h +++ b/programs/odbc-bridge/ODBCPooledConnectionFactory.h @@ -151,7 +151,7 @@ public: auto connection_available = pool->tryBorrowObject(connection, []() { return nullptr; }, ODBC_POOL_WAIT_TIMEOUT); if (!connection_available) - throw Exception("Unable to fetch connection within the timeout", ErrorCodes::NO_FREE_CONNECTION); + throw Exception(ErrorCodes::NO_FREE_CONNECTION, "Unable to fetch connection within the timeout"); try { diff --git a/programs/odbc-bridge/getIdentifierQuote.cpp b/programs/odbc-bridge/getIdentifierQuote.cpp index 09cdd31bb2e..793e398363c 100644 --- a/programs/odbc-bridge/getIdentifierQuote.cpp +++ b/programs/odbc-bridge/getIdentifierQuote.cpp @@ -44,7 +44,8 @@ IdentifierQuotingStyle getQuotingStyle(nanodbc::ConnectionHolderPtr connection) else if (identifier_quote[0] == '"') return IdentifierQuotingStyle::DoubleQuotes; else - throw Exception("Can not map quote identifier '" + identifier_quote + "' to IdentifierQuotingStyle value", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Can not map quote identifier '{}' to IdentifierQuotingStyle value", identifier_quote); } } diff --git a/programs/odbc-bridge/validateODBCConnectionString.cpp b/programs/odbc-bridge/validateODBCConnectionString.cpp index a87a8f58ede..6c6e11162b4 100644 --- a/programs/odbc-bridge/validateODBCConnectionString.cpp +++ b/programs/odbc-bridge/validateODBCConnectionString.cpp @@ -38,10 +38,10 @@ std::string validateODBCConnectionString(const std::string & connection_string) static constexpr size_t MAX_CONNECTION_STRING_SIZE = 1000; if (connection_string.empty()) - throw Exception("ODBC connection string cannot be empty", ErrorCodes::BAD_ODBC_CONNECTION_STRING); + throw Exception(ErrorCodes::BAD_ODBC_CONNECTION_STRING, "ODBC connection string cannot be empty"); if (connection_string.size() >= MAX_CONNECTION_STRING_SIZE) - throw Exception("ODBC connection string is too long", ErrorCodes::BAD_ODBC_CONNECTION_STRING); + throw Exception(ErrorCodes::BAD_ODBC_CONNECTION_STRING, "ODBC connection string is too long"); const char * pos = connection_string.data(); const char * end = pos + connection_string.size(); @@ -51,7 +51,7 @@ std::string validateODBCConnectionString(const std::string & connection_string) while (pos < end && 
isWhitespaceASCII(*pos)) { if (*pos != ' ') - throw Exception("ODBC connection string parameter contains unusual whitespace character", ErrorCodes::BAD_ODBC_CONNECTION_STRING); + throw Exception(ErrorCodes::BAD_ODBC_CONNECTION_STRING, "ODBC connection string parameter contains unusual whitespace character"); ++pos; } }; @@ -63,7 +63,8 @@ std::string validateODBCConnectionString(const std::string & connection_string) if (pos < end && isValidIdentifierBegin(*pos)) ++pos; else - throw Exception("ODBC connection string parameter name doesn't begin with valid identifier character", ErrorCodes::BAD_ODBC_CONNECTION_STRING); + throw Exception(ErrorCodes::BAD_ODBC_CONNECTION_STRING, + "ODBC connection string parameter name doesn't begin with valid identifier character"); /// Additionally allow dash and dot symbols in names. /// Strictly speaking, the name with that characters should be escaped. @@ -83,7 +84,8 @@ std::string validateODBCConnectionString(const std::string & connection_string) { signed char c = *pos; if (c < 32 || strchr("[]{}(),;?*=!@'\"", c) != nullptr) - throw Exception("ODBC connection string parameter value is unescaped and contains illegal character", ErrorCodes::BAD_ODBC_CONNECTION_STRING); + throw Exception(ErrorCodes::BAD_ODBC_CONNECTION_STRING, + "ODBC connection string parameter value is unescaped and contains illegal character"); ++pos; } @@ -97,7 +99,7 @@ std::string validateODBCConnectionString(const std::string & connection_string) if (pos < end && *pos == '{') ++pos; else - throw Exception("ODBC connection string parameter value doesn't begin with opening curly brace", ErrorCodes::BAD_ODBC_CONNECTION_STRING); + throw Exception(ErrorCodes::BAD_ODBC_CONNECTION_STRING, "ODBC connection string parameter value doesn't begin with opening curly brace"); while (pos < end) { @@ -109,13 +111,13 @@ std::string validateODBCConnectionString(const std::string & connection_string) } if (*pos == 0) - throw Exception("ODBC connection string parameter value contains ASCII NUL character", ErrorCodes::BAD_ODBC_CONNECTION_STRING); + throw Exception(ErrorCodes::BAD_ODBC_CONNECTION_STRING, "ODBC connection string parameter value contains ASCII NUL character"); res += *pos; ++pos; } - throw Exception("ODBC connection string parameter is escaped but there is no closing curly brace", ErrorCodes::BAD_ODBC_CONNECTION_STRING); + throw Exception(ErrorCodes::BAD_ODBC_CONNECTION_STRING, "ODBC connection string parameter is escaped but there is no closing curly brace"); }; auto read_value = [&] @@ -139,25 +141,25 @@ std::string validateODBCConnectionString(const std::string & connection_string) Poco::toUpperInPlace(name); if (name == "FILEDSN" || name == "SAVEFILE" || name == "DRIVER") - throw Exception("ODBC connection string has forbidden parameter", ErrorCodes::BAD_ODBC_CONNECTION_STRING); + throw Exception(ErrorCodes::BAD_ODBC_CONNECTION_STRING, "ODBC connection string has forbidden parameter"); if (pos >= end) - throw Exception("ODBC connection string parameter doesn't have value", ErrorCodes::BAD_ODBC_CONNECTION_STRING); + throw Exception(ErrorCodes::BAD_ODBC_CONNECTION_STRING, "ODBC connection string parameter doesn't have value"); if (*pos == '=') ++pos; else - throw Exception("ODBC connection string parameter doesn't have value", ErrorCodes::BAD_ODBC_CONNECTION_STRING); + throw Exception(ErrorCodes::BAD_ODBC_CONNECTION_STRING, "ODBC connection string parameter doesn't have value"); skip_whitespaces(); std::string value = read_value(); skip_whitespaces(); if (name.size() > MAX_ELEMENT_SIZE || 
value.size() > MAX_ELEMENT_SIZE) - throw Exception("ODBC connection string has too long keyword or value", ErrorCodes::BAD_ODBC_CONNECTION_STRING); + throw Exception(ErrorCodes::BAD_ODBC_CONNECTION_STRING, "ODBC connection string has too long keyword or value"); if (!parameters.emplace(name, value).second) - throw Exception("Duplicate parameter found in ODBC connection string", ErrorCodes::BAD_ODBC_CONNECTION_STRING); + throw Exception(ErrorCodes::BAD_ODBC_CONNECTION_STRING, "Duplicate parameter found in ODBC connection string"); if (pos >= end) break; @@ -165,7 +167,7 @@ std::string validateODBCConnectionString(const std::string & connection_string) if (*pos == ';') ++pos; else - throw Exception("Unexpected character found after parameter value in ODBC connection string", ErrorCodes::BAD_ODBC_CONNECTION_STRING); + throw Exception(ErrorCodes::BAD_ODBC_CONNECTION_STRING, "Unexpected character found after parameter value in ODBC connection string"); } /// Reconstruct the connection string. @@ -173,12 +175,12 @@ std::string validateODBCConnectionString(const std::string & connection_string) auto it = parameters.find("DSN"); if (parameters.end() == it) - throw Exception("DSN parameter is mandatory for ODBC connection string", ErrorCodes::BAD_ODBC_CONNECTION_STRING); + throw Exception(ErrorCodes::BAD_ODBC_CONNECTION_STRING, "DSN parameter is mandatory for ODBC connection string"); std::string dsn = it->second; if (dsn.empty()) - throw Exception("DSN parameter cannot be empty in ODBC connection string", ErrorCodes::BAD_ODBC_CONNECTION_STRING); + throw Exception(ErrorCodes::BAD_ODBC_CONNECTION_STRING, "DSN parameter cannot be empty in ODBC connection string"); parameters.erase(it); @@ -241,7 +243,7 @@ std::string validateODBCConnectionString(const std::string & connection_string) write_element(elem.first, elem.second); if (reconstructed_connection_string.size() >= MAX_CONNECTION_STRING_SIZE) - throw Exception("ODBC connection string is too long", ErrorCodes::BAD_ODBC_CONNECTION_STRING); + throw Exception(ErrorCodes::BAD_ODBC_CONNECTION_STRING, "ODBC connection string is too long"); return reconstructed_connection_string; } diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 419b80ccff2..38b3970da7d 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -257,7 +257,7 @@ static std::string getCanonicalPath(std::string && path) { Poco::trimInPlace(path); if (path.empty()) - throw Exception("path configuration parameter is empty", ErrorCodes::INVALID_CONFIG_PARAMETER); + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "path configuration parameter is empty"); if (path.back() != '/') path += '/'; return std::move(path); @@ -1116,7 +1116,7 @@ try #endif if (config().has("interserver_http_port") && config().has("interserver_https_port")) - throw Exception("Both http and https interserver ports are specified", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG); + throw Exception(ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG, "Both http and https interserver ports are specified"); static const auto interserver_tags = { @@ -1141,7 +1141,7 @@ try int port = parse(port_str); if (port < 0 || port > 0xFFFF) - throw Exception("Out of range '" + String(port_tag) + "': " + toString(port), ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Out of range '{}': {}", String(port_tag), port); global_context->setInterserverIOAddress(this_host, port); global_context->setInterserverScheme(scheme); @@ -1419,8 +1419,7 @@ try 
global_context->getSettingsRef().send_timeout.totalSeconds(), true), server_pool, socket)); #else UNUSED(port); - throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", - ErrorCodes::SUPPORT_IS_DISABLED}; + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); #endif }); } @@ -1465,7 +1464,7 @@ try size_t max_cache_size = static_cast(memory_amount * cache_size_to_ram_max_ratio); /// Size of cache for uncompressed blocks. Zero means disabled. - String uncompressed_cache_policy = config().getString("uncompressed_cache_policy", ""); + String uncompressed_cache_policy = config().getString("uncompressed_cache_policy", "SLRU"); LOG_INFO(log, "Uncompressed cache policy name {}", uncompressed_cache_policy); size_t uncompressed_cache_size = config().getUInt64("uncompressed_cache_size", 0); if (uncompressed_cache_size > max_cache_size) @@ -1491,7 +1490,7 @@ try /// Size of cache for marks (index of MergeTree family of tables). size_t mark_cache_size = config().getUInt64("mark_cache_size", 5368709120); - String mark_cache_policy = config().getString("mark_cache_policy", ""); + String mark_cache_policy = config().getString("mark_cache_policy", "SLRU"); if (!mark_cache_size) LOG_ERROR(log, "Too low mark cache size will lead to severe performance degradation."); if (mark_cache_size > max_cache_size) @@ -1517,6 +1516,15 @@ try if (mmap_cache_size) global_context->setMMappedFileCache(mmap_cache_size); + /// A cache for query results. + size_t query_result_cache_size = config().getUInt64("query_result_cache.size", 1_GiB); + if (query_result_cache_size) + global_context->setQueryResultCache( + query_result_cache_size, + config().getUInt64("query_result_cache.max_entries", 1024), + config().getUInt64("query_result_cache.max_entry_size", 1_MiB), + config().getUInt64("query_result_cache.max_entry_records", 30'000'000)); + #if USE_EMBEDDED_COMPILER /// 128 MB constexpr size_t compiled_expression_cache_size_default = 1024 * 1024 * 128; @@ -1740,14 +1748,15 @@ try std::lock_guard lock(servers_lock); createServers(config(), listen_hosts, interserver_listen_hosts, listen_try, server_pool, async_metrics, servers); if (servers.empty()) - throw Exception( - "No servers started (add valid listen_host and 'tcp_port' or 'http_port' to configuration file.)", - ErrorCodes::NO_ELEMENTS_IN_CONFIG); + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, + "No servers started (add valid listen_host and 'tcp_port' or 'http_port' " + "to configuration file.)"); } if (servers.empty()) - throw Exception("No servers started (add valid listen_host and 'tcp_port' or 'http_port' to configuration file.)", - ErrorCodes::NO_ELEMENTS_IN_CONFIG); + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, + "No servers started (add valid listen_host and 'tcp_port' or 'http_port' " + "to configuration file.)"); #if USE_SSL CertificateReloader::instance().tryLoad(config()); @@ -1807,7 +1816,7 @@ try String ddl_zookeeper_path = config().getString("distributed_ddl.path", "/clickhouse/task_queue/ddl/"); int pool_size = config().getInt("distributed_ddl.pool_size", 1); if (pool_size < 1) - throw Exception("distributed_ddl.pool_size should be greater then 0", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "distributed_ddl.pool_size should be greater then 0"); global_context->setDDLWorker(std::make_unique(pool_size, ddl_zookeeper_path, global_context, &config(), 
"distributed_ddl", "DDLWorker", &CurrentMetrics::MaxDDLEntryID, &CurrentMetrics::MaxPushedDDLEntryID)); @@ -1936,8 +1945,7 @@ std::unique_ptr Server::buildProtocolStackFromConfig( #if USE_SSL return TCPServerConnectionFactory::Ptr(new TLSHandlerFactory(*this, conf_name)); #else - throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", - ErrorCodes::SUPPORT_IS_DISABLED}; + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); #endif if (type == "proxy1") @@ -2104,8 +2112,7 @@ void Server::createServers( httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params)); #else UNUSED(port); - throw Exception{"HTTPS protocol is disabled because Poco library was built without NetSSL support.", - ErrorCodes::SUPPORT_IS_DISABLED}; + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "HTTPS protocol is disabled because Poco library was built without NetSSL support."); #endif }); @@ -2167,8 +2174,7 @@ void Server::createServers( new Poco::Net::TCPServerParams)); #else UNUSED(port); - throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", - ErrorCodes::SUPPORT_IS_DISABLED}; + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); #endif }); @@ -2273,8 +2279,7 @@ void Server::createServers( http_params)); #else UNUSED(port); - throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", - ErrorCodes::SUPPORT_IS_DISABLED}; + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); #endif }); } diff --git a/programs/server/config.xml b/programs/server/config.xml index 9c7dc191ba3..0ee7b6faac1 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -1453,6 +1453,14 @@ --> + + + + + + + + don't replace it +} + +void QueryResultCache::Writer::buffer(Chunk && partial_query_result) +{ + if (skip_insert) + return; + + auto & chunks = query_result.chunks; + + chunks->emplace_back(std::move(partial_query_result)); + + new_entry_size_in_bytes += chunks->back().allocatedBytes(); + new_entry_size_in_rows += chunks->back().getNumRows(); + + if ((new_entry_size_in_bytes > max_entry_size_in_bytes) || (new_entry_size_in_rows > max_entry_size_in_rows)) + { + chunks->clear(); /// eagerly free some space + skip_insert = true; + } +} + +void QueryResultCache::Writer::finalizeWrite() +{ + if (skip_insert) + return; + + if (std::chrono::duration_cast(std::chrono::system_clock::now() - query_start_time) < min_query_runtime) + return; + + std::lock_guard lock(mutex); + + if (auto it = cache.find(key); it != cache.end() && !is_stale(it->first)) + return; /// same check as in ctor because a parallel Writer could have inserted the current key in the meantime + + auto sufficient_space_in_cache = [this]() TSA_REQUIRES(mutex) + { + return (cache_size_in_bytes + new_entry_size_in_bytes <= max_cache_size_in_bytes) && (cache.size() + 1 <= max_cache_entries); + }; + + if (!sufficient_space_in_cache()) + { + size_t removed_items = 0; + /// Remove stale entries + for (auto it = cache.begin(); it != cache.end();) + if (is_stale(it->first)) + { + cache_size_in_bytes -= it->second.sizeInBytes(); + it = 
cache.erase(it); + ++removed_items; + } + else + ++it; + LOG_TRACE(&Poco::Logger::get("QueryResultCache"), "Removed {} stale entries", removed_items); + } + + /// Insert or replace if enough space + if (sufficient_space_in_cache()) + { + cache_size_in_bytes += query_result.sizeInBytes(); + if (auto it = cache.find(key); it != cache.end()) + cache_size_in_bytes -= it->second.sizeInBytes(); // key replacement + + cache[key] = std::move(query_result); + LOG_TRACE(&Poco::Logger::get("QueryResultCache"), "Stored result of query {}", key.queryStringFromAst()); + } +} + +QueryResultCache::Reader::Reader(const Cache & cache_, const Key & key, size_t & cache_size_in_bytes_, const std::lock_guard &) +{ + auto it = cache_.find(key); + + if (it == cache_.end()) + { + LOG_TRACE(&Poco::Logger::get("QueryResultCache"), "No entry found for query {}", key.queryStringFromAst()); + return; + } + + if (it->first.username.has_value() && it->first.username != key.username) + { + LOG_TRACE(&Poco::Logger::get("QueryResultCache"), "Inaccessible entry found for query {}", key.queryStringFromAst()); + return; + } + + if (is_stale(it->first)) + { + cache_size_in_bytes_ -= it->second.sizeInBytes(); + const_cast(cache_).erase(it); + LOG_TRACE(&Poco::Logger::get("QueryResultCache"), "Stale entry found and removed for query {}", key.queryStringFromAst()); + return; + } + + pipe = Pipe(std::make_shared(it->first.header, it->second.chunks)); + LOG_TRACE(&Poco::Logger::get("QueryResultCache"), "Entry found for query {}", key.queryStringFromAst()); +} + +bool QueryResultCache::Reader::hasCacheEntryForKey() const +{ + bool res = !pipe.empty(); + + if (res) + ProfileEvents::increment(ProfileEvents::QueryResultCacheHits); + else + ProfileEvents::increment(ProfileEvents::QueryResultCacheMisses); + + return res; +} + +Pipe && QueryResultCache::Reader::getPipe() +{ + chassert(!pipe.empty()); // cf. hasCacheEntryForKey() + return std::move(pipe); +} + +QueryResultCache::QueryResultCache(size_t max_cache_size_in_bytes_, size_t max_cache_entries_, size_t max_cache_entry_size_in_bytes_, size_t max_cache_entry_size_in_rows_) + : max_cache_size_in_bytes(max_cache_size_in_bytes_) + , max_cache_entries(max_cache_entries_) + , max_cache_entry_size_in_bytes(max_cache_entry_size_in_bytes_) + , max_cache_entry_size_in_rows(max_cache_entry_size_in_rows_) +{ +} + +QueryResultCache::Reader QueryResultCache::createReader(const Key & key) +{ + std::lock_guard lock(mutex); + return Reader(cache, key, cache_size_in_bytes, lock); +} + +QueryResultCache::Writer QueryResultCache::createWriter(const Key & key, std::chrono::milliseconds min_query_runtime) +{ + std::lock_guard lock(mutex); + return Writer(mutex, cache, key, cache_size_in_bytes, max_cache_size_in_bytes, max_cache_entries, max_cache_entry_size_in_bytes, max_cache_entry_size_in_rows, min_query_runtime); +} + +void QueryResultCache::reset() +{ + std::lock_guard lock(mutex); + cache.clear(); + times_executed.clear(); + cache_size_in_bytes = 0; +} + +size_t QueryResultCache::recordQueryRun(const Key & key) +{ + static constexpr size_t TIMES_EXECUTED_MAX_SIZE = 10'000; + + std::lock_guard times_executed_lock(mutex); + size_t times = ++times_executed[key]; + // Regularly drop times_executed to avoid DOS-by-unlimited-growth. 
+ if (times_executed.size() > TIMES_EXECUTED_MAX_SIZE) + times_executed.clear(); + return times; +} + +} diff --git a/src/Interpreters/Cache/QueryResultCache.h b/src/Interpreters/Cache/QueryResultCache.h new file mode 100644 index 00000000000..65cab854a45 --- /dev/null +++ b/src/Interpreters/Cache/QueryResultCache.h @@ -0,0 +1,168 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +/// Does AST contain non-deterministic functions like rand() and now()? +bool astContainsNonDeterministicFunctions(ASTPtr ast, ContextPtr context); + +/// Maps queries to query results. Useful to avoid repeated query calculation. +/// +/// The cache does not aim to be transactionally consistent (which is difficult to get right). For example, the cache is not invalidated +/// when data is inserted/deleted into/from tables referenced by queries in the cache. In such situations, incorrect results may be +/// returned. In order to still obtain sufficiently up-to-date query results, a expiry time (TTL) must be specified for each cache entry +/// after which it becomes stale and is ignored. Stale entries are removed opportunistically from the cache, they are only evicted when a +/// new entry is inserted and the cache has insufficient capacity. +class QueryResultCache +{ +public: + /// Represents a query result in the cache. + struct Key + { + /// ---------------------------------------------------- + /// The actual key (data which gets hashed): + + /// Unlike the query string, the AST is agnostic to lower/upper case (SELECT vs. select) + const ASTPtr ast; + + /// Note: For a transactionally consistent cache, we would need to include the system settings in the cache key or invalidate the + /// cache whenever the settings change. This is because certain settings (e.g. "additional_table_filters") can affect the query + /// result. + + /// ---------------------------------------------------- + /// Additional stuff data stored in the key, not hashed: + + /// Result metadata for constructing the pipe. + const Block header; + + /// Std::nullopt means that the associated entry can be read by other users. In general, sharing is a bad idea: First, it is + /// unlikely that different users pose the same queries. Second, sharing potentially breaches security. E.g. User A should not be + /// able to bypass row policies on some table by running the same queries as user B for whom no row policies exist. + const std::optional username; + + /// When does the entry expire? + const std::chrono::time_point expires_at; + + Key(ASTPtr ast_, + Block header_, const std::optional & username_, + std::chrono::time_point expires_at_); + + bool operator==(const Key & other) const; + String queryStringFromAst() const; + }; + + struct QueryResult + { + std::shared_ptr chunks = std::make_shared(); + size_t sizeInBytes() const; + + /// Notes: 1. For performance reasons, we cache the original result chunks as-is (no concatenation during cache insert or lookup). + /// 2. Ref-counting (shared_ptr) ensures that eviction of an entry does not affect queries which still read from the cache. 
+ /// (this can also be achieved by copying the chunks during lookup but that would be under the cache lock --> too slow) + }; + +private: + struct KeyHasher + { + size_t operator()(const Key & key) const; + }; + + /// query --> query result + using Cache = std::unordered_map; + + /// query --> query execution count + using TimesExecuted = std::unordered_map; + +public: + /// Buffers multiple partial query result chunks (buffer()) and eventually stores them as cache entry (finalizeWrite()). + /// + /// Implementation note: Queries may throw exceptions during runtime, e.g. out-of-memory errors. In this case, no query result must be + /// written into the query result cache. Unfortunately, neither the Writer nor the special transform added on top of the query pipeline + /// which holds the Writer know whether they are destroyed because the query ended successfully or because of an exception (otherwise, + /// we could simply implement a check in their destructors). To handle exceptions correctly nevertheless, we do the actual insert in + /// finalizeWrite() as opposed to the Writer destructor. This function is then called only for successful queries in finish_callback() + /// which runs before the transform and the Writer are destroyed, whereas for unsuccessful queries we do nothing (the Writer is + /// destroyed w/o inserting anything). + /// Queries may also be cancelled by the user, in which case IProcessor's cancel bit is set. FinalizeWrite() is only called if the + /// cancel bit is not set. + class Writer + { + public: + void buffer(Chunk && partial_query_result); + void finalizeWrite(); + private: + std::mutex & mutex; + Cache & cache TSA_GUARDED_BY(mutex); + const Key key; + size_t & cache_size_in_bytes TSA_GUARDED_BY(mutex); + const size_t max_cache_size_in_bytes; + const size_t max_cache_entries; + size_t new_entry_size_in_bytes = 0; + const size_t max_entry_size_in_bytes; + size_t new_entry_size_in_rows = 0; + const size_t max_entry_size_in_rows; + const std::chrono::time_point query_start_time = std::chrono::system_clock::now(); /// Writer construction and finalizeWrite() coincide with query start/end + const std::chrono::milliseconds min_query_runtime; + QueryResult query_result; + std::atomic skip_insert = false; + + Writer(std::mutex & mutex_, Cache & cache_, const Key & key_, + size_t & cache_size_in_bytes_, size_t max_cache_size_in_bytes_, + size_t max_cache_entries_, + size_t max_entry_size_in_bytes_, size_t max_entry_size_in_rows_, + std::chrono::milliseconds min_query_runtime_); + + friend class QueryResultCache; /// for createWriter() + }; + + /// Looks up a query result for a key in the cache and (if found) constructs a pipe with the query result chunks as source. + class Reader + { + public: + bool hasCacheEntryForKey() const; + Pipe && getPipe(); /// must be called only if hasCacheEntryForKey() returns true + private: + Reader(const Cache & cache_, const Key & key, size_t & cache_size_in_bytes_, const std::lock_guard &); + Pipe pipe; + friend class QueryResultCache; /// for createReader() + }; + + QueryResultCache(size_t max_cache_size_in_bytes_, size_t max_cache_entries_, size_t max_cache_entry_size_in_bytes_, size_t max_cache_entry_size_in_rows_); + + Reader createReader(const Key & key); + Writer createWriter(const Key & key, std::chrono::milliseconds min_query_runtime); + + void reset(); + + /// Record new execution of query represented by key. Returns number of executions so far. 
+ size_t recordQueryRun(const Key & key); + +private: + /// Implementation note: The query result implements a custom caching mechanism and doesn't make use of CacheBase, unlike many other + /// internal caches in ClickHouse. The main reason is that we don't need standard CacheBase (S)LRU eviction as the expiry times + /// associated with cache entries provide a "natural" eviction criterion. As a future TODO, we could make an expiry-based eviction + /// policy and use that with CacheBase (e.g. see #23706) + /// TODO To speed up removal of stale entries, we could also add another container sorted on expiry times which maps keys to iterators + /// into the cache. To insert an entry, add it to the cache + add the iterator to the sorted container. To remove stale entries, do a + /// binary search on the sorted container and erase all left of the found key. + mutable std::mutex mutex; + Cache cache TSA_GUARDED_BY(mutex); + TimesExecuted times_executed TSA_GUARDED_BY(mutex); + + size_t cache_size_in_bytes TSA_GUARDED_BY(mutex) = 0; /// updated in each cache insert/delete + const size_t max_cache_size_in_bytes; + const size_t max_cache_entries; + const size_t max_cache_entry_size_in_bytes; + const size_t max_cache_entry_size_in_rows; + + friend class StorageSystemQueryResultCache; +}; + +using QueryResultCachePtr = std::shared_ptr; + +} diff --git a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp index 16906e9440e..08c083b1976 100644 --- a/src/Interpreters/Cache/WriteBufferToFileSegment.cpp +++ b/src/Interpreters/Cache/WriteBufferToFileSegment.cpp @@ -1,5 +1,6 @@ #include #include +#include #include @@ -12,20 +13,6 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -namespace -{ - class SwapHelper - { - public: - SwapHelper(WriteBuffer & b1_, WriteBuffer & b2_) : b1(b1_), b2(b2_) { b1.swap(b2); } - ~SwapHelper() { b1.swap(b2); } - - private: - WriteBuffer & b1; - WriteBuffer & b2; - }; -} - WriteBufferToFileSegment::WriteBufferToFileSegment(FileSegment * file_segment_) : WriteBufferFromFileDecorator(file_segment_->detachWriter()), file_segment(file_segment_) { diff --git a/src/Interpreters/ClientInfo.cpp b/src/Interpreters/ClientInfo.cpp index b457df53db9..413ff1db6bc 100644 --- a/src/Interpreters/ClientInfo.cpp +++ b/src/Interpreters/ClientInfo.cpp @@ -22,7 +22,7 @@ namespace ErrorCodes void ClientInfo::write(WriteBuffer & out, UInt64 server_protocol_revision) const { if (server_protocol_revision < DBMS_MIN_REVISION_WITH_CLIENT_INFO) - throw Exception("Logical error: method ClientInfo::write is called for unsupported server revision", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: method ClientInfo::write is called for unsupported server revision"); writeBinary(static_cast(query_kind), out); if (empty()) @@ -102,7 +102,7 @@ void ClientInfo::write(WriteBuffer & out, UInt64 server_protocol_revision) const void ClientInfo::read(ReadBuffer & in, UInt64 client_protocol_revision) { if (client_protocol_revision < DBMS_MIN_REVISION_WITH_CLIENT_INFO) - throw Exception("Logical error: method ClientInfo::read is called for unsupported client revision", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: method ClientInfo::read is called for unsupported client revision"); UInt8 read_query_kind = 0; readBinary(read_query_kind, in); diff --git a/src/Interpreters/Cluster.cpp b/src/Interpreters/Cluster.cpp index b76434b23e7..bf3a66fed99 100644 --- 
a/src/Interpreters/Cluster.cpp +++ b/src/Interpreters/Cluster.cpp @@ -114,7 +114,7 @@ Cluster::Address::Address( port = static_cast(config.getInt(config_prefix + ".port", default_port)); if (!port) - throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Port is not specified in cluster configuration: {}", config_prefix + ".port"); + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Port is not specified in cluster configuration: {}.port", config_prefix); is_local = isLocal(config.getInt(port_type, 0)); @@ -201,7 +201,7 @@ std::pair Cluster::Address::fromString(const String & host_port_ { auto pos = host_port_string.find_last_of(':'); if (pos == std::string::npos) - throw Exception("Incorrect : format " + host_port_string, ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Incorrect : format {}", host_port_string); return {unescapeForFileName(host_port_string.substr(0, pos)), parse(host_port_string.substr(pos + 1))}; } @@ -213,7 +213,7 @@ String Cluster::Address::toFullString(bool use_compact_format) const { if (shard_index == 0 || replica_index == 0) // shard_num/replica_num like in system.clusters table - throw Exception("shard_num/replica_num cannot be zero", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "shard_num/replica_num cannot be zero"); return fmt::format("shard{}_replica{}", shard_index, replica_index); } @@ -263,12 +263,12 @@ Cluster::Address Cluster::Address::fromFullString(const String & full_string) const char * colon = strchr(full_string.data(), ':'); if (!user_pw_end || !colon) - throw Exception("Incorrect user[:password]@host:port#default_database format " + full_string, ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Incorrect user[:password]@host:port#default_database format {}", full_string); const bool has_pw = colon < user_pw_end; const char * host_end = has_pw ? strchr(user_pw_end + 1, ':') : colon; if (!host_end) - throw Exception("Incorrect address '" + full_string + "', it does not contain port", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Incorrect address '{}', it does not contain port", full_string); const char * has_db = strchr(full_string.data(), '#'); const char * port_end = has_db ? has_db : address_end; @@ -362,7 +362,7 @@ void Clusters::updateClusters(const Poco::Util::AbstractConfiguration & new_conf } if (key.find('.') != String::npos) - throw Exception("Cluster names with dots are not supported: '" + key + "'", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Cluster names with dots are not supported: '{}'", key); /// If old config is set and cluster config wasn't changed, don't update this cluster. if (!old_config || !isSameConfiguration(new_config, *old_config, config_prefix + "." 
+ key)) @@ -396,7 +396,7 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config, boost::range::remove_erase(config_keys, "secret"); if (config_keys.empty()) - throw Exception("No cluster elements (shard, node) specified in config at path " + config_prefix, ErrorCodes::SHARD_HAS_NO_CONNECTIONS); + throw Exception(ErrorCodes::SHARD_HAS_NO_CONNECTIONS, "No cluster elements (shard, node) specified in config at path {}", config_prefix); UInt32 current_shard_num = 1; for (const auto & key : config_keys) @@ -485,7 +485,7 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config, } } else - throw Exception("Unknown element in config: " + replica_key, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG, "Unknown element in config: {}", replica_key); } Addresses shard_local_addresses; @@ -529,13 +529,13 @@ Cluster::Cluster(const Poco::Util::AbstractConfiguration & config, }); } else - throw Exception("Unknown element in config: " + key, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG, "Unknown element in config: {}", key); ++current_shard_num; } if (addresses_with_failover.empty()) - throw Exception("There must be either 'node' or 'shard' elements in config", ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG); + throw Exception(ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG, "There must be either 'node' or 'shard' elements in config"); initMisc(); } @@ -632,8 +632,7 @@ void Cluster::initMisc() for (const auto & shard_info : shards_info) { if (!shard_info.isLocal() && !shard_info.hasRemoteConnections()) - throw Exception("Found shard without any specified connection", - ErrorCodes::SHARD_HAS_NO_CONNECTIONS); + throw Exception(ErrorCodes::SHARD_HAS_NO_CONNECTIONS, "Found shard without any specified connection"); } for (const auto & shard_info : shards_info) @@ -672,7 +671,7 @@ std::unique_ptr Cluster::getClusterWithMultipleShards(const std::vector Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Settings & settings) { if (from.addresses_with_failover.empty()) - throw Exception("Cluster is empty", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cluster is empty"); UInt32 shard_num = 0; std::set> unique_hosts; @@ -790,7 +789,7 @@ std::vector Cluster::filterAddressesByShardOrReplica(s const std::string & Cluster::ShardInfo::insertPathForInternalReplication(bool prefer_localhost_replica, bool use_compact_format) const { if (!has_internal_replication) - throw Exception("internal_replication is not set", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "internal_replication is not set"); const auto & paths = insert_path_for_internal_replication; if (!use_compact_format) diff --git a/src/Interpreters/Cluster.h b/src/Interpreters/Cluster.h index ada04aa1cae..a7f5a914974 100644 --- a/src/Interpreters/Cluster.h +++ b/src/Interpreters/Cluster.h @@ -224,7 +224,7 @@ public: const ShardInfo & getAnyShardInfo() const { if (shards_info.empty()) - throw Exception("Cluster is empty", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cluster is empty"); return shards_info.front(); } diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp index 2e2f886a50a..e228dcc1f4a 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp @@ -152,10 +152,8 @@ void SelectStreamFactory::createForShard( 
return; } else - throw Exception( - "Local replica of shard " + toString(shard_info.shard_num) - + " is stale (delay: " + toString(local_delay) + "s.), but no other replica configured", - ErrorCodes::ALL_REPLICAS_ARE_STALE); + throw Exception(ErrorCodes::ALL_REPLICAS_ARE_STALE, "Local replica of shard {} is stale (delay: " + "{}s.), but no other replica configured", shard_info.shard_num, toString(local_delay)); } if (!shard_info.hasRemoteConnections()) diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 8ea6298c50b..fe31b4d8302 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -132,7 +132,7 @@ void executeQuery( const Settings & settings = context->getSettingsRef(); if (settings.max_distributed_depth && context->getClientInfo().distributed_depth >= settings.max_distributed_depth) - throw Exception("Maximum distributed depth exceeded", ErrorCodes::TOO_LARGE_DISTRIBUTED_DEPTH); + throw Exception(ErrorCodes::TOO_LARGE_DISTRIBUTED_DEPTH, "Maximum distributed depth exceeded"); std::vector plans; SelectStreamFactory::Shards remote_shards; diff --git a/src/Interpreters/CollectJoinOnKeysVisitor.cpp b/src/Interpreters/CollectJoinOnKeysVisitor.cpp index 9b264cb52a3..fd6fc27faec 100644 --- a/src/Interpreters/CollectJoinOnKeysVisitor.cpp +++ b/src/Interpreters/CollectJoinOnKeysVisitor.cpp @@ -48,8 +48,7 @@ void CollectJoinOnKeysMatcher::Data::addJoinKeys(const ASTPtr & left_ast, const else if (isRightIdentifier(table_pos.first) && isLeftIdentifier(table_pos.second)) analyzed_join.addOnKeys(right, left); else - throw Exception("Cannot detect left and right JOIN keys. JOIN ON section is ambiguous.", - ErrorCodes::INVALID_JOIN_ON_EXPRESSION); + throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, "Cannot detect left and right JOIN keys. 
JOIN ON section is ambiguous."); } void CollectJoinOnKeysMatcher::Data::addAsofJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, @@ -78,7 +77,7 @@ void CollectJoinOnKeysMatcher::Data::addAsofJoinKeys(const ASTPtr & left_ast, co void CollectJoinOnKeysMatcher::Data::asofToJoinKeys() { if (!asof_left_key || !asof_right_key) - throw Exception("No inequality in ASOF JOIN ON section.", ErrorCodes::INVALID_JOIN_ON_EXPRESSION); + throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, "No inequality in ASOF JOIN ON section."); addJoinKeys(asof_left_key, asof_right_key, {JoinIdentifierPos::Left, JoinIdentifierPos::Right}); } @@ -87,8 +86,7 @@ void CollectJoinOnKeysMatcher::visit(const ASTIdentifier & ident, const ASTPtr & if (auto expr_from_table = getTableForIdentifiers(ast, false, data); isDeterminedIdentifier(expr_from_table)) data.analyzed_join.addJoinCondition(ast, isLeftIdentifier(expr_from_table)); else - throw Exception("Unexpected identifier '" + ident.name() + "' in JOIN ON section", - ErrorCodes::INVALID_JOIN_ON_EXPRESSION); + throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, "Unexpected identifier '{}' in JOIN ON section", ident.name()); } void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & ast, Data & data) @@ -101,8 +99,8 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as if (func.name == "equals" || inequality != ASOFJoinInequality::None) { if (func.arguments->children.size() != 2) - throw Exception("Function " + func.name + " takes two arguments, got '" + func.formatForErrorMessage() + "' instead", - ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Function {} takes two arguments, got '{}' instead", + func.name, func.formatForErrorMessage()); } if (func.name == "equals") @@ -138,8 +136,8 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as if (data.is_asof && inequality != ASOFJoinInequality::None) { if (data.asof_left_key || data.asof_right_key) - throw Exception("ASOF JOIN expects exactly one inequality in ON section. Unexpected '" + queryToString(ast) + "'", - ErrorCodes::INVALID_JOIN_ON_EXPRESSION); + throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, "ASOF JOIN expects exactly one inequality in ON section. " + "Unexpected '{}'", queryToString(ast)); ASTPtr left = func.arguments->children.at(0); ASTPtr right = func.arguments->children.at(1); @@ -149,8 +147,8 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as return; } - throw Exception("Unsupported JOIN ON conditions. Unexpected '" + queryToString(ast) + "'", - ErrorCodes::INVALID_JOIN_ON_EXPRESSION); + throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, "Unsupported JOIN ON conditions. Unexpected '{}'", + queryToString(ast)); } void CollectJoinOnKeysMatcher::getIdentifiers(const ASTPtr & ast, std::vector & out) @@ -158,8 +156,8 @@ void CollectJoinOnKeysMatcher::getIdentifiers(const ASTPtr & ast, std::vectoras()) { if (func->name == "arrayJoin") - throw Exception("Not allowed function in JOIN ON. Unexpected '" + queryToString(ast) + "'", - ErrorCodes::INVALID_JOIN_ON_EXPRESSION); + throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, "Not allowed function in JOIN ON. 
Unexpected '{}'", + queryToString(ast)); } else if (const auto * ident = ast->as()) { @@ -199,7 +197,7 @@ const ASTIdentifier * CollectJoinOnKeysMatcher::unrollAliases(const ASTIdentifie it = aliases.find(identifier->name()); if (!max_attempts--) - throw Exception("Cannot unroll aliases for '" + identifier->name() + "'", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot unroll aliases for '{}'", identifier->name()); } return identifier; @@ -254,7 +252,7 @@ JoinIdentifierPos CollectJoinOnKeysMatcher::getTableForIdentifiers(const ASTPtr in_left_table = !in_right_table; } else - throw Exception("Column '" + name + "' is ambiguous", ErrorCodes::AMBIGUOUS_COLUMN_NAME); + throw Exception(ErrorCodes::AMBIGUOUS_COLUMN_NAME, "Column '{}' is ambiguous", name); } if (in_left_table) @@ -272,9 +270,9 @@ JoinIdentifierPos CollectJoinOnKeysMatcher::getTableForIdentifiers(const ASTPtr if (membership != JoinIdentifierPos::Unknown && membership != table_number) { if (throw_on_table_mix) - throw Exception("Invalid columns in JOIN ON section. Columns " - + identifiers[0]->getAliasOrColumnName() + " and " + ident->getAliasOrColumnName() - + " are from different tables.", ErrorCodes::INVALID_JOIN_ON_EXPRESSION); + throw Exception(ErrorCodes::INVALID_JOIN_ON_EXPRESSION, "Invalid columns in JOIN ON section. " + "Columns {} and {} are from different tables.", + identifiers[0]->getAliasOrColumnName(), ident->getAliasOrColumnName()); return JoinIdentifierPos::Unknown; } } diff --git a/src/Interpreters/ConcurrentHashJoin.cpp b/src/Interpreters/ConcurrentHashJoin.cpp index 6c77539532f..03c173a73d9 100644 --- a/src/Interpreters/ConcurrentHashJoin.cpp +++ b/src/Interpreters/ConcurrentHashJoin.cpp @@ -167,7 +167,8 @@ IBlocksStreamPtr ConcurrentHashJoin::getNonJoinedBlocks( if (!JoinCommon::hasNonJoinedBlocks(*table_join)) return {}; - throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid join type. join kind: {}, strictness: {}", table_join->kind(), table_join->strictness()); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid join type. join kind: {}, strictness: {}", + table_join->kind(), table_join->strictness()); } static ALWAYS_INLINE IColumn::Selector hashToSelector(const WeakHash32 & hash, size_t num_shards) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 0f1126d2502..83fee127fa2 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -235,6 +236,7 @@ struct ContextSharedPart : boost::noncopyable mutable std::unique_ptr load_marks_threadpool; /// Threadpool for loading marks cache. mutable UncompressedCachePtr index_uncompressed_cache; /// The cache of decompressed blocks for MergeTree indices. mutable MarkCachePtr index_mark_cache; /// Cache of marks in compressed files of MergeTree indices. + mutable QueryResultCachePtr query_result_cache; /// Cache of query results. mutable MMappedFileCachePtr mmap_cache; /// Cache of mmapped files to avoid frequent open/map/unmap/close and to reuse from several threads. ProcessList process_list; /// Executing queries at the moment. 
GlobalOvercommitTracker global_overcommit_tracker; @@ -622,7 +624,7 @@ ContextMutablePtr Context::createCopy(const ContextPtr & other) ContextMutablePtr Context::createCopy(const ContextWeakPtr & other) { auto ptr = other.lock(); - if (!ptr) throw Exception("Can't copy an expired context", ErrorCodes::LOGICAL_ERROR); + if (!ptr) throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't copy an expired context"); return createCopy(ptr); } @@ -657,7 +659,7 @@ String Context::resolveDatabase(const String & database_name) const { String res = database_name.empty() ? getCurrentDatabase() : database_name; if (res.empty()) - throw Exception("Default database is not selected", ErrorCodes::UNKNOWN_DATABASE); + throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Default database is not selected"); return res; } @@ -820,7 +822,7 @@ void Context::setTemporaryStoragePolicy(const String & policy_name, size_t max_s VolumePtr volume = tmp_policy->getVolume(0); if (volume->getDisks().empty()) - throw Exception("No disks volume for temporary files", ErrorCodes::NO_ELEMENTS_IN_CONFIG); + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "No disks volume for temporary files"); for (const auto & disk : volume->getDisks()) { @@ -1164,7 +1166,7 @@ const Block & Context::getScalar(const String & name) const { // This should be a logical error, but it fails the sql_fuzz test too // often, so 'bad arguments' for now. - throw Exception("Scalar " + backQuoteIfNeed(name) + " doesn't exist (internal bug)", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Scalar {} doesn't exist (internal bug)", backQuoteIfNeed(name)); } return it->second; } @@ -1211,7 +1213,7 @@ void Context::addExternalTable(const String & table_name, TemporaryTableHolder & auto lock = getLock(); if (external_tables_mapping.end() != external_tables_mapping.find(table_name)) - throw Exception("Temporary table " + backQuoteIfNeed(table_name) + " already exists.", ErrorCodes::TABLE_ALREADY_EXISTS); + throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Temporary table {} already exists.", backQuoteIfNeed(table_name)); external_tables_mapping.emplace(table_name, std::make_shared(std::move(temporary_table))); } @@ -1444,8 +1446,8 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const void Context::addViewSource(const StoragePtr & storage) { if (view_source) - throw Exception( - "Temporary view source storage " + backQuoteIfNeed(view_source->getName()) + " already exists.", ErrorCodes::TABLE_ALREADY_EXISTS); + throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Temporary view source storage {} already exists.", + backQuoteIfNeed(view_source->getName())); view_source = storage; } @@ -1595,13 +1597,13 @@ String Context::getInitialQueryId() const void Context::setCurrentDatabaseNameInGlobalContext(const String & name) { if (!isGlobalContext()) - throw Exception("Cannot set current database for non global context, this method should be used during server initialization", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Cannot set current database for non global context, this method should " + "be used during server initialization"); auto lock = getLock(); if (!current_database.empty()) - throw Exception("Default database name cannot be changed in global context without server restart", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Default database name cannot be changed in global context without server restart"); current_database = name; } @@ -1693,7 +1695,7 @@ void 
Context::setMacros(std::unique_ptr && macros) ContextMutablePtr Context::getQueryContext() const { auto ptr = query_context.lock(); - if (!ptr) throw Exception("There is no query or query context has expired", ErrorCodes::THERE_IS_NO_QUERY); + if (!ptr) throw Exception(ErrorCodes::THERE_IS_NO_QUERY, "There is no query or query context has expired"); return ptr; } @@ -1706,20 +1708,20 @@ bool Context::isInternalSubquery() const ContextMutablePtr Context::getSessionContext() const { auto ptr = session_context.lock(); - if (!ptr) throw Exception("There is no session or session context has expired", ErrorCodes::THERE_IS_NO_SESSION); + if (!ptr) throw Exception(ErrorCodes::THERE_IS_NO_SESSION, "There is no session or session context has expired"); return ptr; } ContextMutablePtr Context::getGlobalContext() const { auto ptr = global_context.lock(); - if (!ptr) throw Exception("There is no global context or global context has expired", ErrorCodes::LOGICAL_ERROR); + if (!ptr) throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no global context or global context has expired"); return ptr; } ContextMutablePtr Context::getBufferContext() const { - if (!buffer_context) throw Exception("There is no buffer context", ErrorCodes::LOGICAL_ERROR); + if (!buffer_context) throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no buffer context"); return buffer_context; } @@ -1888,8 +1890,8 @@ BackupsWorker & Context::getBackupsWorker() const { auto lock = getLock(); - const bool allow_concurrent_backups = this->getConfigRef().getBool("allow_concurrent_backups", true); - const bool allow_concurrent_restores = this->getConfigRef().getBool("allow_concurrent_restores", true); + const bool allow_concurrent_backups = this->getConfigRef().getBool("backups.allow_concurrent_backups", true); + const bool allow_concurrent_restores = this->getConfigRef().getBool("backups.allow_concurrent_restores", true); if (!shared->backups_worker) shared->backups_worker.emplace(getSettingsRef().backup_threads, getSettingsRef().restore_threads, allow_concurrent_backups, allow_concurrent_restores); @@ -1932,7 +1934,7 @@ void Context::setUncompressedCache(size_t max_size_in_bytes, const String & unco auto lock = getLock(); if (shared->uncompressed_cache) - throw Exception("Uncompressed cache has been already created.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Uncompressed cache has been already created."); shared->uncompressed_cache = std::make_shared(max_size_in_bytes, uncompressed_cache_policy); } @@ -1958,7 +1960,7 @@ void Context::setMarkCache(size_t cache_size_in_bytes, const String & mark_cache auto lock = getLock(); if (shared->mark_cache) - throw Exception("Mark cache has been already created.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Mark cache has been already created."); shared->mark_cache = std::make_shared(cache_size_in_bytes, mark_cache_policy); } @@ -1978,11 +1980,13 @@ void Context::dropMarkCache() const ThreadPool & Context::getLoadMarksThreadpool() const { + const auto & config = getConfigRef(); + auto lock = getLock(); if (!shared->load_marks_threadpool) { - constexpr size_t pool_size = 50; - constexpr size_t queue_size = 1000000; + auto pool_size = config.getUInt(".load_marks_threadpool_pool_size", 50); + auto queue_size = config.getUInt(".load_marks_threadpool_queue_size", 1000000); shared->load_marks_threadpool = std::make_unique(pool_size, pool_size, queue_size); } return *shared->load_marks_threadpool; @@ -1993,7 +1997,7 @@ void 
Context::setIndexUncompressedCache(size_t max_size_in_bytes) auto lock = getLock(); if (shared->index_uncompressed_cache) - throw Exception("Index uncompressed cache has been already created.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Index uncompressed cache has been already created."); shared->index_uncompressed_cache = std::make_shared(max_size_in_bytes); } @@ -2019,7 +2023,7 @@ void Context::setIndexMarkCache(size_t cache_size_in_bytes) auto lock = getLock(); if (shared->index_mark_cache) - throw Exception("Index mark cache has been already created.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Index mark cache has been already created."); shared->index_mark_cache = std::make_shared(cache_size_in_bytes); } @@ -2037,13 +2041,35 @@ void Context::dropIndexMarkCache() const shared->index_mark_cache->reset(); } +void Context::setQueryResultCache(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes, size_t max_entry_size_in_records) +{ + auto lock = getLock(); + + if (shared->query_result_cache) + throw Exception("Query result cache has been already created.", ErrorCodes::LOGICAL_ERROR); + + shared->query_result_cache = std::make_shared(max_size_in_bytes, max_entries, max_entry_size_in_bytes, max_entry_size_in_records); +} + +QueryResultCachePtr Context::getQueryResultCache() const +{ + auto lock = getLock(); + return shared->query_result_cache; +} + +void Context::dropQueryResultCache() const +{ + auto lock = getLock(); + if (shared->query_result_cache) + shared->query_result_cache->reset(); +} void Context::setMMappedFileCache(size_t cache_size_in_num_entries) { auto lock = getLock(); if (shared->mmap_cache) - throw Exception("Mapped file cache has been already created.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Mapped file cache has been already created."); shared->mmap_cache = std::make_shared(cache_size_in_num_entries); } @@ -2078,6 +2104,9 @@ void Context::dropCaches() const if (shared->index_mark_cache) shared->index_mark_cache->reset(); + if (shared->query_result_cache) + shared->query_result_cache->reset(); + if (shared->mmap_cache) shared->mmap_cache->reset(); } @@ -2242,7 +2271,7 @@ void Context::setDDLWorker(std::unique_ptr ddl_worker) { auto lock = getLock(); if (shared->ddl_worker) - throw Exception("DDL background thread has already been initialized", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "DDL background thread has already been initialized"); ddl_worker->startup(); shared->ddl_worker = std::move(ddl_worker); } @@ -2253,12 +2282,12 @@ DDLWorker & Context::getDDLWorker() const if (!shared->ddl_worker) { if (!hasZooKeeper()) - throw Exception("There is no Zookeeper configuration in server config", ErrorCodes::NO_ELEMENTS_IN_CONFIG); + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "There is no Zookeeper configuration in server config"); if (!hasDistributedDDL()) - throw Exception("There is no DistributedDDL configuration in server config", ErrorCodes::NO_ELEMENTS_IN_CONFIG); + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "There is no DistributedDDL configuration in server config"); - throw Exception("DDL background thread is not initialized", ErrorCodes::NO_ELEMENTS_IN_CONFIG); + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "DDL background thread is not initialized"); } return *shared->ddl_worker; } @@ -2560,8 +2589,9 @@ void Context::setInterserverIOAddress(const String & host, UInt16 port) std::pair 
Context::getInterserverIOAddress() const { if (shared->interserver_io_host.empty() || shared->interserver_io_port == 0) - throw Exception("Parameter 'interserver_http(s)_port' required for replication is not specified in configuration file.", - ErrorCodes::NO_ELEMENTS_IN_CONFIG); + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, + "Parameter 'interserver_http(s)_port' required for replication is not specified " + "in configuration file."); return { shared->interserver_io_host, shared->interserver_io_port }; } @@ -2622,7 +2652,7 @@ std::shared_ptr Context::getCluster(const std::string & cluster_name) c { if (auto res = tryGetCluster(cluster_name)) return res; - throw Exception("Requested cluster '" + cluster_name + "' not found", ErrorCodes::BAD_GET); + throw Exception(ErrorCodes::BAD_GET, "Requested cluster '{}' not found", cluster_name); } @@ -2712,7 +2742,7 @@ void Context::setCluster(const String & cluster_name, const std::shared_ptrclusters_mutex); if (!shared->clusters) - throw Exception("Clusters are not set", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Clusters are not set"); shared->clusters->setCluster(cluster_name, cluster); } @@ -3159,7 +3189,7 @@ void Context::reloadConfig() const { /// Use mutex if callback may be changed after startup. if (!shared->config_reload_callback) - throw Exception("Can't reload config because config_reload_callback is not set.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't reload config because config_reload_callback is not set."); shared->config_reload_callback(); } @@ -3259,7 +3289,7 @@ const NameToNameMap & Context::getQueryParameters() const void Context::setQueryParameter(const String & name, const String & value) { if (!query_parameters.emplace(name, value).second) - throw Exception("Duplicate name " + backQuote(name) + " of query parameter", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Duplicate name {} of query parameter", backQuote(name)); } void Context::addQueryParameters(const NameToNameMap & parameters) @@ -3301,7 +3331,7 @@ std::shared_ptr Context::getActionLocksManager() const void Context::setExternalTablesInitializer(ExternalTablesInitializer && initializer) { if (external_tables_initializer_callback) - throw Exception("External tables initializer is already set", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "External tables initializer is already set"); external_tables_initializer_callback = std::move(initializer); } @@ -3320,7 +3350,7 @@ void Context::initializeExternalTablesIfSet() void Context::setInputInitializer(InputInitializer && initializer) { if (input_initializer_callback) - throw Exception("Input initializer is already set", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Input initializer is already set"); input_initializer_callback = std::move(initializer); } @@ -3329,7 +3359,7 @@ void Context::setInputInitializer(InputInitializer && initializer) void Context::initializeInput(const StoragePtr & input_storage) { if (!input_initializer_callback) - throw Exception("Input initializer is not set", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Input initializer is not set"); input_initializer_callback(shared_from_this(), input_storage); /// Reset callback @@ -3340,7 +3370,7 @@ void Context::initializeInput(const StoragePtr & input_storage) void Context::setInputBlocksReaderCallback(InputBlocksReader && reader) { if (input_blocks_reader) - throw 
Exception("Input blocks reader is already set", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Input blocks reader is already set"); input_blocks_reader = std::move(reader); } @@ -3610,7 +3640,7 @@ void Context::setAsynchronousInsertQueue(const std::shared_ptrasync_insert_queue = ptr; } @@ -3811,7 +3841,7 @@ ReadSettings Context::getReadSettings() const res.enable_filesystem_cache_log = settings.enable_filesystem_cache_log; res.enable_filesystem_cache_on_lower_level = settings.enable_filesystem_cache_on_lower_level; - res.max_query_cache_size = settings.max_query_cache_size; + res.filesystem_cache_max_download_size = settings.filesystem_cache_max_download_size; res.skip_download_if_exceeds_query_cache = settings.skip_download_if_exceeds_query_cache; res.remote_read_min_bytes_for_seek = settings.remote_read_min_bytes_for_seek; diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 58478ab79b8..10983b15d7f 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -82,6 +82,7 @@ struct Progress; struct FileProgress; class Clusters; class QueryLog; +class QueryResultCache; class QueryThreadLog; class QueryViewsLog; class PartLog; @@ -859,6 +860,11 @@ public: std::shared_ptr getMMappedFileCache() const; void dropMMappedFileCache() const; + /// Create a cache of query results for statements which run repeatedly. + void setQueryResultCache(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes, size_t max_entry_size_in_records); + std::shared_ptr getQueryResultCache() const; + void dropQueryResultCache() const; + /** Clear the caches of the uncompressed blocks and marks. * This is usually done when renaming tables, changing the type of columns, deleting a table. * - since caches are linked to file names, and become incorrect. 
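The hunks above introduce the query result cache (QueryResultCache.h/.cpp, the query_result_cache.* keys read in Server.cpp) and expose it through Context::setQueryResultCache / getQueryResultCache / dropQueryResultCache. As a rough illustration of how those pieces are meant to fit together, here is a hedged sketch (not itself part of this patch) of one read path and one write path against the declared API; the helper names, the fixed 60-second TTL, and the surrounding calls and variables (getUserName(), query_ast, result_header, partial_results) are illustrative assumptions, not the actual wiring in executeQuery.cpp.

/// Illustrative sketch only: names, TTL and plumbing below are assumptions (see note above).
#include <Interpreters/Cache/QueryResultCache.h>
#include <Interpreters/Context.h>
#include <QueryPipeline/Pipe.h>
#include <chrono>
#include <optional>

namespace DB
{

/// Read path: serve the query from the cache if a fresh, accessible entry exists for the key.
std::optional<Pipe> tryReadFromQueryResultCache(ContextPtr context, const QueryResultCache::Key & key)
{
    auto cache = context->getQueryResultCache();
    if (!cache)
        return std::nullopt;                       /// cache disabled in the server config

    auto reader = cache->createReader(key);
    if (!reader.hasCacheEntryForKey())             /// also counts QueryResultCacheHits / QueryResultCacheMisses
        return std::nullopt;

    return reader.getPipe();                       /// cached chunks become the pipeline source
}

/// Write path: buffer partial results and insert them only after the query finished successfully.
void tryWriteIntoQueryResultCache(ContextPtr context, ASTPtr query_ast, const Block & result_header, Chunks partial_results)
{
    auto cache = context->getQueryResultCache();
    if (!cache || astContainsNonDeterministicFunctions(query_ast, context))
        return;                                    /// results of now(), rand(), ... must not be cached

    QueryResultCache::Key key(
        query_ast,
        result_header,
        context->getUserName(),                    /// entry stays private to this user
        std::chrono::system_clock::now() + std::chrono::seconds(60));    /// assumed TTL

    auto writer = cache->createWriter(key, std::chrono::milliseconds(0));
    for (auto & chunk : partial_results)
        writer.buffer(std::move(chunk));           /// silently gives up if the entry exceeds the per-entry limits
    writer.finalizeWrite();                        /// must run only when the query did not throw
}

}

Keeping finalizeWrite() separate from the Writer destructor matches the design note in QueryResultCache.h: results of queries that end in an exception or are cancelled are never inserted, because the final insert is an explicit call made only on successful completion.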
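Throughout these hunks, the string-concatenating Exception constructor is replaced by the variadic form that takes the error code first and a fmt-style format string with {} placeholders, instead of building the message with + at the throw site. A minimal before/after sketch of the pattern, mirroring the Cluster.cpp hunk above:

    // Before: the message is built by string concatenation and the error code comes last.
    throw Exception("Unknown element in config: " + key, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);

    // After: the error code comes first and the message is a format string; arguments follow.
    throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG, "Unknown element in config: {}", key);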
diff --git a/src/Interpreters/Context_fwd.h b/src/Interpreters/Context_fwd.h index 2564912a297..c7928bbdbf3 100644 --- a/src/Interpreters/Context_fwd.h +++ b/src/Interpreters/Context_fwd.h @@ -39,7 +39,7 @@ struct WithContextImpl inline Shared getContext() const { auto ptr = context.lock(); - if (!ptr) throw Exception("Context has expired", ErrorCodes::LOGICAL_ERROR); + if (!ptr) throw Exception(ErrorCodes::LOGICAL_ERROR, "Context has expired"); return ptr; } diff --git a/src/Interpreters/CrossToInnerJoinVisitor.cpp b/src/Interpreters/CrossToInnerJoinVisitor.cpp index 09aebf874be..0675f2bb19c 100644 --- a/src/Interpreters/CrossToInnerJoinVisitor.cpp +++ b/src/Interpreters/CrossToInnerJoinVisitor.cpp @@ -48,14 +48,14 @@ struct JoinedElement void checkTableName(const DatabaseAndTableWithAlias & table, const String & current_database) const { if (!element.table_expression) - throw Exception("Not a table expression in JOIN (ARRAY JOIN?)", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Not a table expression in JOIN (ARRAY JOIN?)"); ASTTableExpression * table_expression = element.table_expression->as(); if (!table_expression) - throw Exception("Wrong table expression in JOIN", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong table expression in JOIN"); if (!table.same(DatabaseAndTableWithAlias(*table_expression, current_database))) - throw Exception("Inconsistent table names", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Inconsistent table names"); } void rewriteCommaToCross() @@ -178,7 +178,7 @@ std::vector getTables(const ASTSelectQuery & select) { const auto * table_element = child->as(); if (!table_element) - throw Exception("Logical error: TablesInSelectQueryElement expected", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: TablesInSelectQueryElement expected"); JoinedElement & t = joined_tables.emplace_back(*table_element); t.rewriteCommaToCross(); diff --git a/src/Interpreters/DDLTask.cpp b/src/Interpreters/DDLTask.cpp index 0425b3de99b..799f1b0b4f4 100644 --- a/src/Interpreters/DDLTask.cpp +++ b/src/Interpreters/DDLTask.cpp @@ -248,7 +248,7 @@ void DDLTask::setClusterInfo(ContextPtr context, Poco::Logger * log) { auto * query_on_cluster = dynamic_cast(query.get()); if (!query_on_cluster) - throw Exception("Received unknown DDL query", ErrorCodes::UNKNOWN_TYPE_OF_QUERY); + throw Exception(ErrorCodes::UNKNOWN_TYPE_OF_QUERY, "Received unknown DDL query"); cluster_name = query_on_cluster->cluster; cluster = context->tryGetCluster(cluster_name); @@ -321,7 +321,8 @@ bool DDLTask::tryFindHostInCluster() { if (!query_with_table->database) throw Exception(ErrorCodes::INCONSISTENT_CLUSTER_DEFINITION, - "For a distributed DDL on circular replicated cluster its table name must be qualified by database name."); + "For a distributed DDL on circular replicated cluster its table name " + "must be qualified by database name."); if (default_database == query_with_table->getDatabase()) return true; diff --git a/src/Interpreters/DDLWorker.cpp b/src/Interpreters/DDLWorker.cpp index d427e97828b..0f91212e6a9 100644 --- a/src/Interpreters/DDLWorker.cpp +++ b/src/Interpreters/DDLWorker.cpp @@ -994,7 +994,7 @@ void DDLWorker::createStatusDirs(const std::string & node_path, const ZooKeeperP String DDLWorker::enqueueQuery(DDLLogEntry & entry) { if (entry.hosts.empty()) - throw Exception("Empty host list in a distributed DDL task", ErrorCodes::LOGICAL_ERROR); + throw 
Exception(ErrorCodes::LOGICAL_ERROR, "Empty host list in a distributed DDL task"); auto zookeeper = getAndSetZooKeeper(); diff --git a/src/Interpreters/DatabaseAndTableWithAlias.cpp b/src/Interpreters/DatabaseAndTableWithAlias.cpp index 7fb581c1b4d..9b6ce4f22d3 100644 --- a/src/Interpreters/DatabaseAndTableWithAlias.cpp +++ b/src/Interpreters/DatabaseAndTableWithAlias.cpp @@ -37,7 +37,7 @@ DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTIdentifier & ident else if (identifier.name_parts.size() == 1) table = identifier.name_parts[0]; else - throw Exception("Logical error: invalid identifier", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: invalid identifier"); if (database.empty()) database = current_database; @@ -50,7 +50,7 @@ DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTPtr & node, const else if (const auto * identifier = node->as()) *this = DatabaseAndTableWithAlias(*identifier, current_database); else - throw Exception("Logical error: identifier or table identifier expected", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: identifier or table identifier expected"); } DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTTableExpression & table_expression, const String & current_database) @@ -70,7 +70,7 @@ DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTTableExpression & alias = table_expression.subquery->tryGetAlias(); } else - throw Exception("Logical error: no known elements in ASTTableExpression", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: no known elements in ASTTableExpression"); } bool DatabaseAndTableWithAlias::satisfies(const DatabaseAndTableWithAlias & db_table, bool table_may_be_an_alias) const diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index af88146c7b2..10141c78d2b 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -136,7 +136,7 @@ StoragePtr TemporaryTableHolder::getTable() const { auto table = temporary_tables->tryGetTable("_tmp_" + toString(id), getContext()); if (!table) - throw Exception("Temporary table " + getGlobalTableID().getNameForLogs() + " not found", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Temporary table {} not found", getGlobalTableID().getNameForLogs()); return table; } @@ -390,7 +390,7 @@ void DatabaseCatalog::assertDatabaseDoesntExistUnlocked(const String & database_ { assert(!database_name.empty()); if (databases.end() != databases.find(database_name)) - throw Exception("Database " + backQuoteIfNeed(database_name) + " already exists.", ErrorCodes::DATABASE_ALREADY_EXISTS); + throw Exception(ErrorCodes::DATABASE_ALREADY_EXISTS, "Database {} already exists.", backQuoteIfNeed(database_name)); } void DatabaseCatalog::attachDatabase(const String & database_name, const DatabasePtr & database) @@ -409,7 +409,7 @@ void DatabaseCatalog::attachDatabase(const String & database_name, const Databas DatabasePtr DatabaseCatalog::detachDatabase(ContextPtr local_context, const String & database_name, bool drop, bool check_empty) { if (database_name == TEMPORARY_DATABASE) - throw Exception("Cannot detach database with temporary tables.", ErrorCodes::DATABASE_ACCESS_DENIED); + throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "Cannot detach database with temporary tables."); DatabasePtr db; { @@ -427,8 +427,7 @@ DatabasePtr DatabaseCatalog::detachDatabase(ContextPtr 
local_context, const Stri try { if (!db->empty()) - throw Exception("New table appeared in database being dropped or detached. Try again.", - ErrorCodes::DATABASE_NOT_EMPTY); + throw Exception(ErrorCodes::DATABASE_NOT_EMPTY, "New table appeared in database being dropped or detached. Try again."); if (!drop) db->assertCanBeDetached(false); } @@ -500,7 +499,7 @@ DatabasePtr DatabaseCatalog::getDatabase(const UUID & uuid) const { auto db_and_table = tryGetByUUID(uuid); if (!db_and_table.first || db_and_table.second) - throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database UUID {} does not exist", toString(uuid)); + throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database UUID {} does not exist", uuid); return db_and_table.first; } @@ -590,11 +589,11 @@ void DatabaseCatalog::addUUIDMapping(const UUID & uuid, const DatabasePtr & data /// We are trying to replace existing mapping (prev_database != nullptr), it's logical error if (database || table) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Mapping for table with UUID={} already exists", toString(uuid)); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Mapping for table with UUID={} already exists", uuid); /// Normally this should never happen, but it's possible when the same UUIDs are explicitly specified in different CREATE queries, /// so it's not LOGICAL_ERROR throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Mapping for table with UUID={} already exists. It happened due to UUID collision, " - "most likely because some not random UUIDs were manually specified in CREATE queries.", toString(uuid)); + "most likely because some not random UUIDs were manually specified in CREATE queries.", uuid); } void DatabaseCatalog::removeUUIDMapping(const UUID & uuid) @@ -604,7 +603,7 @@ void DatabaseCatalog::removeUUIDMapping(const UUID & uuid) std::lock_guard lock{map_part.mutex}; auto it = map_part.map.find(uuid); if (it == map_part.map.end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Mapping for table with UUID={} doesn't exist", toString(uuid)); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Mapping for table with UUID={} doesn't exist", uuid); it->second = {}; } @@ -614,7 +613,7 @@ void DatabaseCatalog::removeUUIDMappingFinally(const UUID & uuid) UUIDToStorageMapPart & map_part = uuid_map[getFirstLevelIdx(uuid)]; std::lock_guard lock{map_part.mutex}; if (!map_part.map.erase(uuid)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Mapping for table with UUID={} doesn't exist", toString(uuid)); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Mapping for table with UUID={} doesn't exist", uuid); } void DatabaseCatalog::updateUUIDMapping(const UUID & uuid, DatabasePtr database, StoragePtr table) @@ -625,7 +624,7 @@ void DatabaseCatalog::updateUUIDMapping(const UUID & uuid, DatabasePtr database, std::lock_guard lock{map_part.mutex}; auto it = map_part.map.find(uuid); if (it == map_part.map.end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Mapping for table with UUID={} doesn't exist", toString(uuid)); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Mapping for table with UUID={} doesn't exist", uuid); auto & prev_database = it->second.first; auto & prev_table = it->second.second; assert(prev_database && prev_table); @@ -655,8 +654,7 @@ DatabaseCatalog & DatabaseCatalog::init(ContextMutablePtr global_context_) { if (database_catalog) { - throw Exception("Database catalog is initialized twice. This is a bug.", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Database catalog is initialized twice. 
This is a bug."); } database_catalog.reset(new DatabaseCatalog(global_context_)); @@ -668,8 +666,7 @@ DatabaseCatalog & DatabaseCatalog::instance() { if (!database_catalog) { - throw Exception("Database catalog is not initialized. This is a bug.", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Database catalog is not initialized. This is a bug."); } return *database_catalog; @@ -729,7 +726,7 @@ DDLGuardPtr DatabaseCatalog::getDDLGuard(const String & database, const String & return std::make_unique(db_guard.first, db_guard.second, std::move(lock), table, database); } -std::unique_lock DatabaseCatalog::getExclusiveDDLGuardForDatabase(const String & database) +std::unique_lock DatabaseCatalog::getExclusiveDDLGuardForDatabase(const String & database) { DDLGuards::iterator db_guard_iter; { @@ -1282,7 +1279,7 @@ TemporaryLockForUUIDDirectory & TemporaryLockForUUIDDirectory::operator = (Tempo } -DDLGuard::DDLGuard(Map & map_, std::shared_mutex & db_mutex_, std::unique_lock guards_lock_, const String & elem, const String & database_name) +DDLGuard::DDLGuard(Map & map_, SharedMutex & db_mutex_, std::unique_lock guards_lock_, const String & elem, const String & database_name) : map(map_), db_mutex(db_mutex_), guards_lock(std::move(guards_lock_)) { it = map.emplace(elem, Entry{std::make_unique(), 0}).first; diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index a3fa4515a69..5dc3f90b7f4 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -17,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -58,7 +58,7 @@ public: DDLGuard( Map & map_, - std::shared_mutex & db_mutex_, + SharedMutex & db_mutex_, std::unique_lock guards_lock_, const String & elem, const String & database_name); @@ -69,7 +69,7 @@ public: private: Map & map; - std::shared_mutex & db_mutex; + SharedMutex & db_mutex; Map::iterator it; std::unique_lock guards_lock; std::unique_lock table_lock; @@ -142,7 +142,7 @@ public: /// Get an object that protects the table from concurrently executing multiple DDL operations. DDLGuardPtr getDDLGuard(const String & database, const String & table); /// Get an object that protects the database from concurrent DDL queries all tables in the database - std::unique_lock getExclusiveDDLGuardForDatabase(const String & database); + std::unique_lock getExclusiveDDLGuardForDatabase(const String & database); void assertDatabaseExists(const String & database_name) const; @@ -298,7 +298,7 @@ private: /// For the duration of the operation, an element is placed here, and an object is returned, /// which deletes the element in the destructor when counter becomes zero. /// In case the element already exists, waits when query will be executed in other thread. See class DDLGuard below. - using DatabaseGuard = std::pair; + using DatabaseGuard = std::pair; using DDLGuards = std::map; DDLGuards ddl_guards TSA_GUARDED_BY(ddl_guards_mutex); /// If you capture mutex and ddl_guards_mutex, then you need to grab them strictly in this order. 
diff --git a/src/Interpreters/DuplicateOrderByVisitor.cpp b/src/Interpreters/DuplicateOrderByVisitor.cpp index b3573af9f8c..569253ff78d 100644 --- a/src/Interpreters/DuplicateOrderByVisitor.cpp +++ b/src/Interpreters/DuplicateOrderByVisitor.cpp @@ -78,7 +78,7 @@ void DuplicateOrderByFromSubqueriesData::visit(ASTSelectQuery & select_query, AS { auto * ast = child->as(); if (!ast || ast->children.empty()) - throw Exception("Bad ORDER BY expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE); + throw Exception(ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE, "Bad ORDER BY expression AST"); if (ast->with_fill) return; diff --git a/src/Interpreters/EmbeddedDictionaries.cpp b/src/Interpreters/EmbeddedDictionaries.cpp index 0b2efaf3dbe..6c0ccce66b5 100644 --- a/src/Interpreters/EmbeddedDictionaries.cpp +++ b/src/Interpreters/EmbeddedDictionaries.cpp @@ -143,7 +143,7 @@ EmbeddedDictionaries::~EmbeddedDictionaries() void EmbeddedDictionaries::reload() { if (!reloadImpl(true, true)) - throw Exception("Some embedded dictionaries were not successfully reloaded", ErrorCodes::UNFINISHED); + throw Exception(ErrorCodes::UNFINISHED, "Some embedded dictionaries were not successfully reloaded"); } diff --git a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp index 581a8cd87ee..7a5fc67596f 100644 --- a/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp +++ b/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp @@ -216,14 +216,14 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr } if (block.rows() != 1) - throw Exception("Scalar subquery returned more than one row", ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY); + throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, "Scalar subquery returned more than one row"); Block tmp_block; while (tmp_block.rows() == 0 && executor.pull(tmp_block)) ; if (tmp_block.rows() != 0) - throw Exception("Scalar subquery returned more than one row", ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY); + throw Exception(ErrorCodes::INCORRECT_RESULT_OF_SCALAR_SUBQUERY, "Scalar subquery returned more than one row"); } block = materializeBlock(block); diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index 5ea29615942..97555feb426 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -67,7 +67,7 @@ ExpressionActions::ExpressionActions(ActionsDAGPtr actions_dag_, const Expressio if (settings.max_temporary_columns && num_columns > settings.max_temporary_columns) throw Exception(ErrorCodes::TOO_MANY_TEMPORARY_COLUMNS, "Too many temporary columns: {}. Maximum: {}", - actions_dag->dumpNames(), std::to_string(settings.max_temporary_columns)); + actions_dag->dumpNames(), settings.max_temporary_columns); } ExpressionActionsPtr ExpressionActions::clone() const @@ -536,9 +536,9 @@ void ExpressionActions::checkLimits(const ColumnsWithTypeAndName & columns) cons if (column.column && !isColumnConst(*column.column)) list_of_non_const_columns << "\n" << column.name; - throw Exception("Too many temporary non-const columns:" + list_of_non_const_columns.str() - + ". Maximum: " + std::to_string(settings.max_temporary_non_const_columns), - ErrorCodes::TOO_MANY_TEMPORARY_NON_CONST_COLUMNS); + throw Exception(ErrorCodes::TOO_MANY_TEMPORARY_NON_CONST_COLUMNS, + "Too many temporary non-const columns:{}. 
Maximum: {}", + list_of_non_const_columns.str(), settings.max_temporary_non_const_columns); } } } @@ -575,7 +575,7 @@ static void executeAction(const ExpressionActions::Action & action, ExecutionCon { auto & res_column = columns[action.result_position]; if (res_column.type || res_column.column) - throw Exception("Result column is not empty", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Result column is not empty"); res_column.type = action.node->result_type; res_column.name = action.node->result_name; @@ -622,7 +622,7 @@ static void executeAction(const ExpressionActions::Action & action, ExecutionCon const auto * array = getArrayJoinColumnRawPtr(array_join_key.column); if (!array) - throw Exception("ARRAY JOIN of not array nor map: " + action.node->result_name, ErrorCodes::TYPE_MISMATCH); + throw Exception(ErrorCodes::TYPE_MISMATCH, "ARRAY JOIN of not array nor map: {}", action.node->result_name); for (auto & column : columns) if (column.column) @@ -812,7 +812,7 @@ NameAndTypePair ExpressionActions::getSmallestColumn(const NamesAndTypesList & c } if (!min_size) - throw Exception("No available columns", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "No available columns"); return result; } @@ -930,7 +930,7 @@ bool ExpressionActions::checkColumnIsAlwaysFalse(const String & column_name) con void ExpressionActionsChain::addStep(NameSet non_constant_inputs) { if (steps.empty()) - throw Exception("Cannot add action to empty ExpressionActionsChain", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add action to empty ExpressionActionsChain"); ColumnsWithTypeAndName columns = steps.back()->getResultColumns(); for (auto & column : columns) diff --git a/src/Interpreters/ExpressionActions.h b/src/Interpreters/ExpressionActions.h index faefe0985f7..48f18b3b407 100644 --- a/src/Interpreters/ExpressionActions.h +++ b/src/Interpreters/ExpressionActions.h @@ -260,7 +260,7 @@ struct ExpressionActionsChain : WithContext { if (allow_empty) return {}; - throw Exception("Empty ExpressionActionsChain", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty ExpressionActionsChain"); } return typeid_cast(steps.back().get())->actions_dag; @@ -269,7 +269,7 @@ struct ExpressionActionsChain : WithContext Step & getLastStep() { if (steps.empty()) - throw Exception("Empty ExpressionActionsChain", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty ExpressionActionsChain"); return *steps.back(); } diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index fbd076ee746..80cc0414643 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -129,7 +129,7 @@ bool sanitizeBlock(Block & block, bool throw_if_cannot_create_column) if (isNotCreatable(col.type->getTypeId())) { if (throw_if_cannot_create_column) - throw Exception("Cannot create column of type " + col.type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Cannot create column of type {}", col.type->getName()); return false; } @@ -314,7 +314,7 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) const auto & column_name = group_elements_ast[j]->getColumnName(); const auto * node = temp_actions->tryFindInOutputs(column_name); if (!node) - throw Exception("Unknown identifier (in GROUP BY): " + column_name, ErrorCodes::UNKNOWN_IDENTIFIER); + throw 
Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown identifier (in GROUP BY): {}", column_name); /// Only removes constant keys if it's an initiator or distributed_group_by_no_merge is enabled. if (getContext()->getClientInfo().distributed_depth == 0 || settings.distributed_group_by_no_merge > 0) @@ -368,7 +368,7 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions) const auto & column_name = group_asts[i]->getColumnName(); const auto * node = temp_actions->tryFindInOutputs(column_name); if (!node) - throw Exception("Unknown identifier (in GROUP BY): " + column_name, ErrorCodes::UNKNOWN_IDENTIFIER); + throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown identifier (in GROUP BY): {}", column_name); /// Only removes constant keys if it's an initiator or distributed_group_by_no_merge is enabled. if (getContext()->getClientInfo().distributed_depth == 0 || settings.distributed_group_by_no_merge > 0) @@ -930,14 +930,14 @@ const ASTSelectQuery * ExpressionAnalyzer::getSelectQuery() const { const auto * select_query = query->as(); if (!select_query) - throw Exception("Not a select query", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Not a select query"); return select_query; } const ASTSelectQuery * SelectQueryExpressionAnalyzer::getAggregatingQuery() const { if (!has_aggregation) - throw Exception("No aggregation", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "No aggregation"); return getSelectQuery(); } @@ -1251,8 +1251,8 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere( const auto & node = step.actions()->findInOutputs(prewhere_column_name); auto filter_type = node.result_type; if (!filter_type->canBeUsedInBooleanContext()) - throw Exception("Invalid type for filter in PREWHERE: " + filter_type->getName(), - ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, "Invalid type for filter in PREWHERE: {}", + filter_type->getName()); ActionsDAGPtr prewhere_actions; { @@ -1334,8 +1334,8 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, const auto & node = step.actions()->findInOutputs(where_column_name); auto filter_type = node.result_type; if (!filter_type->canBeUsedInBooleanContext()) - throw Exception("Invalid type for filter in WHERE: " + filter_type->getName(), - ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, "Invalid type for filter in WHERE: {}", + filter_type->getName()); return true; } @@ -1556,7 +1556,7 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChai { auto * ast = child->as(); if (!ast || ast->children.empty()) - throw Exception("Bad ORDER BY expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE); + throw Exception(ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE, "Bad ORDER BY expression AST"); if (getContext()->getSettingsRef().enable_positional_arguments) replaceForPositionalArguments(ast->children.at(0), select_query, ASTSelectQuery::Expression::ORDER_BY); @@ -2230,7 +2230,7 @@ void ExpressionAnalysisResult::checkActions() const if (actions) for (const auto & node : actions->getNodes()) if (node.type == ActionsDAG::ActionType::ARRAY_JOIN) - throw Exception("PREWHERE cannot contain ARRAY JOIN action", ErrorCodes::ILLEGAL_PREWHERE); + throw Exception(ErrorCodes::ILLEGAL_PREWHERE, "PREWHERE cannot contain ARRAY JOIN action"); }; check_actions(prewhere_info->prewhere_actions); diff --git 
a/src/Interpreters/ExternalLoader.cpp b/src/Interpreters/ExternalLoader.cpp index ea2b9045120..9858b27d57a 100644 --- a/src/Interpreters/ExternalLoader.cpp +++ b/src/Interpreters/ExternalLoader.cpp @@ -1000,7 +1000,7 @@ private: /// Loading. auto [new_object, new_exception] = loadSingleObject(name, *info->config, previous_version_as_base_for_loading); if (!new_object && !new_exception) - throw Exception("No object created and no exception raised for " + type_name, ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "No object created and no exception raised for {}", type_name); /// Saving the result of the loading. { @@ -1440,7 +1440,7 @@ void ExternalLoader::checkLoaded(const ExternalLoader::LoadResult & result, if (result.object && (!check_no_errors || !result.exception)) return; if (result.status == ExternalLoader::Status::LOADING) - throw Exception(type_name + " '" + result.name + "' is still loading", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "{} '{}' is still loading", type_name, result.name); if (result.exception) { // Exception is shared for multiple threads. @@ -1466,9 +1466,9 @@ void ExternalLoader::checkLoaded(const ExternalLoader::LoadResult & result, } } if (result.status == ExternalLoader::Status::NOT_EXIST) - throw Exception(type_name + " '" + result.name + "' not found", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "{} '{}' not found", type_name, result.name); if (result.status == ExternalLoader::Status::NOT_LOADED) - throw Exception(type_name + " '" + result.name + "' not tried to load", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "{} '{}' not tried to load", type_name, result.name); } void ExternalLoader::checkLoaded(const ExternalLoader::LoadResults & results, diff --git a/src/Interpreters/ExternalLoaderTempConfigRepository.cpp b/src/Interpreters/ExternalLoaderTempConfigRepository.cpp index c4210875867..10fc61a2ed0 100644 --- a/src/Interpreters/ExternalLoaderTempConfigRepository.cpp +++ b/src/Interpreters/ExternalLoaderTempConfigRepository.cpp @@ -31,7 +31,7 @@ bool ExternalLoaderTempConfigRepository::exists(const String & path_) Poco::Timestamp ExternalLoaderTempConfigRepository::getUpdateTime(const String & path_) { if (!exists(path_)) - throw Exception("Loadable " + path_ + " not found", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Loadable {} not found", path_); return creation_time; } @@ -39,7 +39,7 @@ Poco::Timestamp ExternalLoaderTempConfigRepository::getUpdateTime(const String & LoadablesConfigurationPtr ExternalLoaderTempConfigRepository::load(const String & path_) { if (!exists(path_)) - throw Exception("Loadable " + path_ + " not found", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Loadable {} not found", path_); return config; } diff --git a/src/Interpreters/GetAggregatesVisitor.h b/src/Interpreters/GetAggregatesVisitor.h index 036d50ba4d6..3f5804c39a0 100644 --- a/src/Interpreters/GetAggregatesVisitor.h +++ b/src/Interpreters/GetAggregatesVisitor.h @@ -66,8 +66,8 @@ private: if (isAggregateFunction(node)) { if (data.assert_no_aggregates) - throw Exception("Aggregate function " + node.getColumnName() + " is found " + String(data.assert_no_aggregates) + " in query", - ErrorCodes::ILLEGAL_AGGREGATION); + throw Exception(ErrorCodes::ILLEGAL_AGGREGATION, "Aggregate function {} is found {} in query", + node.getColumnName(), String(data.assert_no_aggregates)); String column_name = 
node.getColumnName(); if (data.uniq_names.count(column_name)) @@ -79,8 +79,8 @@ private: else if (node.is_window_function) { if (data.assert_no_windows) - throw Exception("Window function " + node.getColumnName() + " is found " + String(data.assert_no_windows) + " in query", - ErrorCodes::ILLEGAL_AGGREGATION); + throw Exception(ErrorCodes::ILLEGAL_AGGREGATION, "Window function {} is found {} in query", + node.getColumnName(), String(data.assert_no_windows)); String column_name = node.getColumnName(); if (data.uniq_names.count(column_name)) diff --git a/src/Interpreters/GinFilter.cpp b/src/Interpreters/GinFilter.cpp index 8965d3721d2..4662128e8ab 100644 --- a/src/Interpreters/GinFilter.cpp +++ b/src/Interpreters/GinFilter.cpp @@ -1,31 +1,34 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include namespace DB { + namespace ErrorCodes { extern const int BAD_ARGUMENTS; } + GinFilterParameters::GinFilterParameters(size_t ngrams_, Float64 density_) - : ngrams(ngrams_), density(density_) + : ngrams(ngrams_) + , density(density_) { if (ngrams > 8) - throw Exception("The size of gin filter cannot be greater than 8", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The size of inverted index filter cannot be greater than 8"); if (density <= 0 || density > 1) - throw Exception("The density of gin filter must be between 0 and 1", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The density inverted index gin filter must be between 0 and 1"); } GinFilter::GinFilter(const GinFilterParameters & params_) @@ -33,23 +36,23 @@ GinFilter::GinFilter(const GinFilterParameters & params_) { } -void GinFilter::add(const char* data, size_t len, UInt32 rowID, GinIndexStorePtr& store, UInt64 limit) const +void GinFilter::add(const char * data, size_t len, UInt32 rowID, GinIndexStorePtr & store, UInt64 limit) const { if (len > FST::MAX_TERM_LENGTH) return; String term(data, len); - auto it = store->getPostings().find(term); + auto it = store->getPostingsListBuilder().find(term); - if (it != store->getPostings().end()) + if (it != store->getPostingsListBuilder().end()) { if (!it->second->contains(rowID)) it->second->add(rowID); } else { - UInt64 threshold = std::lround(limit * params.density); - GinIndexStore::GinIndexPostingsBuilderPtr builder = std::make_shared(threshold); + UInt64 size_limit = std::lround(limit * params.density); + auto builder = std::make_shared(size_limit); builder->add(rowID); store->setPostingsBuilder(term, builder); @@ -66,7 +69,7 @@ void GinFilter::addRowRangeToGinFilter(UInt32 segmentID, UInt32 rowIDStart, UInt if (!rowid_ranges.empty()) { /// Try to merge the rowID range with the last one in the container - GinSegmentWithRowIDRange & last_rowid_range = rowid_ranges.back(); + GinSegmentWithRowIdRange & last_rowid_range = rowid_ranges.back(); if (last_rowid_range.segment_id == segmentID && last_rowid_range.range_end+1 == rowIDStart) @@ -80,93 +83,17 @@ void GinFilter::addRowRangeToGinFilter(UInt32 segmentID, UInt32 rowIDStart, UInt void GinFilter::clear() { + query_string.clear(); terms.clear(); rowid_ranges.clear(); - query_string.clear(); } -bool GinFilter::hasEmptyPostingsList(const PostingsCache& postings_cache) -{ - if (postings_cache.empty()) - return true; - - for (const auto& term_postings : postings_cache) - { - const 
SegmentedPostingsListContainer& container = term_postings.second; - if (container.empty()) - return true; - } - return false; -} - -bool GinFilter::matchInRange(const PostingsCache& postings_cache, UInt32 segment_id, UInt32 range_start, UInt32 range_end) -{ - /// Check for each terms - GinIndexPostingsList intersection_result; - bool intersection_result_init = false; - - for (const auto& term_postings : postings_cache) - { - /// Check if it is in the same segment by searching for segment_id - const SegmentedPostingsListContainer& container = term_postings.second; - auto container_it = container.find(segment_id); - if (container_it == container.cend()) - { - return false; - } - auto min_in_container = container_it->second->minimum(); - auto max_in_container = container_it->second->maximum(); - - //check if the postings list has always match flag - if (container_it->second->cardinality() == 1 && UINT32_MAX == min_in_container) - { - continue; //always match - } - - if (range_start > max_in_container || min_in_container > range_end) - { - return false; - } - - /// Delay initialization as late as possible - if (!intersection_result_init) - { - intersection_result_init = true; - intersection_result.addRange(range_start, range_end+1); - } - intersection_result &= *container_it->second; - if (intersection_result.cardinality() == 0) - { - return false; - } - } - return true; -} - -bool GinFilter::match(const PostingsCache& postings_cache) const -{ - if (hasEmptyPostingsList(postings_cache)) - { - return false; - } - - /// Check for each row ID ranges - for (const auto &rowid_range: rowid_ranges) - { - if (matchInRange(postings_cache, rowid_range.segment_id, rowid_range.range_start, rowid_range.range_end)) - { - return true; - } - } - return false; -} - -bool GinFilter::contains(const GinFilter & filter, PostingsCacheForStore &cache_store) const +bool GinFilter::contains(const GinFilter & filter, PostingsCacheForStore & cache_store) const { if (filter.getTerms().empty()) return true; - PostingsCachePtr postings_cache = cache_store.getPostings(filter.getQueryString()); + GinPostingsCachePtr postings_cache = cache_store.getPostings(filter.getQueryString()); if (postings_cache == nullptr) { GinIndexStoreDeserializer reader(cache_store.store); @@ -177,9 +104,73 @@ bool GinFilter::contains(const GinFilter & filter, PostingsCacheForStore &cache_ return match(*postings_cache); } -String GinFilter::getName() +namespace { - return FilterName; + +/// Helper method for checking if postings list cache is empty +bool hasEmptyPostingsList(const GinPostingsCache & postings_cache) +{ + if (postings_cache.empty()) + return true; + + for (const auto & term_postings : postings_cache) + { + const GinSegmentedPostingsListContainer & container = term_postings.second; + if (container.empty()) + return true; + } + return false; +} + +/// Helper method to check if the postings list cache has intersection with given row ID range +bool matchInRange(const GinPostingsCache & postings_cache, UInt32 segment_id, UInt32 range_start, UInt32 range_end) +{ + /// Check for each term + GinIndexPostingsList intersection_result; + bool intersection_result_init = false; + + for (const auto & term_postings : postings_cache) + { + /// Check if it is in the same segment by searching for segment_id + const GinSegmentedPostingsListContainer & container = term_postings.second; + auto container_it = container.find(segment_id); + if (container_it == container.cend()) + return false; + auto min_in_container = container_it->second->minimum(); + 
auto max_in_container = container_it->second->maximum(); + + //check if the postings list has always match flag + if (container_it->second->cardinality() == 1 && UINT32_MAX == min_in_container) + continue; //always match + + if (range_start > max_in_container || min_in_container > range_end) + return false; + + /// Delay initialization as late as possible + if (!intersection_result_init) + { + intersection_result_init = true; + intersection_result.addRange(range_start, range_end+1); + } + intersection_result &= *container_it->second; + if (intersection_result.cardinality() == 0) + return false; + } + return true; +} + +} + +bool GinFilter::match(const GinPostingsCache & postings_cache) const +{ + if (hasEmptyPostingsList(postings_cache)) + return false; + + /// Check for each row ID ranges + for (const auto & rowid_range: rowid_ranges) + if (matchInRange(postings_cache, rowid_range.segment_id, rowid_range.range_start, rowid_range.range_end)) + return true; + return false; } } diff --git a/src/Interpreters/GinFilter.h b/src/Interpreters/GinFilter.h index 0bcd4156f94..8985d84f215 100644 --- a/src/Interpreters/GinFilter.h +++ b/src/Interpreters/GinFilter.h @@ -1,19 +1,23 @@ #pragma once +#include #include #include -#include + namespace DB { + +static inline constexpr auto INVERTED_INDEX_NAME = "inverted"; + struct GinFilterParameters { - explicit GinFilterParameters(size_t ngrams_, Float64 density_); + GinFilterParameters(size_t ngrams_, Float64 density_); size_t ngrams; Float64 density; }; -struct GinSegmentWithRowIDRange +struct GinSegmentWithRowIdRange { /// Segment ID of the row ID range UInt32 segment_id; @@ -25,19 +29,20 @@ struct GinSegmentWithRowIDRange UInt32 range_end; }; +using GinSegmentWithRowIdRangeVector = std::vector; + /// GinFilter provides underlying functionalities for building inverted index and also /// it does filtering the unmatched rows according to its query string. /// It also builds and uses skipping index which stores (segmentID, RowIDStart, RowIDEnd) triples. class GinFilter { public: - using GinSegmentWithRowIDRanges = std::vector; - explicit GinFilter(const GinFilterParameters& params_); + explicit GinFilter(const GinFilterParameters & params_); - /// Add term(which length is 'len' and located at 'data') and its row ID to - /// the postings list builder for building inverted index for the given store. - void add(const char* data, size_t len, UInt32 rowID, GinIndexStorePtr& store, UInt64 limit) const; + /// Add term (located at 'data' with length 'len') and its row ID to the postings list builder + /// for building inverted index for the given store. 
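The matchInRange helper that the GinFilter.cpp hunk above moves into an anonymous namespace encodes the core pruning rule: a (segment, row ID range) triple recorded by the skipping index can match only if every query term has postings inside that range and the per-term postings intersect there. Below is a simplified model of that per-range check; std::set<uint32_t> stands in for the roaring-bitmap postings lists, and the segment lookup and the always-match sentinel from the real code are omitted.

#include <cstdint>
#include <set>
#include <vector>

using PostingsList = std::set<uint32_t>;  // row IDs of one term within one segment

// Returns true if every term has postings inside [range_start, range_end]
// and those in-range postings have a non-empty common intersection.
bool matchInRange(const std::vector<PostingsList> & term_postings,
                  uint32_t range_start, uint32_t range_end)
{
    std::set<uint32_t> intersection;
    bool intersection_init = false;

    for (const auto & postings : term_postings)
    {
        if (postings.empty())
            return false;  // a term with no postings can never match
        if (*postings.begin() > range_end || *postings.rbegin() < range_start)
            return false;  // this term has no row ID inside the range

        // Restrict the term's postings to the range, then fold into the accumulator.
        std::set<uint32_t> in_range(postings.lower_bound(range_start),
                                    postings.upper_bound(range_end));
        if (!intersection_init)
        {
            intersection = std::move(in_range);
            intersection_init = true;
        }
        else
        {
            std::set<uint32_t> next;
            for (uint32_t row : intersection)
                if (in_range.contains(row))
                    next.insert(row);
            intersection = std::move(next);
        }

        if (intersection.empty())
            return false;  // the terms never co-occur inside this range
    }
    return true;
}

In the real code the accumulator is a GinIndexPostingsList (a roaring bitmap) seeded with addRange(range_start, range_end + 1) and then &=-ed with each term's container, which avoids materializing the intermediate sets used in this sketch.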
+ void add(const char * data, size_t len, UInt32 rowID, GinIndexStorePtr & store, UInt64 limit) const; /// Accumulate (segmentID, RowIDStart, RowIDEnd) for building skipping index void addRowRangeToGinFilter(UInt32 segmentID, UInt32 rowIDStart, UInt32 rowIDEnd); @@ -45,47 +50,33 @@ public: /// Clear the content void clear(); - /// Check if the filter(built from query string) contains any rows in given filter 'af' by using + /// Check if the filter (built from query string) contains any rows in given filter by using /// given postings list cache - bool contains(const GinFilter & filter, PostingsCacheForStore &cache_store) const; - - /// Const getter for the row ID ranges - const GinSegmentWithRowIDRanges& getFilter() const { return rowid_ranges; } - - /// Mutable getter for the row ID ranges - GinSegmentWithRowIDRanges& getFilter() { return rowid_ranges; } + bool contains(const GinFilter & filter, PostingsCacheForStore & cache_store) const; /// Set the query string of the filter - void setQueryString(const char* data, size_t len) + void setQueryString(const char * data, size_t len) { query_string = String(data, len); } - /// Const getter of the query string - const String &getQueryString() const { return query_string; } - /// Add term which are tokens generated from the query string - void addTerm(const char* data, size_t len) + void addTerm(const char * data, size_t len) { if (len > FST::MAX_TERM_LENGTH) return; terms.push_back(String(data, len)); } - /// Const getter of terms(generated from the query string) - const std::vector& getTerms() const { return terms;} + /// Getter + const String & getQueryString() const { return query_string; } + const std::vector & getTerms() const { return terms; } + const GinSegmentWithRowIdRangeVector & getFilter() const { return rowid_ranges; } + GinSegmentWithRowIdRangeVector & getFilter() { return rowid_ranges; } - /// Check if the given postings list cache has matched rows by using the filter - bool match(const PostingsCache& postings_cache) const; - - /// Get filter name ("inverted") - static String getName(); - - /// Constant of filter name - static constexpr auto FilterName = "inverted"; private: /// Filter parameters - const GinFilterParameters& params; + const GinFilterParameters & params; /// Query string of the filter String query_string; @@ -94,15 +85,12 @@ private: std::vector terms; /// Row ID ranges which are (segmentID, RowIDStart, RowIDEnd) - GinSegmentWithRowIDRanges rowid_ranges; + GinSegmentWithRowIdRangeVector rowid_ranges; - /// Helper method for checking if postings list cache is empty - static bool hasEmptyPostingsList(const PostingsCache& postings_cache); - - /// Helper method to check if the postings list cache has intersection with given row ID range - static bool matchInRange(const PostingsCache& postings_cache, UInt32 segment_id, UInt32 range_start, UInt32 range_end); + /// Check if the given postings list cache has matched rows by using the filter + bool match(const GinPostingsCache & postings_cache) const; }; -using GinFilterPtr = std::shared_ptr; +using GinFilters = std::vector; } diff --git a/src/Interpreters/GlobalSubqueriesVisitor.h b/src/Interpreters/GlobalSubqueriesVisitor.h index dc9294be878..b105cae31c6 100644 --- a/src/Interpreters/GlobalSubqueriesVisitor.h +++ b/src/Interpreters/GlobalSubqueriesVisitor.h @@ -92,7 +92,7 @@ public: } if (!subquery_or_table_name) - throw Exception("Global subquery requires subquery or table name", ErrorCodes::WRONG_GLOBAL_SUBQUERY); + throw Exception(ErrorCodes::WRONG_GLOBAL_SUBQUERY, 
"Global subquery requires subquery or table name"); if (is_table) { diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp index b8c6c639e82..51d4c7d1f4b 100644 --- a/src/Interpreters/GraceHashJoin.cpp +++ b/src/Interpreters/GraceHashJoin.cpp @@ -41,7 +41,7 @@ namespace public: AccumulatedBlockReader(TemporaryFileStream & reader_, std::mutex & mutex_, - size_t result_block_size_ = DEFAULT_BLOCK_SIZE * 8) + size_t result_block_size_ = 0) : reader(reader_) , mutex(mutex_) , result_block_size(result_block_size_) @@ -59,18 +59,22 @@ namespace Blocks blocks; size_t rows_read = 0; - while (rows_read < result_block_size) + do { Block block = reader.read(); rows_read += block.rows(); if (!block) { eof = true; + if (blocks.size() == 1) + return blocks.front(); return concatenateBlocks(blocks); } blocks.push_back(std::move(block)); - } + } while (rows_read < result_block_size); + if (blocks.size() == 1) + return blocks.front(); return concatenateBlocks(blocks); } @@ -118,21 +122,12 @@ class GraceHashJoin::FileBucket : boost::noncopyable public: using BucketLock = std::unique_lock; - struct Stats - { - TemporaryFileStream::Stat left; - TemporaryFileStream::Stat right; - }; - - explicit FileBucket(size_t bucket_index_, - TemporaryFileStream & left_file_, - TemporaryFileStream & right_file_, - Poco::Logger * log_) + explicit FileBucket(size_t bucket_index_, TemporaryFileStream & left_file_, TemporaryFileStream & right_file_, Poco::Logger * log_) : idx{bucket_index_} , left_file{left_file_} , right_file{right_file_} , state{State::WRITING_BLOCKS} - , log(log_) + , log{log_} { } @@ -168,21 +163,18 @@ public: bool empty() const { return is_empty.load(); } - Stats getStat() const { return stats; } - AccumulatedBlockReader startJoining() { LOG_TRACE(log, "Joining file bucket {}", idx); - { std::unique_lock left_lock(left_file_mutex); std::unique_lock right_lock(right_file_mutex); - stats.left = left_file.finishWriting(); - stats.right = right_file.finishWriting(); + left_file.finishWriting(); + right_file.finishWriting(); + state = State::JOINING_BLOCKS; } - return AccumulatedBlockReader(right_file, right_file_mutex); } @@ -231,22 +223,23 @@ private: std::atomic_bool is_empty = true; std::atomic state; - Stats stats; Poco::Logger * log; }; namespace { + template -void flushBlocksToBuckets(Blocks & blocks, const GraceHashJoin::Buckets & buckets) +void flushBlocksToBuckets(Blocks & blocks, const GraceHashJoin::Buckets & buckets, size_t except_index = 0) { chassert(blocks.size() == buckets.size()); retryForEach( generateRandomPermutation(1, buckets.size()), // skipping 0 block, since we join it in memory w/o spilling on disk [&](size_t i) { - if (!blocks[i].rows()) + /// Skip empty and current bucket + if (!blocks[i].rows() || i == except_index) return true; bool flushed = false; @@ -281,6 +274,7 @@ GraceHashJoin::GraceHashJoin( , right_key_names(table_join->getOnlyClause().key_names_right) , tmp_data(std::make_unique(tmp_data_, CurrentMetrics::TemporaryFilesForJoin)) , hash_join(makeInMemoryJoin()) + , hash_join_sample_block(hash_join->savedBlockSample()) { if (!GraceHashJoin::isSupported(table_join)) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "GraceHashJoin is not supported for this join type"); @@ -288,6 +282,9 @@ GraceHashJoin::GraceHashJoin( void GraceHashJoin::initBuckets() { + if (!buckets.empty()) + return; + const auto & settings = context->getSettingsRef(); size_t initial_num_buckets = roundUpToPowerOfTwoOrZero(std::clamp(settings.grace_hash_join_initial_buckets, 1, 
settings.grace_hash_join_max_buckets)); @@ -300,7 +297,7 @@ void GraceHashJoin::initBuckets() if (buckets.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "No buckets created"); - LOG_TRACE(log, "Initialize {} buckets", buckets.size()); + LOG_TRACE(log, "Initialize {} bucket{}", buckets.size(), buckets.size() > 1 ? "s" : ""); current_bucket = buckets.front().get(); current_bucket->startJoining(); @@ -320,17 +317,44 @@ bool GraceHashJoin::addJoinedBlock(const Block & block, bool /*check_limits*/) throw Exception(ErrorCodes::LOGICAL_ERROR, "GraceHashJoin is not initialized"); Block materialized = materializeBlock(block); - addJoinedBlockImpl(materialized); + addJoinedBlockImpl(std::move(materialized)); return true; } -bool GraceHashJoin::fitsInMemory() const +bool GraceHashJoin::hasMemoryOverflow(size_t total_rows, size_t total_bytes) const { /// One row can't be split, avoid loop - if (hash_join->getTotalRowCount() < 2) - return true; + if (total_rows < 2) + return false; - return table_join->sizeLimits().softCheck(hash_join->getTotalRowCount(), hash_join->getTotalByteCount()); + bool has_overflow = !table_join->sizeLimits().softCheck(total_rows, total_bytes); + + if (has_overflow) + LOG_TRACE(log, "Memory overflow, size exceeded {} / {} bytes, {} / {} rows", + ReadableSize(total_bytes), ReadableSize(table_join->sizeLimits().max_bytes), + total_rows, table_join->sizeLimits().max_rows); + + return has_overflow; +} + +bool GraceHashJoin::hasMemoryOverflow(const BlocksList & blocks) const +{ + size_t total_rows = 0; + size_t total_bytes = 0; + for (const auto & block : blocks) + { + total_rows += block.rows(); + total_bytes += block.allocatedBytes(); + } + return hasMemoryOverflow(total_rows, total_bytes); +} + +bool GraceHashJoin::hasMemoryOverflow(const InMemoryJoinPtr & hash_join_) const +{ + size_t total_rows = hash_join_->getTotalRowCount(); + size_t total_bytes = hash_join_->getTotalByteCount(); + + return hasMemoryOverflow(total_rows, total_bytes); } GraceHashJoin::Buckets GraceHashJoin::rehashBuckets(size_t to_size) @@ -341,12 +365,13 @@ GraceHashJoin::Buckets GraceHashJoin::rehashBuckets(size_t to_size) if (to_size <= current_size) return buckets; - assert(isPowerOf2(to_size)); + chassert(isPowerOf2(to_size)); if (to_size > max_num_buckets) { throw Exception(ErrorCodes::LIMIT_EXCEEDED, - "Too many grace hash join buckets ({} > {}), consider increasing grace_hash_join_max_buckets or max_rows_in_join/max_bytes_in_join", + "Too many grace hash join buckets ({} > {}), " + "consider increasing grace_hash_join_max_buckets or max_rows_in_join/max_bytes_in_join", to_size, max_num_buckets); } @@ -361,14 +386,16 @@ GraceHashJoin::Buckets GraceHashJoin::rehashBuckets(size_t to_size) void GraceHashJoin::addBucket(Buckets & destination) { - BucketPtr new_bucket = std::make_shared( - destination.size(), tmp_data->createStream(left_sample_block), tmp_data->createStream(right_sample_block), log); + auto & left_file = tmp_data->createStream(left_sample_block); + auto & right_file = tmp_data->createStream(prepareRightBlock(right_sample_block)); + + BucketPtr new_bucket = std::make_shared(destination.size(), left_file, right_file, log); destination.emplace_back(std::move(new_bucket)); } void GraceHashJoin::checkTypesOfKeys(const Block & block) const { - assert(hash_join); + chassert(hash_join); return hash_join->checkTypesOfKeys(block); } @@ -421,7 +448,7 @@ size_t GraceHashJoin::getTotalRowCount() const size_t GraceHashJoin::getTotalByteCount() const { std::lock_guard lock(hash_join_mutex); - 
assert(hash_join); + chassert(hash_join); return hash_join->getTotalByteCount(); } @@ -435,9 +462,14 @@ bool GraceHashJoin::alwaysReturnsEmptySet() const std::shared_lock lock(rehash_mutex); return std::all_of(buckets.begin(), buckets.end(), [](const auto & bucket) { return bucket->empty(); }); }(); - bool hash_join_is_empty = hash_join && hash_join->alwaysReturnsEmptySet(); - return hash_join_is_empty && file_buckets_are_empty; + if (!file_buckets_are_empty) + return false; + + chassert(hash_join); + bool hash_join_is_empty = hash_join->alwaysReturnsEmptySet(); + + return hash_join_is_empty; } IBlocksStreamPtr GraceHashJoin::getNonJoinedBlocks(const Block &, const Block &, UInt64) const @@ -526,17 +558,11 @@ IBlocksStreamPtr GraceHashJoin::getDelayedBlocks() if (hash_join) { - auto right_blocks = hash_join->releaseJoinedBlocks(); - Blocks blocks = JoinCommon::scatterBlockByHash(right_key_names, right_blocks, buckets.size()); - - for (size_t i = 0; i < blocks.size(); ++i) + auto right_blocks = hash_join->releaseJoinedBlocks(/* restructure */ false); + for (auto & block : right_blocks) { - if (blocks[i].rows() == 0 || i == bucket_idx) - continue; - - if (i < bucket_idx) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected bucket index {} when current bucket is {}", i, bucket_idx); - buckets[i]->addRightBlock(blocks[i]); + Blocks blocks = JoinCommon::scatterBlockByHash(right_key_names, block, buckets.size()); + flushBlocksToBuckets(blocks, buckets, bucket_idx); } } @@ -568,7 +594,7 @@ IBlocksStreamPtr GraceHashJoin::getDelayedBlocks() return std::make_unique(current_bucket->idx, buckets, hash_join, left_key_names, right_key_names); } - LOG_TRACE(log, "Finished loading all buckets"); + LOG_TRACE(log, "Finished loading all {} buckets", buckets.size()); current_bucket = nullptr; return nullptr; @@ -579,42 +605,64 @@ GraceHashJoin::InMemoryJoinPtr GraceHashJoin::makeInMemoryJoin() return std::make_unique(table_join, right_sample_block, any_take_last_row); } +Block GraceHashJoin::prepareRightBlock(const Block & block) +{ + return HashJoin::prepareRightBlock(block, hash_join_sample_block); +} + void GraceHashJoin::addJoinedBlockImpl(Block block) { Buckets buckets_snapshot = getCurrentBuckets(); - Blocks blocks = JoinCommon::scatterBlockByHash(right_key_names, block, buckets_snapshot.size()); size_t bucket_index = current_bucket->idx; + Block current_block; + + { + Blocks blocks = JoinCommon::scatterBlockByHash(right_key_names, block, buckets_snapshot.size()); + flushBlocksToBuckets(blocks, buckets_snapshot, bucket_index); + current_block = std::move(blocks[bucket_index]); + } // Add block to the in-memory join - if (blocks[bucket_index].rows() > 0) + if (current_block.rows() > 0) { std::lock_guard lock(hash_join_mutex); - hash_join->addJoinedBlock(blocks[bucket_index], /* check_limits = */ false); - bool overflow = !fitsInMemory(); - - if (overflow) - { - auto right_blocks = hash_join->releaseJoinedBlocks(); - right_blocks.pop_back(); - - for (const auto & right_block : right_blocks) - blocks.push_back(right_block); - } - - while (overflow) - { - buckets_snapshot = rehashBuckets(buckets_snapshot.size() * 2); - - blocks = JoinCommon::scatterBlockByHash(right_key_names, blocks, buckets_snapshot.size()); + if (!hash_join) hash_join = makeInMemoryJoin(); - hash_join->addJoinedBlock(blocks[bucket_index], /* check_limits = */ false); - overflow = !fitsInMemory(); - } - blocks[bucket_index].clear(); - } - flushBlocksToBuckets(blocks, buckets_snapshot); + hash_join->addJoinedBlock(current_block, /* 
check_limits = */ false); + + if (!hasMemoryOverflow(hash_join)) + return; + + current_block = {}; + + auto right_blocks = hash_join->releaseJoinedBlocks(/* restructure */ false); + hash_join = nullptr; + + buckets_snapshot = rehashBuckets(buckets_snapshot.size() * 2); + + { + Blocks current_blocks; + current_blocks.reserve(right_blocks.size()); + for (const auto & right_block : right_blocks) + { + Blocks blocks = JoinCommon::scatterBlockByHash(right_key_names, right_block, buckets_snapshot.size()); + flushBlocksToBuckets(blocks, buckets_snapshot, bucket_index); + current_blocks.emplace_back(std::move(blocks[bucket_index])); + } + + if (current_blocks.size() == 1) + current_block = std::move(current_blocks.front()); + else + current_block = concatenateBlocks(current_blocks); + } + + hash_join = makeInMemoryJoin(); + + if (current_block.rows() > 0) + hash_join->addJoinedBlock(current_block, /* check_limits = */ false); + } } size_t GraceHashJoin::getNumBuckets() const diff --git a/src/Interpreters/GraceHashJoin.h b/src/Interpreters/GraceHashJoin.h index f4e75f142f3..4f7694e2f07 100644 --- a/src/Interpreters/GraceHashJoin.h +++ b/src/Interpreters/GraceHashJoin.h @@ -7,6 +7,7 @@ #include #include +#include #include @@ -95,8 +96,10 @@ private: /// Add right table block to the @join. Calls @rehash on overflow. void addJoinedBlockImpl(Block block); - /// Check that @join satisifes limits on rows/bytes in @table_join. - bool fitsInMemory() const; + /// Check that join satisfies limits on rows/bytes in table_join. + bool hasMemoryOverflow(size_t total_rows, size_t total_bytes) const; + bool hasMemoryOverflow(const InMemoryJoinPtr & hash_join_) const; + bool hasMemoryOverflow(const BlocksList & blocks) const; /// Create new bucket at the end of @destination. void addBucket(Buckets & destination); @@ -114,6 +117,9 @@ private: size_t getNumBuckets() const; Buckets getCurrentBuckets() const; + /// Structure block to store in the HashJoin according to sample_block. + Block prepareRightBlock(const Block & block); + Poco::Logger * log; ContextPtr context; std::shared_ptr table_join; @@ -130,12 +136,13 @@ private: TemporaryDataOnDiskPtr tmp_data; Buckets buckets; - mutable std::shared_mutex rehash_mutex; + mutable SharedMutex rehash_mutex; FileBucket * current_bucket = nullptr; mutable std::mutex current_bucket_mutex; InMemoryJoinPtr hash_join; + Block hash_join_sample_block; mutable std::mutex hash_join_mutex; }; diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index 5ff4f9beb05..22d02c4dbdf 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -221,8 +221,8 @@ HashJoin::HashJoin(std::shared_ptr table_join_, const Block & right_s , right_sample_block(right_sample_block_) , log(&Poco::Logger::get("HashJoin")) { - LOG_DEBUG(log, "Datatype: {}, kind: {}, strictness: {}, right header: {}", data->type, kind, strictness, right_sample_block.dumpStructure()); - LOG_DEBUG(log, "Keys: {}", TableJoin::formatClauses(table_join->getClauses(), true)); + LOG_DEBUG(log, "({}) Datatype: {}, kind: {}, strictness: {}, right header: {}", fmt::ptr(this), data->type, kind, strictness, right_sample_block.dumpStructure()); + LOG_DEBUG(log, "({}) Keys: {}", fmt::ptr(this), TableJoin::formatClauses(table_join->getClauses(), true)); if (isCrossOrComma(kind)) { @@ -267,10 +267,10 @@ HashJoin::HashJoin(std::shared_ptr table_join_, const Block & right_s /// @note ASOF JOIN is not INNER. It's better avoid use of 'INNER ASOF' combination in messages. 
/// In fact INNER means 'LEFT SEMI ASOF' while LEFT means 'LEFT OUTER ASOF'. if (!isLeft(kind) && !isInner(kind)) - throw Exception("Wrong ASOF JOIN type. Only ASOF and LEFT ASOF joins are supported", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Wrong ASOF JOIN type. Only ASOF and LEFT ASOF joins are supported"); if (key_columns.size() <= 1) - throw Exception("ASOF join needs at least one equi-join column", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "ASOF join needs at least one equi-join column"); size_t asof_size; asof_type = SortedLookupVectorBase::getTypeSize(*key_columns.back(), asof_size); @@ -340,7 +340,7 @@ HashJoin::Type HashJoin::chooseMethod(JoinKind kind, const ColumnRawPtrs & key_c return Type::keys128; if (size_of_field == 32) return Type::keys256; - throw Exception("Logical error: numeric column has sizeOfField not in 1, 2, 4, 8, 16, 32.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: numeric column has sizeOfField not in 1, 2, 4, 8, 16, 32."); } /// If the keys fit in N bits, we will use a hash table for N-bit-packed keys @@ -469,6 +469,9 @@ bool HashJoin::alwaysReturnsEmptySet() const size_t HashJoin::getTotalRowCount() const { + if (!data) + return 0; + size_t res = 0; if (data->type == Type::CROSS) @@ -484,28 +487,45 @@ size_t HashJoin::getTotalRowCount() const } } - return res; } size_t HashJoin::getTotalByteCount() const { + if (!data) + return 0; + +#ifdef NDEBUG + size_t debug_blocks_allocated_size = 0; + for (const auto & block : data->blocks) + debug_blocks_allocated_size += block.allocatedBytes(); + + if (data->blocks_allocated_size != debug_blocks_allocated_size) + throw Exception(ErrorCodes::LOGICAL_ERROR, "data->blocks_allocated_size != debug_blocks_allocated_size ({} != {})", + data->blocks_allocated_size, debug_blocks_allocated_size); + + size_t debug_blocks_nullmaps_allocated_size = 0; + for (const auto & nullmap : data->blocks_nullmaps) + debug_blocks_nullmaps_allocated_size += nullmap.second->allocatedBytes(); + + if (data->blocks_nullmaps_allocated_size != debug_blocks_nullmaps_allocated_size) + throw Exception(ErrorCodes::LOGICAL_ERROR, "data->blocks_nullmaps_allocated_size != debug_blocks_nullmaps_allocated_size ({} != {})", + data->blocks_nullmaps_allocated_size, debug_blocks_nullmaps_allocated_size); +#endif + size_t res = 0; - if (data->type == Type::CROSS) - { - for (const auto & block : data->blocks) - res += block.bytes(); - } - else + res += data->blocks_allocated_size; + res += data->blocks_nullmaps_allocated_size; + res += data->pool.size(); + + if (data->type != Type::CROSS) { for (const auto & map : data->maps) { joinDispatch(kind, strictness, map, [&](auto, auto, auto & map_) { res += map_.getTotalByteCountImpl(data->type); }); } - res += data->pool.size(); } - return res; } @@ -656,42 +676,57 @@ void HashJoin::initRightBlockStructure(Block & saved_block_sample) } } -Block HashJoin::structureRightBlock(const Block & block) const +Block HashJoin::prepareRightBlock(const Block & block, const Block & saved_block_sample_) { Block structured_block; - for (const auto & sample_column : savedBlockSample().getColumnsWithTypeAndName()) + for (const auto & sample_column : saved_block_sample_.getColumnsWithTypeAndName()) { ColumnWithTypeAndName column = block.getByName(sample_column.name); if (sample_column.column->isNullable()) JoinCommon::convertColumnToNullable(column); - structured_block.insert(column); + + if (column.column->lowCardinality() && 
!sample_column.column->lowCardinality()) + { + column.column = column.column->convertToFullColumnIfLowCardinality(); + column.type = removeLowCardinality(column.type); + } + + /// There's no optimization for right side const columns. Remove constness if any. + column.column = recursiveRemoveSparse(column.column->convertToFullColumnIfConst()); + structured_block.insert(std::move(column)); } return structured_block; } +Block HashJoin::prepareRightBlock(const Block & block) const +{ + return prepareRightBlock(block, savedBlockSample()); +} + bool HashJoin::addJoinedBlock(const Block & source_block, bool check_limits) { + if (!data) + throw Exception("Join data was released", ErrorCodes::LOGICAL_ERROR); + /// RowRef::SizeT is uint32_t (not size_t) for hash table Cell memory efficiency. /// It's possible to split bigger blocks and insert them by parts here. But it would be a dead code. if (unlikely(source_block.rows() > std::numeric_limits::max())) - throw Exception("Too many rows in right table block for HashJoin: " + toString(source_block.rows()), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Too many rows in right table block for HashJoin: {}", source_block.rows()); - /// There's no optimization for right side const columns. Remove constness if any. - Block block = materializeBlock(source_block); - size_t rows = block.rows(); + size_t rows = source_block.rows(); - ColumnRawPtrMap all_key_columns = JoinCommon::materializeColumnsInplaceMap(block, table_join->getAllNames(JoinTableSide::Right)); + ColumnPtrMap all_key_columns = JoinCommon::materializeColumnsInplaceMap(source_block, table_join->getAllNames(JoinTableSide::Right)); - Block structured_block = structureRightBlock(block); + Block block_to_save = prepareRightBlock(source_block); size_t total_rows = 0; size_t total_bytes = 0; { if (storage_join_lock) - throw DB::Exception("addJoinedBlock called when HashJoin locked to prevent updates", - ErrorCodes::LOGICAL_ERROR); + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "addJoinedBlock called when HashJoin locked to prevent updates"); - data->blocks.emplace_back(std::move(structured_block)); + data->blocks_allocated_size += block_to_save.allocatedBytes(); + data->blocks.emplace_back(std::move(block_to_save)); Block * stored_block = &data->blocks.back(); if (rows) @@ -703,7 +738,7 @@ bool HashJoin::addJoinedBlock(const Block & source_block, bool check_limits) { ColumnRawPtrs key_columns; for (const auto & name : onexprs[onexpr_idx].key_names_right) - key_columns.push_back(all_key_columns[name]); + key_columns.push_back(all_key_columns[name].get()); /// We will insert to the map only keys, where all components are not NULL. 
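// Note on the accounting introduced above: getTotalByteCount() no longer re-scans
// data->blocks on every call; instead blocks_allocated_size is bumped when a block is
// saved in addJoinedBlock(), with a debug-only recount guarding against drift. A
// minimal standalone sketch of that bookkeeping pattern follows; StoredBlock,
// BlockStore and allocatedBytes() are invented names, not the HashJoin types.
#include <cassert>
#include <cstddef>
#include <list>
#include <vector>

struct StoredBlock
{
    std::vector<char> data;
    size_t allocatedBytes() const { return data.capacity(); }
};

class BlockStore
{
public:
    void add(StoredBlock block)
    {
        blocks_allocated_size += block.allocatedBytes();   // pay the cost once, at insert time
        blocks.push_back(std::move(block));
    }

    size_t totalByteCount() const
    {
#ifndef NDEBUG
        size_t recount = 0;                                // debug-only consistency check
        for (const auto & block : blocks)
            recount += block.allocatedBytes();
        assert(recount == blocks_allocated_size);
#endif
        return blocks_allocated_size;                      // O(1) in release builds
    }

private:
    std::list<StoredBlock> blocks;
    size_t blocks_allocated_size = 0;
};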
ConstNullMapPtr null_map{}; @@ -718,14 +753,14 @@ bool HashJoin::addJoinedBlock(const Block & source_block, bool check_limits) save_nullmap |= (*null_map)[i]; } - auto join_mask_col = JoinCommon::getColumnAsMask(block, onexprs[onexpr_idx].condColumnNames().second); + auto join_mask_col = JoinCommon::getColumnAsMask(source_block, onexprs[onexpr_idx].condColumnNames().second); /// Save blocks that do not hold conditions in ON section ColumnUInt8::MutablePtr not_joined_map = nullptr; - if (!multiple_disjuncts && isRightOrFull(kind) && !join_mask_col.isConstant()) + if (!multiple_disjuncts && isRightOrFull(kind) && join_mask_col.hasData()) { const auto & join_mask = join_mask_col.getData(); /// Save rows that do not hold conditions - not_joined_map = ColumnUInt8::create(block.rows(), 0); + not_joined_map = ColumnUInt8::create(rows, 0); for (size_t i = 0, sz = join_mask->size(); i < sz; ++i) { /// Condition hold, do not save row @@ -759,10 +794,16 @@ bool HashJoin::addJoinedBlock(const Block & source_block, bool check_limits) } if (!multiple_disjuncts && save_nullmap) + { + data->blocks_nullmaps_allocated_size += null_map_holder->allocatedBytes(); data->blocks_nullmaps.emplace_back(stored_block, null_map_holder); + } if (!multiple_disjuncts && not_joined_map) + { + data->blocks_nullmaps_allocated_size += not_joined_map->allocatedBytes(); data->blocks_nullmaps.emplace_back(stored_block, std::move(not_joined_map)); + } if (!check_limits) return true; @@ -795,7 +836,6 @@ struct JoinOnKeyColumns Sizes key_sizes; - explicit JoinOnKeyColumns(const Block & block, const Names & key_names_, const String & cond_column_name, const Sizes & key_sizes_) : key_names(key_names_) , materialized_keys_holder(JoinCommon::materializeColumns(block, key_names)) /// Rare case, when keys are constant or low cardinality. To avoid code bloat, simply materialize them. @@ -1610,10 +1650,9 @@ DataTypePtr HashJoin::joinGetCheckAndGetReturnType(const DataTypes & data_types, { size_t num_keys = data_types.size(); if (right_table_keys.columns() != num_keys) - throw Exception( - "Number of arguments for function joinGet" + toString(or_null ? "OrNull" : "") - + " doesn't match: passed, should be equal to " + toString(num_keys), - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Number of arguments for function joinGet{} doesn't match: passed, should be equal to {}", + toString(or_null ? 
"OrNull" : ""), toString(num_keys)); for (size_t i = 0; i < num_keys; ++i) { @@ -1622,14 +1661,12 @@ DataTypePtr HashJoin::joinGetCheckAndGetReturnType(const DataTypes & data_types, auto left_type = removeNullable(recursiveRemoveLowCardinality(left_type_origin)); auto right_type = removeNullable(recursiveRemoveLowCardinality(right_type_origin)); if (!left_type->equals(*right_type)) - throw Exception( - "Type mismatch in joinGet key " + toString(i) + ": found type " + left_type->getName() + ", while the needed type is " - + right_type->getName(), - ErrorCodes::TYPE_MISMATCH); + throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch in joinGet key {}: " + "found type {}, while the needed type is {}", i, left_type->getName(), right_type->getName()); } if (!sample_block_with_columns_to_add.has(column_name)) - throw Exception("StorageJoin doesn't contain column " + column_name, ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "StorageJoin doesn't contain column {}", column_name); auto elem = sample_block_with_columns_to_add.getByName(column_name); if (or_null && JoinCommon::canBecomeNullable(elem.type)) @@ -1644,7 +1681,7 @@ ColumnWithTypeAndName HashJoin::joinGet(const Block & block, const Block & block bool is_valid = (strictness == JoinStrictness::Any || strictness == JoinStrictness::RightAny) && kind == JoinKind::Left; if (!is_valid) - throw Exception("joinGet only supports StorageJoin of type Left Any", ErrorCodes::INCOMPATIBLE_TYPE_OF_JOIN); + throw Exception(ErrorCodes::INCOMPATIBLE_TYPE_OF_JOIN, "joinGet only supports StorageJoin of type Left Any"); const auto & key_names_right = table_join->getOnlyClause().key_names_right; /// Assemble the key block with correct names. @@ -1676,7 +1713,7 @@ void HashJoin::checkTypesOfKeys(const Block & block) const void HashJoin::joinBlock(Block & block, ExtraBlockPtr & not_processed) { - if (data->released) + if (!data) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot join after data has been released"); for (const auto & onexpr : table_join->getClauses()) @@ -1715,6 +1752,16 @@ void HashJoin::joinBlock(Block & block, ExtraBlockPtr & not_processed) } } +HashJoin::~HashJoin() +{ + if (!data) + { + LOG_TRACE(log, "({}) Join data has been already released", fmt::ptr(this)); + return; + } + LOG_TRACE(log, "({}) Join data is being destroyed, {} bytes and {} rows in hash table", fmt::ptr(this), getTotalByteCount(), getTotalRowCount()); +} + template struct AdderNonJoined { @@ -1753,7 +1800,6 @@ struct AdderNonJoined } }; - /// Stream from not joined earlier rows of the right table. 
/// Based on: /// - map offsetInternal saved in used_flags for single disjuncts @@ -1764,7 +1810,10 @@ class NotJoinedHash final : public NotJoinedBlocks::RightColumnsFiller public: NotJoinedHash(const HashJoin & parent_, UInt64 max_block_size_) : parent(parent_), max_block_size(max_block_size_), current_block_start(0) - {} + { + if (parent.data == nullptr) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot join after data has been released"); + } Block getEmptyBlock() override { return parent.savedBlockSample().cloneEmpty(); } @@ -1961,7 +2010,6 @@ IBlocksStreamPtr HashJoin::getNonJoinedBlocks(const Block & left_sample_block, size_t left_columns_count = left_sample_block.columns(); auto non_joined = std::make_unique>(*this, max_block_size); return std::make_unique(std::move(non_joined), result_sample_block, left_columns_count, *table_join); - } else { @@ -1979,7 +2027,7 @@ void HashJoin::reuseJoinedData(const HashJoin & join) bool multiple_disjuncts = !table_join->oneDisjunct(); if (multiple_disjuncts) - throw Exception("StorageJoin with ORs is not supported", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "StorageJoin with ORs is not supported"); for (auto & map : data->maps) { @@ -1990,10 +2038,20 @@ void HashJoin::reuseJoinedData(const HashJoin & join) } } -BlocksList HashJoin::releaseJoinedBlocks() +BlocksList HashJoin::releaseJoinedBlocks(bool restructure) { + LOG_TRACE(log, "({}) Join data is being released, {} bytes and {} rows in hash table", fmt::ptr(this), getTotalByteCount(), getTotalRowCount()); + BlocksList right_blocks = std::move(data->blocks); - data->released = true; + if (!restructure) + { + data.reset(); + return right_blocks; + } + + data->maps.clear(); + data->blocks_nullmaps.clear(); + BlocksList restored_blocks; /// names to positions optimization @@ -2022,6 +2080,7 @@ BlocksList HashJoin::releaseJoinedBlocks() restored_blocks.emplace_back(std::move(restored_block)); } + data.reset(); return restored_blocks; } diff --git a/src/Interpreters/HashJoin.h b/src/Interpreters/HashJoin.h index 5ea47823b69..b29b6e617c8 100644 --- a/src/Interpreters/HashJoin.h +++ b/src/Interpreters/HashJoin.h @@ -149,6 +149,8 @@ class HashJoin : public IJoin public: HashJoin(std::shared_ptr table_join_, const Block & right_sample_block, bool any_take_last_row_ = false); + ~HashJoin() override; + const TableJoin & getTableJoin() const override { return *table_join; } /** Add block of data from right hand of JOIN to the map. @@ -336,7 +338,8 @@ public: /// Additional data - strings for string keys and continuation elements of single-linked lists of references to rows. 
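// Note on releaseJoinedBlocks(restructure) above: when restructure is requested, the
// saved blocks are handed back in the right table's original column layout, using a
// name-to-position lookup built once ("names to positions optimization"). A toy
// version of that restore step, with a pair-based Block stand-in rather than DB::Block:
#include <cstddef>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

using Column = std::vector<int>;
using NamedColumn = std::pair<std::string, Column>;
using Block = std::vector<NamedColumn>;        // stand-in for a block of named columns

Block restoreStructure(const Block & stored, const std::vector<std::string> & original_order)
{
    std::unordered_map<std::string, size_t> name_to_position;   // built once, reused per block
    for (size_t i = 0; i < stored.size(); ++i)
        name_to_position[stored[i].first] = i;

    Block restored;
    restored.reserve(original_order.size());
    for (const auto & name : original_order)
        restored.push_back(stored[name_to_position.at(name)]);
    return restored;
}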
Arena pool; - bool released = false; + size_t blocks_allocated_size = 0; + size_t blocks_nullmaps_allocated_size = 0; }; using RightTableDataPtr = std::shared_ptr; @@ -351,7 +354,13 @@ public: void reuseJoinedData(const HashJoin & join); RightTableDataPtr getJoinedData() const { return data; } - BlocksList releaseJoinedBlocks(); + BlocksList releaseJoinedBlocks(bool restructure = false); + + /// Modify right block (update structure according to sample block) to save it in block list + static Block prepareRightBlock(const Block & block, const Block & saved_block_sample_); + Block prepareRightBlock(const Block & block) const; + + const Block & savedBlockSample() const { return data->sample_block; } bool isUsed(size_t off) const { return used_flags.getUsedSafe(off); } bool isUsed(const Block * block_ptr, size_t row_idx) const { return used_flags.getUsedSafe(block_ptr, row_idx); } @@ -403,10 +412,6 @@ private: void dataMapInit(MapsVariant &); - const Block & savedBlockSample() const { return data->sample_block; } - - /// Modify (structure) right block to save it in block list - Block structureRightBlock(const Block & stored_block) const; void initRightBlockStructure(Block & saved_block_sample); template diff --git a/src/Interpreters/ITokenExtractor.h b/src/Interpreters/ITokenExtractor.h index 77de4233b63..fdcc9880bff 100644 --- a/src/Interpreters/ITokenExtractor.h +++ b/src/Interpreters/ITokenExtractor.h @@ -77,12 +77,15 @@ class ITokenExtractorHelper : public ITokenExtractor { size_t cur = 0; String token; + while (cur < length && static_cast(this)->nextInStringLike(data, length, &cur, token)) bloom_filter.add(token.c_str(), token.size()); } + void stringToGinFilter(const char * data, size_t length, GinFilter & gin_filter) const override { gin_filter.setQueryString(data, length); + size_t cur = 0; size_t token_start = 0; size_t token_len = 0; @@ -94,6 +97,7 @@ class ITokenExtractorHelper : public ITokenExtractor void stringPaddedToGinFilter(const char * data, size_t length, GinFilter & gin_filter) const override { gin_filter.setQueryString(data, length); + size_t cur = 0; size_t token_start = 0; size_t token_len = 0; @@ -105,8 +109,10 @@ class ITokenExtractorHelper : public ITokenExtractor void stringLikeToGinFilter(const char * data, size_t length, GinFilter & gin_filter) const override { gin_filter.setQueryString(data, length); + size_t cur = 0; String token; + while (cur < length && static_cast(this)->nextInStringLike(data, length, &cur, token)) gin_filter.addTerm(token.c_str(), token.size()); } diff --git a/src/Interpreters/IdentifierSemantic.cpp b/src/Interpreters/IdentifierSemantic.cpp index 0aa70057794..36972aeb03d 100644 --- a/src/Interpreters/IdentifierSemantic.cpp +++ b/src/Interpreters/IdentifierSemantic.cpp @@ -56,7 +56,7 @@ std::optional tryChooseTable(const ASTIdentifier & identifier, const std if ((best_match != ColumnMatch::NoMatch) && same_match) { if (!allow_ambiguous) - throw Exception("Ambiguous column '" + identifier.name() + "'", ErrorCodes::AMBIGUOUS_COLUMN_NAME); + throw Exception(ErrorCodes::AMBIGUOUS_COLUMN_NAME, "Ambiguous column '{}'", identifier.name()); best_match = ColumnMatch::Ambiguous; return {}; } diff --git a/src/Interpreters/InJoinSubqueriesPreprocessor.cpp b/src/Interpreters/InJoinSubqueriesPreprocessor.cpp index 58faeb41a15..ca804fe84a3 100644 --- a/src/Interpreters/InJoinSubqueriesPreprocessor.cpp +++ b/src/Interpreters/InJoinSubqueriesPreprocessor.cpp @@ -81,8 +81,8 @@ private: String alias = database_and_table->tryGetAlias(); if (alias.empty()) - 
throw Exception("Distributed table should have an alias when distributed_product_mode set to local", - ErrorCodes::DISTRIBUTED_IN_JOIN_SUBQUERY_DENIED); + throw Exception(ErrorCodes::DISTRIBUTED_IN_JOIN_SUBQUERY_DENIED, + "Distributed table should have an alias when distributed_product_mode set to local"); auto & identifier = database_and_table->as(); renamed_tables.emplace_back(identifier.clone()); @@ -103,22 +103,22 @@ private: /// Already processed. } else - throw Exception("Logical error: unexpected function name " + concrete->name, ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: unexpected function name {}", concrete->name); } else if (table_join) table_join->locality = JoinLocality::Global; else - throw Exception("Logical error: unexpected AST node", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: unexpected AST node"); } else if (distributed_product_mode == DistributedProductMode::DENY) { - throw Exception("Double-distributed IN/JOIN subqueries is denied (distributed_product_mode = 'deny')." - " You may rewrite query to use local tables in subqueries, or use GLOBAL keyword, or set distributed_product_mode to suitable value.", - ErrorCodes::DISTRIBUTED_IN_JOIN_SUBQUERY_DENIED); + throw Exception(ErrorCodes::DISTRIBUTED_IN_JOIN_SUBQUERY_DENIED, + "Double-distributed IN/JOIN subqueries is denied (distributed_product_mode = 'deny'). " + "You may rewrite query to use local tables " + "in subqueries, or use GLOBAL keyword, or set distributed_product_mode to suitable value."); } else - throw Exception("InJoinSubqueriesPreprocessor: unexpected value of 'distributed_product_mode' setting", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "InJoinSubqueriesPreprocessor: unexpected value of 'distributed_product_mode' setting"); } }; diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index 300574912d9..ee5aad3d18e 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -121,8 +121,8 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter) else if (auto mut_command = MutationCommand::parse(command_ast)) { if (mut_command->type == MutationCommand::MATERIALIZE_TTL && !metadata_snapshot->hasAnyTTL()) - throw Exception("Cannot MATERIALIZE TTL as there is no TTL set for table " - + table->getStorageID().getNameForLogs(), ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot MATERIALIZE TTL as there is no TTL set for table {}", + table->getStorageID().getNameForLogs()); mutation_commands.emplace_back(std::move(*mut_command)); } @@ -131,7 +131,7 @@ BlockIO InterpreterAlterQuery::executeToTable(const ASTAlterQuery & alter) live_view_commands.emplace_back(std::move(*live_view_command)); } else - throw Exception("Wrong parameter type in ALTER query", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong parameter type in ALTER query"); } if (typeid_cast(database.get())) @@ -200,7 +200,7 @@ BlockIO InterpreterAlterQuery::executeToDatabase(const ASTAlterQuery & alter) if (auto alter_command = AlterCommand::parse(command_ast)) alter_commands.emplace_back(std::move(*alter_command)); else - throw Exception("Wrong parameter type in ALTER DATABASE query", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong parameter type in ALTER DATABASE query"); } if (!alter_commands.empty()) @@ -451,7 +451,7 @@ void 
InterpreterAlterQuery::extendQueryLogElemImpl(QueryLogElement & elem, const { // Alter queries already have their target table inserted into `elem`. if (elem.query_tables.size() != 1) - throw Exception("Alter query should have target table recorded already", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Alter query should have target table recorded already"); String prefix = *elem.query_tables.begin() + "."; for (const auto & child : alter.command_list->children) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index bea88885d20..611f533d559 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -124,7 +124,7 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) if (create.if_not_exists) return {}; else - throw Exception("Database " + database_name + " already exists.", ErrorCodes::DATABASE_ALREADY_EXISTS); + throw Exception(ErrorCodes::DATABASE_ALREADY_EXISTS, "Database {} already exists.", database_name); } /// Will write file with database metadata, if needed. @@ -136,7 +136,7 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) if (!create.storage && create.attach) { if (!fs::exists(metadata_file_path)) - throw Exception("Database engine must be specified for ATTACH DATABASE query", ErrorCodes::UNKNOWN_DATABASE_ENGINE); + throw Exception(ErrorCodes::UNKNOWN_DATABASE_ENGINE, "Database engine must be specified for ATTACH DATABASE query"); /// Short syntax: try read database definition from file auto ast = DatabaseOnDisk::parseQueryFromMetadata(nullptr, getContext(), metadata_file_path); create = ast->as(); @@ -151,7 +151,7 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) /// For new-style databases engine is explicitly specified in .sql /// When attaching old-style database during server startup, we must always use Ordinary engine if (create.attach) - throw Exception("Database engine must be specified for ATTACH DATABASE query", ErrorCodes::UNKNOWN_DATABASE_ENGINE); + throw Exception(ErrorCodes::UNKNOWN_DATABASE_ENGINE, "Database engine must be specified for ATTACH DATABASE query"); auto engine = std::make_shared(); auto storage = std::make_shared(); engine->name = "Atomic"; @@ -204,8 +204,9 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) { /// Ambiguity is possible: should we attach nested database as Ordinary /// or throw "UUID must be specified" for Atomic? So we suggest short syntax for Ordinary. 
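// Several InterpreterCreateQuery hunks below gate experimental database engines and
// index types behind allow_experimental_* settings before anything is created. A
// compressed sketch of that pattern; the Settings struct and helper are stand-ins for
// illustration, not the real ClickHouse settings machinery.
#include <stdexcept>
#include <string>

struct Settings
{
    bool allow_experimental_inverted_index = false;
    bool allow_experimental_database_replicated = false;
};

void checkExperimentalFeature(bool enabled, const std::string & feature, const std::string & setting)
{
    if (!enabled)
        throw std::runtime_error(feature + " is experimental. Enable " + setting + " to use it.");
}

// Usage at the point where the engine or index name is known:
//   checkExperimentalFeature(settings.allow_experimental_database_replicated,
//                            "Replicated database engine", "allow_experimental_database_replicated");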
- throw Exception("Use short attach syntax ('ATTACH DATABASE name;' without engine) to attach existing database " - "or specify UUID to attach new database with Atomic engine", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, + "Use short attach syntax ('ATTACH DATABASE name;' without engine) " + "to attach existing database or specify UUID to attach new database with Atomic engine"); } /// Set metadata path according to nested engine @@ -218,7 +219,7 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) { bool is_on_cluster = getContext()->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY; if (create.uuid != UUIDHelpers::Nil && !is_on_cluster) - throw Exception("Ordinary database engine does not support UUID", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Ordinary database engine does not support UUID"); /// Ignore UUID if it's ON CLUSTER query create.uuid = UUIDHelpers::Nil; @@ -229,24 +230,27 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) && !getContext()->getSettingsRef().allow_experimental_database_materialized_mysql && !internal && !create.attach) { - throw Exception("MaterializedMySQL is an experimental database engine. " - "Enable allow_experimental_database_materialized_mysql to use it.", ErrorCodes::UNKNOWN_DATABASE_ENGINE); + throw Exception(ErrorCodes::UNKNOWN_DATABASE_ENGINE, + "MaterializedMySQL is an experimental database engine. " + "Enable allow_experimental_database_materialized_mysql to use it."); } if (create.storage->engine->name == "Replicated" && !getContext()->getSettingsRef().allow_experimental_database_replicated && !internal && !create.attach) { - throw Exception("Replicated is an experimental database engine. " - "Enable allow_experimental_database_replicated to use it.", ErrorCodes::UNKNOWN_DATABASE_ENGINE); + throw Exception(ErrorCodes::UNKNOWN_DATABASE_ENGINE, + "Replicated is an experimental database engine. " + "Enable allow_experimental_database_replicated to use it."); } if (create.storage->engine->name == "MaterializedPostgreSQL" && !getContext()->getSettingsRef().allow_experimental_database_materialized_postgresql && !internal && !create.attach) { - throw Exception("MaterializedPostgreSQL is an experimental database engine. " - "Enable allow_experimental_database_materialized_postgresql to use it.", ErrorCodes::UNKNOWN_DATABASE_ENGINE); + throw Exception(ErrorCodes::UNKNOWN_DATABASE_ENGINE, + "MaterializedPostgreSQL is an experimental database engine. 
" + "Enable allow_experimental_database_materialized_postgresql to use it."); } bool need_write_metadata = !create.attach || !fs::exists(metadata_file_path); @@ -478,7 +482,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( if (col_decl.collation && !context_->getSettingsRef().compatibility_ignore_collation_in_create_table) { - throw Exception("Cannot support collation, please set compatibility_ignore_collation_in_create_table=true", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot support collation, please set compatibility_ignore_collation_in_create_table=true"); } DataTypePtr column_type = nullptr; @@ -492,7 +496,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( if (col_decl.null_modifier) { if (column_type->isNullable()) - throw Exception("Can't use [NOT] NULL modifier with Nullable type", ErrorCodes::ILLEGAL_SYNTAX_FOR_DATA_TYPE); + throw Exception(ErrorCodes::ILLEGAL_SYNTAX_FOR_DATA_TYPE, "Can't use [NOT] NULL modifier with Nullable type"); if (*col_decl.null_modifier) column_type = makeNullable(column_type); } @@ -571,10 +575,9 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( if (col_decl.default_specifier == "AUTO_INCREMENT" && !context_->getSettingsRef().compatibility_ignore_auto_increment_in_create_table) { - throw Exception( - "AUTO_INCREMENT is not supported. To ignore the keyword in column declaration, set " - "`compatibility_ignore_auto_increment_in_create_table` to true", - ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, + "AUTO_INCREMENT is not supported. To ignore the keyword " + "in column declaration, set `compatibility_ignore_auto_increment_in_create_table` to true"); } if (col_decl.default_expression) @@ -607,7 +610,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( else if (col_decl.type) column.type = name_type_it->type; else - throw Exception{"Neither default value expression nor type is provided for a column", ErrorCodes::LOGICAL_ERROR}; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Neither default value expression nor type is provided for a column"); if (col_decl.comment) column.comment = col_decl.comment->as().value.get(); @@ -615,7 +618,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( if (col_decl.codec) { if (col_decl.default_specifier == "ALIAS") - throw Exception{"Cannot specify codec for column type ALIAS", ErrorCodes::BAD_ARGUMENTS}; + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot specify codec for column type ALIAS"); column.codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST( col_decl.codec, column.type, sanity_check_compression_codecs, allow_experimental_codecs); } @@ -630,7 +633,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription( res.flattenNested(); if (res.getAllPhysical().empty()) - throw Exception{"Cannot CREATE table without physical columns", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED}; + throw Exception(ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED, "Cannot CREATE table without physical columns"); return res; } @@ -666,7 +669,7 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti if (create.columns_list) { if (create.as_table_function && (create.columns_list->indices || create.columns_list->constraints)) - throw Exception("Indexes and constraints are not supported for table functions", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Indexes and constraints are not supported for table 
functions"); /// Dictionaries have dictionary_attributes_list instead of columns_list assert(!create.is_dictionary); @@ -680,14 +683,14 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti for (const auto & index : create.columns_list->indices->children) { IndexDescription index_desc = IndexDescription::getIndexFromAST(index->clone(), properties.columns, getContext()); - if (index_desc.type == GinFilter::FilterName && getContext()->getSettingsRef().allow_experimental_inverted_index == false) + const auto & settings = getContext()->getSettingsRef(); + if (index_desc.type == INVERTED_INDEX_NAME && !settings.allow_experimental_inverted_index) { - throw Exception( - "Experimental Inverted Index feature is not enabled (the setting 'allow_experimental_inverted_index')", - ErrorCodes::SUPPORT_IS_DISABLED); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, + "Experimental Inverted Index feature is not enabled (the setting 'allow_experimental_inverted_index')"); } - if (index_desc.type == "annoy" && !getContext()->getSettingsRef().allow_experimental_annoy_index) - throw Exception("Annoy index is disabled. Turn on allow_experimental_annoy_index", ErrorCodes::INCORRECT_QUERY); + if (index_desc.type == "annoy" && !settings.allow_experimental_annoy_index) + throw Exception(ErrorCodes::INCORRECT_QUERY, "Annoy index is disabled. Turn on allow_experimental_annoy_index"); properties.indices.push_back(index_desc); } @@ -754,7 +757,7 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti /// We can have queries like "CREATE TABLE ENGINE=" if /// supports schema inference (will determine table structure in it's constructor). else if (!StorageFactory::instance().checkIfStorageSupportsSchemaInterface(create.storage->engine->name)) - throw Exception("Incorrect CREATE query: required list of column descriptions or AS section or SELECT.", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Incorrect CREATE query: required list of column descriptions or AS section or SELECT."); /// Even if query has list of columns, canonicalize it (unfold Nested columns). if (!create.columns_list) @@ -787,7 +790,7 @@ void InterpreterCreateQuery::validateTableStructure(const ASTCreateQuery & creat for (const auto & column : properties.columns) { if (!all_columns.emplace(column.name).second) - throw Exception("Column " + backQuoteIfNeed(column.name) + " already exists", ErrorCodes::DUPLICATE_COLUMN); + throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Column {} already exists", backQuoteIfNeed(column.name)); } /// Check if _row_exists for lightweight delete column in column_lists for merge tree family. 
@@ -795,9 +798,10 @@ void InterpreterCreateQuery::validateTableStructure(const ASTCreateQuery & creat { auto search = all_columns.find(LightweightDeleteDescription::FILTER_COLUMN.name); if (search != all_columns.end()) - throw Exception("Cannot create table with column '" + LightweightDeleteDescription::FILTER_COLUMN.name + "' " - "for *MergeTree engines because it is reserved for lightweight delete feature", - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Cannot create table with column '{}' for *MergeTree engines because it " + "is reserved for lightweight delete feature", + LightweightDeleteDescription::FILTER_COLUMN.name); } const auto & settings = getContext()->getSettingsRef(); @@ -810,10 +814,11 @@ void InterpreterCreateQuery::validateTableStructure(const ASTCreateQuery & creat if (const auto * current_type_ptr = typeid_cast(name_and_type_pair.type.get())) { if (!isStringOrFixedString(*removeNullable(current_type_ptr->getDictionaryType()))) - throw Exception("Creating columns of type " + current_type_ptr->getName() + " is prohibited by default " + throw Exception(ErrorCodes::SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY, + "Creating columns of type {} is prohibited by default " "due to expected negative impact on performance. " "It can be enabled with the \"allow_suspicious_low_cardinality_types\" setting.", - ErrorCodes::SUSPICIOUS_TYPE_FOR_LOW_CARDINALITY); + current_type_ptr->getName()); } } } @@ -825,10 +830,10 @@ void InterpreterCreateQuery::validateTableStructure(const ASTCreateQuery & creat const auto & type = name_and_type_pair.type->getName(); if (type == "MultiPolygon" || type == "Polygon" || type == "Ring" || type == "Point") { - String message = "Cannot create table with column '" + name_and_type_pair.name + "' which type is '" - + type + "' because experimental geo types are not allowed. " - + "Set setting allow_experimental_geo_types = 1 in order to allow it"; - throw Exception(message, ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot create table with column '{}' which type is '{}' " + "because experimental geo types are not allowed. " + "Set setting allow_experimental_geo_types = 1 in order to allow it", + name_and_type_pair.name, type); } } } @@ -891,7 +896,7 @@ String InterpreterCreateQuery::getTableEngineName(DefaultTableEngine default_tab return "Memory"; default: - throw Exception("default_table_engine is set to unknown value", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "default_table_engine is set to unknown value"); } } @@ -1008,7 +1013,7 @@ void InterpreterCreateQuery::assertOrSetUUID(ASTCreateQuery & create, const Data && !internal) { if (create.uuid == UUIDHelpers::Nil) - throw Exception("Table UUID is not specified in DDL log", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Table UUID is not specified in DDL log"); } bool from_path = create.attach_from_path.has_value(); @@ -1052,8 +1057,9 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) { /// Temporary tables are created out of databases. if (create.temporary && create.database) - throw Exception("Temporary tables cannot be inside a database. You should not specify a database for a temporary table.", - ErrorCodes::BAD_DATABASE_FOR_TEMPORARY_TABLE); + throw Exception(ErrorCodes::BAD_DATABASE_FOR_TEMPORARY_TABLE, + "Temporary tables cannot be inside a database. 
" + "You should not specify a database for a temporary table."); String current_database = getContext()->getCurrentDatabase(); auto database_name = create.database ? create.getDatabase() : current_database; @@ -1382,7 +1388,8 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, bool is_replicated_storage = typeid_cast(res.get()) != nullptr; if (!is_replicated_storage && res->storesDataOnDisk() && database && database->getEngineName() == "Replicated") throw Exception(ErrorCodes::UNKNOWN_STORAGE, - "Only tables with a Replicated engine or tables which do not store data on disk are allowed in a Replicated database"); + "Only tables with a Replicated engine " + "or tables which do not store data on disk are allowed in a Replicated database"); } if (from_path && !res->storesDataOnDisk()) @@ -1603,10 +1610,11 @@ void InterpreterCreateQuery::prepareOnClusterQuery(ASTCreateQuery & create, Cont return; } - throw Exception("Seems like cluster is configured for cross-replication, " + throw Exception(ErrorCodes::INCORRECT_QUERY, + "Seems like cluster is configured for cross-replication, " "but zookeeper_path for ReplicatedMergeTree is not specified or contains {uuid} macro. " "It's not supported for cross replication, because tables must have different UUIDs. " - "Please specify unique zookeeper_path explicitly.", ErrorCodes::INCORRECT_QUERY); + "Please specify unique zookeeper_path explicitly."); } } diff --git a/src/Interpreters/InterpreterDeleteQuery.cpp b/src/Interpreters/InterpreterDeleteQuery.cpp index 736fc90a346..abccc313e14 100644 --- a/src/Interpreters/InterpreterDeleteQuery.cpp +++ b/src/Interpreters/InterpreterDeleteQuery.cpp @@ -78,7 +78,9 @@ BlockIO InterpreterDeleteQuery::execute() else if (table->supportsLightweightDelete()) { if (!getContext()->getSettingsRef().allow_experimental_lightweight_delete) - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Lightweight delete mutate is experimental. Set `allow_experimental_lightweight_delete` setting to enable it"); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, + "Lightweight delete mutate is experimental. 
" + "Set `allow_experimental_lightweight_delete` setting to enable it"); /// Convert to MutationCommand MutationCommands mutation_commands; diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index 8cc9b38e44f..f2f937f6ec0 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -70,7 +70,7 @@ BlockIO InterpreterDropQuery::execute() else if (drop.database) return executeToDatabase(drop); else - throw Exception("Nothing to drop, both names are empty", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Nothing to drop, both names are empty"); } @@ -201,7 +201,7 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ContextPtr context_, ASTDropQue else if (query.kind == ASTDropQuery::Kind::Truncate) { if (table->isDictionary()) - throw Exception("Cannot TRUNCATE dictionary", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Cannot TRUNCATE dictionary"); context_->checkAccess(AccessType::TRUNCATE, table_id); if (table->isStaticStorage()) @@ -262,7 +262,7 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ContextPtr context_, ASTDropQue BlockIO InterpreterDropQuery::executeToTemporaryTable(const String & table_name, ASTDropQuery::Kind kind) { if (kind == ASTDropQuery::Kind::Detach) - throw Exception("Unable to detach temporary table.", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Unable to detach temporary table."); else { auto context_handle = getContext()->hasSessionContext() ? getContext()->getSessionContext() : getContext(); @@ -287,6 +287,10 @@ BlockIO InterpreterDropQuery::executeToTemporaryTable(const String & table_name, table->drop(); table->is_dropped = true; } + else if (kind == ASTDropQuery::Kind::Detach) + { + table->is_detached = true; + } } } @@ -331,7 +335,7 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query, { if (query.kind == ASTDropQuery::Kind::Truncate) { - throw Exception("Unable to truncate database", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Unable to truncate database"); } else if (query.kind == ASTDropQuery::Kind::Detach || query.kind == ASTDropQuery::Kind::Drop) { @@ -339,7 +343,7 @@ BlockIO InterpreterDropQuery::executeToDatabaseImpl(const ASTDropQuery & query, getContext()->checkAccess(AccessType::DROP_DATABASE, database_name); if (query.kind == ASTDropQuery::Kind::Detach && query.permanently) - throw Exception("DETACH PERMANENTLY is not implemented for databases", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "DETACH PERMANENTLY is not implemented for databases"); if (database->hasReplicationThread()) database->stopReplication(); diff --git a/src/Interpreters/InterpreterExistsQuery.cpp b/src/Interpreters/InterpreterExistsQuery.cpp index 758c6d81407..90fa15bf63f 100644 --- a/src/Interpreters/InterpreterExistsQuery.cpp +++ b/src/Interpreters/InterpreterExistsQuery.cpp @@ -69,7 +69,7 @@ QueryPipeline InterpreterExistsQuery::executeImpl() else if ((exists_query = query_ptr->as())) { if (exists_query->temporary) - throw Exception("Temporary dictionaries are not possible.", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Temporary dictionaries are not possible."); String database = getContext()->resolveDatabase(exists_query->getDatabase()); getContext()->checkAccess(AccessType::SHOW_DICTIONARIES, database, exists_query->getTable()); result = 
DatabaseCatalog::instance().isDictionaryExist({database, exists_query->getTable()}); diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index 17a6b695088..3c225522cc4 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -252,7 +252,7 @@ struct ExplainSettings : public Settings { auto it = boolean_settings.find(name_); if (it == boolean_settings.end()) - throw Exception("Unknown setting for ExplainSettings: " + name_, ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown setting for ExplainSettings: {}", name_); it->second.get() = value; } @@ -261,7 +261,7 @@ struct ExplainSettings : public Settings { auto it = integer_settings.find(name_); if (it == integer_settings.end()) - throw Exception("Unknown setting for ExplainSettings: " + name_, ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown setting for ExplainSettings: {}", name_); it->second.get() = value; } @@ -314,8 +314,8 @@ ExplainSettings checkAndGetSettings(const ASTPtr & ast_settings) for (const auto & change : set_query.changes) { if (!settings.has(change.name)) - throw Exception("Unknown setting \"" + change.name + "\" for EXPLAIN " + Settings::name + " query. " - "Supported settings: " + settings.getSettingsList(), ErrorCodes::UNKNOWN_SETTING); + throw Exception(ErrorCodes::UNKNOWN_SETTING, "Unknown setting \"{}\" for EXPLAIN {} query. " + "Supported settings: {}", change.name, Settings::name, settings.getSettingsList()); if (change.value.getType() != Field::Types::UInt64) throw Exception(ErrorCodes::INVALID_SETTING_VALUE, @@ -326,8 +326,8 @@ ExplainSettings checkAndGetSettings(const ASTPtr & ast_settings) { auto value = change.value.get(); if (value > 1) - throw Exception("Invalid value " + std::to_string(value) + " for setting \"" + change.name + - "\". Expected boolean type", ErrorCodes::INVALID_SETTING_VALUE); + throw Exception(ErrorCodes::INVALID_SETTING_VALUE, "Invalid value {} for setting \"{}\". 
" + "Expected boolean type", value, change.name); settings.setBooleanSetting(change.name, value); } @@ -427,7 +427,7 @@ QueryPipeline InterpreterExplainQuery::executeImpl() case ASTExplainQuery::QueryPlan: { if (!dynamic_cast(ast.getExplainedQuery().get())) - throw Exception("Only SELECT is supported for EXPLAIN query", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Only SELECT is supported for EXPLAIN query"); auto settings = checkAndGetSettings(ast.getSettings()); QueryPlan plan; @@ -521,13 +521,13 @@ QueryPipeline InterpreterExplainQuery::executeImpl() printPipeline(io.pipeline.getProcessors(), buf); } else - throw Exception("Only SELECT and INSERT is supported for EXPLAIN PIPELINE query", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Only SELECT and INSERT is supported for EXPLAIN PIPELINE query"); break; } case ASTExplainQuery::QueryEstimates: { if (!dynamic_cast(ast.getExplainedQuery().get())) - throw Exception("Only SELECT is supported for EXPLAIN ESTIMATE query", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Only SELECT is supported for EXPLAIN ESTIMATE query"); auto settings = checkAndGetSettings(ast.getSettings()); QueryPlan plan; @@ -564,7 +564,7 @@ QueryPipeline InterpreterExplainQuery::executeImpl() case ASTExplainQuery::CurrentTransaction: { if (ast.getSettings()) - throw Exception("Settings are not supported for EXPLAIN CURRENT TRANSACTION query.", ErrorCodes::UNKNOWN_SETTING); + throw Exception(ErrorCodes::UNKNOWN_SETTING, "Settings are not supported for EXPLAIN CURRENT TRANSACTION query."); if (auto txn = getContext()->getCurrentTransaction()) { diff --git a/src/Interpreters/InterpreterExternalDDLQuery.cpp b/src/Interpreters/InterpreterExternalDDLQuery.cpp index 5c06ab4b818..c0acb1e03eb 100644 --- a/src/Interpreters/InterpreterExternalDDLQuery.cpp +++ b/src/Interpreters/InterpreterExternalDDLQuery.cpp @@ -35,7 +35,7 @@ BlockIO InterpreterExternalDDLQuery::execute() const ASTExternalDDLQuery & external_ddl_query = query->as(); if (getContext()->getClientInfo().query_kind != ClientInfo::QueryKind::SECONDARY_QUERY) - throw Exception("Cannot parse and execute EXTERNAL DDL FROM.", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Cannot parse and execute EXTERNAL DDL FROM."); if (external_ddl_query.from->name == "MySQL") { @@ -43,7 +43,7 @@ BlockIO InterpreterExternalDDLQuery::execute() const ASTs & arguments = external_ddl_query.from->arguments->children; if (arguments.size() != 2 || !arguments[0]->as() || !arguments[1]->as()) - throw Exception("MySQL External require two identifier arguments.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "MySQL External require two identifier arguments."); if (external_ddl_query.external_ddl->as()) return MySQLInterpreter::InterpreterMySQLDropQuery( diff --git a/src/Interpreters/InterpreterFactory.cpp b/src/Interpreters/InterpreterFactory.cpp index 4c677ce5e18..efcdde46e49 100644 --- a/src/Interpreters/InterpreterFactory.cpp +++ b/src/Interpreters/InterpreterFactory.cpp @@ -346,7 +346,7 @@ std::unique_ptr InterpreterFactory::get(ASTPtr & query, ContextMut } else { - throw Exception("Unknown type of query: " + query->getID(), ErrorCodes::UNKNOWN_TYPE_OF_QUERY); + throw Exception(ErrorCodes::UNKNOWN_TYPE_OF_QUERY, "Unknown type of query: {}", query->getID()); } } } diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 62f3e190ef6..3b90ac8d284 
100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -153,7 +153,7 @@ Block InterpreterInsertQuery::getSampleBlock( for (const auto & current_name : names) { if (res.has(current_name)) - throw Exception("Column " + current_name + " specified more than once", ErrorCodes::DUPLICATE_COLUMN); + throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Column {} specified more than once", current_name); /// Column is not ordinary or ephemeral if (!table_sample_insertable.has(current_name)) @@ -162,13 +162,13 @@ Block InterpreterInsertQuery::getSampleBlock( if (table_sample_physical.has(current_name)) { if (!allow_materialized) - throw Exception("Cannot insert column " + current_name + ", because it is MATERIALIZED column.", - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.", + current_name); res.insert(ColumnWithTypeAndName(table_sample_physical.getByName(current_name).type, current_name)); } else /// The table does not have a column with that name - throw Exception("No such column " + current_name + " in table " + table->getStorageID().getNameForLogs(), - ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "No such column {} in table {}", + current_name, table->getStorageID().getNameForLogs()); } else res.insert(ColumnWithTypeAndName(table_sample_insertable.getByName(current_name).type, current_name)); @@ -528,7 +528,7 @@ BlockIO InterpreterInsertQuery::execute() { for (const auto & column : metadata_snapshot->getColumns()) if (column.default_desc.kind == ColumnDefaultKind::Materialized && header.has(column.name)) - throw Exception("Cannot insert column " + column.name + ", because it is MATERIALIZED column.", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert column {}, because it is MATERIALIZED column.", column.name); } res.pipeline = QueryPipelineBuilder::getPipeline(std::move(pipeline)); diff --git a/src/Interpreters/InterpreterKillQueryQuery.cpp b/src/Interpreters/InterpreterKillQueryQuery.cpp index 1b4364351df..40698386ccb 100644 --- a/src/Interpreters/InterpreterKillQueryQuery.cpp +++ b/src/Interpreters/InterpreterKillQueryQuery.cpp @@ -120,7 +120,7 @@ static QueryDescriptors extractQueriesExceptMeAndCheckAccess(const Block & proce } if (res.empty() && access_denied) - throw Exception("User " + my_client.current_user + " attempts to kill query created by " + query_user, ErrorCodes::ACCESS_DENIED); + throw Exception(ErrorCodes::ACCESS_DENIED, "User {} attempts to kill query created by {}", my_client.current_user, query_user); return res; } @@ -291,9 +291,8 @@ BlockIO InterpreterKillQueryQuery::execute() } if (res_columns[0]->empty() && access_denied) - throw Exception( - "Not allowed to kill mutation. To execute this query it's necessary to have the grant " + required_access_rights.toString(), - ErrorCodes::ACCESS_DENIED); + throw Exception(ErrorCodes::ACCESS_DENIED, "Not allowed to kill mutation. " + "To execute this query it's necessary to have the grant {}", required_access_rights.toString()); res_io.pipeline = QueryPipeline(Pipe(std::make_shared(header.cloneWithColumns(std::move(res_columns))))); @@ -356,9 +355,8 @@ BlockIO InterpreterKillQueryQuery::execute() } if (res_columns[0]->empty() && access_denied) - throw Exception( - "Not allowed to kill move partition. 
To execute this query it's necessary to have the grant " + required_access_rights.toString(), - ErrorCodes::ACCESS_DENIED); + throw Exception(ErrorCodes::ACCESS_DENIED, "Not allowed to kill move partition. " + "To execute this query it's necessary to have the grant {}", required_access_rights.toString()); res_io.pipeline = QueryPipeline(Pipe(std::make_shared(header.cloneWithColumns(std::move(res_columns))))); @@ -428,7 +426,7 @@ Block InterpreterKillQueryQuery::getSelectResult(const String & columns, const S while (executor.pull(tmp_block)); if (tmp_block) - throw Exception("Expected one block from input stream", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected one block from input stream"); return res; } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 2578a821a4f..624859300b9 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -382,8 +382,8 @@ InterpreterSelectQuery::InterpreterSelectQuery( const Settings & settings = context->getSettingsRef(); if (settings.max_subquery_depth && options.subquery_depth > settings.max_subquery_depth) - throw Exception("Too deep subqueries. Maximum: " + settings.max_subquery_depth.toString(), - ErrorCodes::TOO_DEEP_SUBQUERIES); + throw Exception(ErrorCodes::TOO_DEEP_SUBQUERIES, "Too deep subqueries. Maximum: {}", + settings.max_subquery_depth.toString()); bool has_input = input_pipe != std::nullopt; if (input_pipe) @@ -562,7 +562,10 @@ InterpreterSelectQuery::InterpreterSelectQuery( view = nullptr; } - if (try_move_to_prewhere && storage && storage->canMoveConditionsToPrewhere() && query.where() && !query.prewhere()) + if (try_move_to_prewhere + && storage && storage->canMoveConditionsToPrewhere() + && query.where() && !query.prewhere() + && !query.hasJoin()) /// Join may produce rows with nulls or default values, it's difficult to analyze if they affected or not. { /// PREWHERE optimization: transfer some condition from WHERE to PREWHERE if enabled and viable if (const auto & column_sizes = storage->getColumnSizes(); !column_sizes.empty()) @@ -606,17 +609,23 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (!options.only_analyze) { if (query.sampleSize() && (input_pipe || !storage || !storage->supportsSampling())) - throw Exception("Illegal SAMPLE: table doesn't support sampling", ErrorCodes::SAMPLING_NOT_SUPPORTED); + throw Exception(ErrorCodes::SAMPLING_NOT_SUPPORTED, "Illegal SAMPLE: table doesn't support sampling"); if (query.final() && (input_pipe || !storage || !storage->supportsFinal())) - throw Exception( - (!input_pipe && storage) ? "Storage " + storage->getName() + " doesn't support FINAL" : "Illegal FINAL", - ErrorCodes::ILLEGAL_FINAL); + { + if (!input_pipe && storage) + throw Exception(ErrorCodes::ILLEGAL_FINAL, "Storage {} doesn't support FINAL", storage->getName()); + else + throw Exception(ErrorCodes::ILLEGAL_FINAL, "Illegal FINAL"); + } if (query.prewhere() && (input_pipe || !storage || !storage->supportsPrewhere())) - throw Exception( - (!input_pipe && storage) ? 
"Storage " + storage->getName() + " doesn't support PREWHERE" : "Illegal PREWHERE", - ErrorCodes::ILLEGAL_PREWHERE); + { + if (!input_pipe && storage) + throw Exception(ErrorCodes::ILLEGAL_PREWHERE, "Storage {} doesn't support PREWHERE", storage->getName()); + else + throw Exception(ErrorCodes::ILLEGAL_PREWHERE, "Illegal PREWHERE"); + } /// Save the new temporary tables in the query context for (const auto & it : query_analyzer->getExternalTables()) @@ -936,7 +945,8 @@ static std::pair getWithFillFieldValue(const ASTPtr & node, auto field_type = evaluateConstantExpression(node, context); if (!isColumnedAsNumber(field_type.second)) - throw Exception("Illegal type " + field_type.second->getName() + " of WITH FILL expression, must be numeric type", ErrorCodes::INVALID_WITH_FILL_EXPRESSION); + throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, + "Illegal type {} of WITH FILL expression, must be numeric type", field_type.second->getName()); return field_type; } @@ -951,7 +961,8 @@ static std::pair> getWithFillStep(const ASTPt if (isColumnedAsNumber(type)) return std::make_pair(std::move(field), std::nullopt); - throw Exception("Illegal type " + type->getName() + " of WITH FILL expression, must be numeric type", ErrorCodes::INVALID_WITH_FILL_EXPRESSION); + throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, + "Illegal type {} of WITH FILL expression, must be numeric type", type->getName()); } static FillColumnDescription getWithFillDescription(const ASTOrderByElement & order_by_elem, const ContextPtr & context) @@ -969,32 +980,30 @@ static FillColumnDescription getWithFillDescription(const ASTOrderByElement & or descr.fill_step = order_by_elem.direction; if (applyVisitor(FieldVisitorAccurateEquals(), descr.fill_step, Field{0})) - throw Exception("WITH FILL STEP value cannot be zero", ErrorCodes::INVALID_WITH_FILL_EXPRESSION); + throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, "WITH FILL STEP value cannot be zero"); if (order_by_elem.direction == 1) { if (applyVisitor(FieldVisitorAccurateLess(), descr.fill_step, Field{0})) - throw Exception("WITH FILL STEP value cannot be negative for sorting in ascending direction", - ErrorCodes::INVALID_WITH_FILL_EXPRESSION); + throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, "WITH FILL STEP value cannot be negative for sorting in ascending direction"); if (!descr.fill_from.isNull() && !descr.fill_to.isNull() && applyVisitor(FieldVisitorAccurateLess(), descr.fill_to, descr.fill_from)) { - throw Exception("WITH FILL TO value cannot be less than FROM value for sorting in ascending direction", - ErrorCodes::INVALID_WITH_FILL_EXPRESSION); + throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, + "WITH FILL TO value cannot be less than FROM value for sorting in ascending direction"); } } else { if (applyVisitor(FieldVisitorAccurateLess(), Field{0}, descr.fill_step)) - throw Exception("WITH FILL STEP value cannot be positive for sorting in descending direction", - ErrorCodes::INVALID_WITH_FILL_EXPRESSION); + throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, "WITH FILL STEP value cannot be positive for sorting in descending direction"); if (!descr.fill_from.isNull() && !descr.fill_to.isNull() && applyVisitor(FieldVisitorAccurateLess(), descr.fill_from, descr.fill_to)) { - throw Exception("WITH FILL FROM value cannot be less than TO value for sorting in descending direction", - ErrorCodes::INVALID_WITH_FILL_EXPRESSION); + throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, + "WITH FILL FROM value cannot be less than TO 
value for sorting in descending direction"); } } @@ -1123,14 +1132,13 @@ static UInt64 getLimitUIntValue(const ASTPtr & node, const ContextPtr & context, const auto & [field, type] = evaluateConstantExpression(node, context); if (!isNativeNumber(type)) - throw Exception( - "Illegal type " + type->getName() + " of " + expr + " expression, must be numeric type", ErrorCodes::INVALID_LIMIT_EXPRESSION); + throw Exception(ErrorCodes::INVALID_LIMIT_EXPRESSION, "Illegal type {} of {} expression, must be numeric type", + type->getName(), expr); Field converted = convertFieldToType(field, DataTypeUInt64()); if (converted.isNull()) - throw Exception( - "The value " + applyVisitor(FieldVisitorToString(), field) + " of " + expr + " expression is not representable as UInt64", - ErrorCodes::INVALID_LIMIT_EXPRESSION); + throw Exception(ErrorCodes::INVALID_LIMIT_EXPRESSION, "The value {} of {} expression is not representable as UInt64", + applyVisitor(FieldVisitorToString(), field), expr); return converted.safeGet(); } @@ -1339,7 +1347,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

input_order_info && query_info.input_order_info) - throw Exception("InputOrderInfo is set for projection and for query", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "InputOrderInfo is set for projection and for query"); InputOrderInfoPtr input_order_info_for_order; if (!expressions.need_aggregate) input_order_info_for_order = query_info.projection ? query_info.projection->input_order_info : query_info.input_order_info; @@ -1382,7 +1390,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

hasWindow()) - throw Exception( - "Window functions does not support processing from WithMergeableStateAfterAggregation", - ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Window functions does not support processing from WithMergeableStateAfterAggregation"); } else if (expressions.need_aggregate) { @@ -2242,7 +2248,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc } if (!max_block_size) - throw Exception("Setting 'max_block_size' cannot be zero", ErrorCodes::PARAMETER_OUT_OF_BOUND); + throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, "Setting 'max_block_size' cannot be zero"); storage_limits.emplace_back(local_limits); @@ -2256,7 +2262,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc /// Subquery. ASTPtr subquery = extractTableExpression(query, 0); if (!subquery) - throw Exception("Subquery expected", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Subquery expected"); interpreter_subquery = std::make_unique( subquery, getSubqueryContext(context), @@ -2367,7 +2373,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc } } else - throw Exception("Logical error in InterpreterSelectQuery: nowhere to read", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error in InterpreterSelectQuery: nowhere to read"); /// Specify the number of threads only if it wasn't specified in storage. /// @@ -2945,7 +2951,7 @@ void InterpreterSelectQuery::executeLimit(QueryPlan & query_plan) if (query.limit_with_ties) { if (!query.orderBy()) - throw Exception("LIMIT WITH TIES without ORDER BY", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "LIMIT WITH TIES without ORDER BY"); order_descr = getSortDescription(query, context); } diff --git a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index bf384fa5d86..e3954f2a197 100644 --- a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -54,7 +54,7 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( size_t num_children = ast->list_of_selects->children.size(); if (!num_children) - throw Exception("Logical error: no children in ASTSelectWithUnionQuery", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: no children in ASTSelectWithUnionQuery"); /// Note that we pass 'required_result_column_names' to first SELECT. 
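// The InterpreterSelectWithUnionQuery hunks above verify that every SELECT in a
// UNION ALL produces the same number of columns as the first one, and include both
// column lists in the error text. Standalone sketch with headers reduced to name
// vectors; Header, dumpNames and checkUnionHeaders are illustrative names only.
#include <cstddef>
#include <stdexcept>
#include <string>
#include <vector>

using Header = std::vector<std::string>;

std::string dumpNames(const Header & header)
{
    std::string res;
    for (const auto & name : header)
        res += name + "\n";
    return res;
}

void checkUnionHeaders(const std::vector<Header> & headers)
{
    for (size_t i = 1; i < headers.size(); ++i)
        if (headers[i].size() != headers[0].size())
            throw std::runtime_error("Different number of columns in UNION ALL elements:\n"
                                     + dumpNames(headers[0]) + "and\n" + dumpNames(headers[i]));
}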
/// And for the rest, we pass names at the corresponding positions of 'required_result_column_names' in the result of first SELECT, @@ -81,11 +81,9 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery( = getCurrentChildResultHeader(ast->list_of_selects->children.at(query_num), required_result_column_names); if (full_result_header_for_current_select.columns() != full_result_header.columns()) - throw Exception("Different number of columns in UNION ALL elements:\n" - + full_result_header.dumpNames() - + "\nand\n" - + full_result_header_for_current_select.dumpNames() + "\n", - ErrorCodes::UNION_ALL_RESULT_STRUCTURES_MISMATCH); + throw Exception(ErrorCodes::UNION_ALL_RESULT_STRUCTURES_MISMATCH, + "Different number of columns in UNION ALL elements:\n{}\nand\n{}\n", + full_result_header.dumpNames(), full_result_header_for_current_select.dumpNames()); required_result_column_names_for_other_selects[query_num].reserve(required_result_column_names.size()); for (const auto & pos : positions_of_required_result_columns) @@ -213,11 +211,9 @@ Block InterpreterSelectWithUnionQuery::getCommonHeaderForUnion(const Blocks & he for (size_t query_num = 1; query_num < num_selects; ++query_num) { if (headers[query_num].columns() != num_columns) - throw Exception("Different number of columns in UNION ALL elements:\n" - + common_header.dumpNames() - + "\nand\n" - + headers[query_num].dumpNames() + "\n", - ErrorCodes::UNION_ALL_RESULT_STRUCTURES_MISMATCH); + throw Exception(ErrorCodes::UNION_ALL_RESULT_STRUCTURES_MISMATCH, + "Different number of columns in UNION ALL elements:\n{}\nand\n{}\n", + common_header.dumpNames(), headers[query_num].dumpNames()); } std::vector columns(num_selects); diff --git a/src/Interpreters/InterpreterShowCreateQuery.cpp b/src/Interpreters/InterpreterShowCreateQuery.cpp index eff31b168bd..5e1b74681fe 100644 --- a/src/Interpreters/InterpreterShowCreateQuery.cpp +++ b/src/Interpreters/InterpreterShowCreateQuery.cpp @@ -76,14 +76,16 @@ QueryPipeline InterpreterShowCreateQuery::executeImpl() else if ((show_query = query_ptr->as())) { if (show_query->temporary) - throw Exception("Temporary databases are not possible.", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Temporary databases are not possible."); show_query->setDatabase(getContext()->resolveDatabase(show_query->getDatabase())); getContext()->checkAccess(AccessType::SHOW_DATABASES, show_query->getDatabase()); create_query = DatabaseCatalog::instance().getDatabase(show_query->getDatabase())->getCreateDatabaseQuery(); } if (!create_query) - throw Exception("Unable to show the create query of " + show_query->getTable() + ". Maybe it was created by the system.", ErrorCodes::THERE_IS_NO_QUERY); + throw Exception(ErrorCodes::THERE_IS_NO_QUERY, + "Unable to show the create query of {}. 
Maybe it was created by the system.", + show_query->getTable()); if (!getContext()->getSettingsRef().show_table_uuid_in_table_create_query_if_not_nil) { diff --git a/src/Interpreters/InterpreterShowTablesQuery.cpp b/src/Interpreters/InterpreterShowTablesQuery.cpp index a6cea66df84..4e0dfdc9236 100644 --- a/src/Interpreters/InterpreterShowTablesQuery.cpp +++ b/src/Interpreters/InterpreterShowTablesQuery.cpp @@ -105,7 +105,7 @@ String InterpreterShowTablesQuery::getRewrittenQuery() } if (query.temporary && !query.from.empty()) - throw Exception("The `FROM` and `TEMPORARY` cannot be used together in `SHOW TABLES`", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "The `FROM` and `TEMPORARY` cannot be used together in `SHOW TABLES`"); String database = getContext()->resolveDatabase(query.from); DatabaseCatalog::instance().assertDatabaseExists(database); @@ -131,7 +131,7 @@ String InterpreterShowTablesQuery::getRewrittenQuery() if (query.temporary) { if (query.dictionaries) - throw Exception("Temporary dictionaries are not possible.", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Temporary dictionaries are not possible."); rewritten_query << "is_temporary"; } else diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp index a82a11e7c97..1d0b299295f 100644 --- a/src/Interpreters/InterpreterSystemQuery.cpp +++ b/src/Interpreters/InterpreterSystemQuery.cpp @@ -142,7 +142,7 @@ AccessType getRequiredAccessType(StorageActionBlockType action_type) else if (action_type == ActionLocks::PartsMove) return AccessType::SYSTEM_MOVES; else - throw Exception("Unknown action type: " + std::to_string(action_type), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown action type: {}", std::to_string(action_type)); } constexpr std::string_view table_is_not_replicated = "Table {} is not replicated"; @@ -320,6 +320,10 @@ BlockIO InterpreterSystemQuery::execute() getContext()->checkAccess(AccessType::SYSTEM_DROP_MMAP_CACHE); system_context->dropMMappedFileCache(); break; + case Type::DROP_QUERY_RESULT_CACHE: + getContext()->checkAccess(AccessType::SYSTEM_DROP_QUERY_RESULT_CACHE); + getContext()->dropQueryResultCache(); + break; #if USE_EMBEDDED_COMPILER case Type::DROP_COMPILED_EXPRESSION_CACHE: getContext()->checkAccess(AccessType::SYSTEM_DROP_COMPILED_EXPRESSION_CACHE); @@ -435,7 +439,7 @@ BlockIO InterpreterSystemQuery::execute() SymbolIndex::reload(); break; #else - throw Exception("SYSTEM RELOAD SYMBOLS is not supported on current platform", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "SYSTEM RELOAD SYMBOLS is not supported on current platform"); #endif } case Type::STOP_MERGES: @@ -553,7 +557,7 @@ BlockIO InterpreterSystemQuery::execute() break; } default: - throw Exception("Unknown type of SYSTEM query", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown type of SYSTEM query"); } return result; @@ -676,7 +680,7 @@ void InterpreterSystemQuery::restartReplicas(ContextMutablePtr system_context) void InterpreterSystemQuery::dropReplica(ASTSystemQuery & query) { if (query.replica.empty()) - throw Exception("Replica name is empty", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Replica name is empty"); if (!table_id.empty()) { @@ -731,10 +735,13 @@ void InterpreterSystemQuery::dropReplica(ASTSystemQuery & query) ReplicatedTableStatus status; storage_replicated->getStatus(status); if 
(status.zookeeper_path == query.replica_zk_path) - throw Exception("There is a local table " + storage_replicated->getStorageID().getNameForLogs() + - ", which has the same table path in ZooKeeper. Please check the path in query. " - "If you want to drop replica of this table, use `DROP TABLE` " - "or `SYSTEM DROP REPLICA 'name' FROM db.table`", ErrorCodes::TABLE_WAS_NOT_DROPPED); + throw Exception(ErrorCodes::TABLE_WAS_NOT_DROPPED, + "There is a local table {}, which has the same table path in ZooKeeper. " + "Please check the path in query. " + "If you want to drop replica " + "of this table, use `DROP TABLE` " + "or `SYSTEM DROP REPLICA 'name' FROM db.table`", + storage_replicated->getStorageID().getNameForLogs()); } } } @@ -744,18 +751,17 @@ void InterpreterSystemQuery::dropReplica(ASTSystemQuery & query) bool looks_like_table_path = zookeeper->exists(query.replica_zk_path + "/replicas") || zookeeper->exists(query.replica_zk_path + "/dropped"); if (!looks_like_table_path) - throw Exception("Specified path " + query.replica_zk_path + " does not look like a table path", - ErrorCodes::TABLE_WAS_NOT_DROPPED); + throw Exception(ErrorCodes::TABLE_WAS_NOT_DROPPED, "Specified path {} does not look like a table path", + query.replica_zk_path); if (zookeeper->exists(remote_replica_path + "/is_active")) - throw Exception("Can't remove replica: " + query.replica + ", because it's active", - ErrorCodes::TABLE_WAS_NOT_DROPPED); + throw Exception(ErrorCodes::TABLE_WAS_NOT_DROPPED, "Can't remove replica: {}, because it's active", query.replica); StorageReplicatedMergeTree::dropReplica(zookeeper, query.replica_zk_path, query.replica, log); LOG_INFO(log, "Dropped replica {}", remote_replica_path); } else - throw Exception("Invalid query", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid query"); } bool InterpreterSystemQuery::dropReplicaImpl(ASTSystemQuery & query, const StoragePtr & table) @@ -770,15 +776,15 @@ bool InterpreterSystemQuery::dropReplicaImpl(ASTSystemQuery & query, const Stora /// Do not allow to drop local replicas and active remote replicas if (query.replica == status.replica_name) - throw Exception("We can't drop local replica, please use `DROP TABLE` " - "if you want to clean the data and drop this replica", ErrorCodes::TABLE_WAS_NOT_DROPPED); + throw Exception(ErrorCodes::TABLE_WAS_NOT_DROPPED, + "We can't drop local replica, please use `DROP TABLE` if you want " + "to clean the data and drop this replica"); /// NOTE it's not atomic: replica may become active after this check, but before dropReplica(...) /// However, the main use case is to drop dead replica, which cannot become active. /// This check prevents only from accidental drop of some other replica. 
if (zookeeper->exists(status.zookeeper_path + "/replicas/" + query.replica + "/is_active")) - throw Exception("Can't drop replica: " + query.replica + ", because it's active", - ErrorCodes::TABLE_WAS_NOT_DROPPED); + throw Exception(ErrorCodes::TABLE_WAS_NOT_DROPPED, "Can't drop replica: {}, because it's active", query.replica); storage_replicated->dropReplica(zookeeper, status.zookeeper_path, query.replica, log); LOG_TRACE(log, "Dropped replica {} of {}", query.replica, table->getStorageID().getNameForLogs()); @@ -789,7 +795,7 @@ bool InterpreterSystemQuery::dropReplicaImpl(ASTSystemQuery & query, const Stora void InterpreterSystemQuery::dropDatabaseReplica(ASTSystemQuery & query) { if (query.replica.empty()) - throw Exception("Replica name is empty", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Replica name is empty"); auto check_not_local_replica = [](const DatabaseReplicated * replicated, const ASTSystemQuery & query) { @@ -852,7 +858,7 @@ void InterpreterSystemQuery::dropDatabaseReplica(ASTSystemQuery & query) LOG_INFO(log, "Dropped replica {} of Replicated database with path {}", query.replica, query.replica_zk_path); } else - throw Exception("Invalid query", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid query"); } void InterpreterSystemQuery::syncReplica() @@ -928,13 +934,13 @@ void InterpreterSystemQuery::flushDistributed(ASTSystemQuery &) if (auto * storage_distributed = dynamic_cast(DatabaseCatalog::instance().getTable(table_id, getContext()).get())) storage_distributed->flushClusterNodesAllData(getContext()); else - throw Exception("Table " + table_id.getNameForLogs() + " is not distributed", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table {} is not distributed", table_id.getNameForLogs()); } [[noreturn]] void InterpreterSystemQuery::restartDisk(String &) { getContext()->checkAccess(AccessType::SYSTEM_RESTART_DISK); - throw Exception("SYSTEM RESTART DISK is not supported", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "SYSTEM RESTART DISK is not supported"); } @@ -956,6 +962,7 @@ AccessRightsElements InterpreterSystemQuery::getRequiredAccessForDDLOnCluster() case Type::DROP_DNS_CACHE: case Type::DROP_MARK_CACHE: case Type::DROP_MMAP_CACHE: + case Type::DROP_QUERY_RESULT_CACHE: #if USE_EMBEDDED_COMPILER case Type::DROP_COMPILED_EXPRESSION_CACHE: #endif diff --git a/src/Interpreters/InterpreterWatchQuery.cpp b/src/Interpreters/InterpreterWatchQuery.cpp index 9d153c2a9d2..b2086831e4e 100644 --- a/src/Interpreters/InterpreterWatchQuery.cpp +++ b/src/Interpreters/InterpreterWatchQuery.cpp @@ -66,12 +66,12 @@ QueryPipelineBuilder InterpreterWatchQuery::buildQueryPipeline() auto storage_name = storage->getName(); if (storage_name == "LiveView" && !getContext()->getSettingsRef().allow_experimental_live_view) - throw Exception("Experimental LIVE VIEW feature is not enabled (the setting 'allow_experimental_live_view')", - ErrorCodes::SUPPORT_IS_DISABLED); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, + "Experimental LIVE VIEW feature is not enabled (the setting 'allow_experimental_live_view')"); else if (storage_name == "WindowView" && !getContext()->getSettingsRef().allow_experimental_window_view) - throw Exception("Experimental WINDOW VIEW feature is not enabled (the setting 'allow_experimental_window_view')", - ErrorCodes::SUPPORT_IS_DISABLED); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, + "Experimental WINDOW VIEW feature is not enabled (the 
setting 'allow_experimental_window_view')"); /// List of columns to read to execute the query. Names required_columns = storage->getInMemoryMetadataPtr()->getColumns().getNamesOfPhysical(); @@ -82,10 +82,8 @@ QueryPipelineBuilder InterpreterWatchQuery::buildQueryPipeline() /// Limitation on the number of columns to read. if (settings.max_columns_to_read && required_columns.size() > settings.max_columns_to_read) - throw Exception("Limit for number of columns to read exceeded. " - "Requested: " + std::to_string(required_columns.size()) - + ", maximum: " + settings.max_columns_to_read.toString(), - ErrorCodes::TOO_MANY_COLUMNS); + throw Exception(ErrorCodes::TOO_MANY_COLUMNS, "Limit for number of columns to read exceeded. " + "Requested: {}, maximum: {}", required_columns.size(), settings.max_columns_to_read.toString()); size_t max_block_size = settings.max_block_size; size_t max_streams = 1; diff --git a/src/Interpreters/InterserverCredentials.cpp b/src/Interpreters/InterserverCredentials.cpp index 6e36b06f9cc..094b58789a8 100644 --- a/src/Interpreters/InterserverCredentials.cpp +++ b/src/Interpreters/InterserverCredentials.cpp @@ -14,10 +14,11 @@ std::unique_ptr InterserverCredentials::make(const Poco::Util::AbstractConfiguration & config, const std::string & root_tag) { if (config.has("user") && !config.has("password")) - throw Exception("Configuration parameter interserver_http_credentials.password can't be empty", ErrorCodes::NO_ELEMENTS_IN_CONFIG); + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Configuration parameter interserver_http_credentials.password can't be empty"); if (!config.has("user") && config.has("password")) - throw Exception("Configuration parameter interserver_http_credentials.user can't be empty if user specified", ErrorCodes::NO_ELEMENTS_IN_CONFIG); + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, + "Configuration parameter interserver_http_credentials.user can't be empty if user specified"); /// They both can be empty auto user = config.getString(root_tag + ".user", ""); diff --git a/src/Interpreters/InterserverIOHandler.h b/src/Interpreters/InterserverIOHandler.h index 69b742db2ec..375c6ee9ca5 100644 --- a/src/Interpreters/InterserverIOHandler.h +++ b/src/Interpreters/InterserverIOHandler.h @@ -7,11 +7,11 @@ #include #include #include +#include #include #include #include -#include #include namespace zkutil @@ -43,7 +43,7 @@ public: /// You need to stop the data transfer if blocker is activated. ActionBlocker blocker; - std::shared_mutex rwlock; + SharedMutex rwlock; }; using InterserverIOEndpointPtr = std::shared_ptr; @@ -60,7 +60,7 @@ public: std::lock_guard lock(mutex); bool inserted = endpoint_map.try_emplace(name, std::move(endpoint)).second; if (!inserted) - throw Exception("Duplicate interserver IO endpoint: " + name, ErrorCodes::DUPLICATE_INTERSERVER_IO_ENDPOINT); + throw Exception(ErrorCodes::DUPLICATE_INTERSERVER_IO_ENDPOINT, "Duplicate interserver IO endpoint: {}", name); } bool removeEndpointIfExists(const String & name) @@ -77,7 +77,7 @@ public: } catch (...) 
{ - throw Exception("No interserver IO endpoint named " + name, ErrorCodes::NO_SUCH_INTERSERVER_IO_ENDPOINT); + throw Exception(ErrorCodes::NO_SUCH_INTERSERVER_IO_ENDPOINT, "No interserver IO endpoint named {}", name); } private: diff --git a/src/Interpreters/JoinSwitcher.cpp b/src/Interpreters/JoinSwitcher.cpp index 996fd1e4ac7..15702784d74 100644 --- a/src/Interpreters/JoinSwitcher.cpp +++ b/src/Interpreters/JoinSwitcher.cpp @@ -41,7 +41,7 @@ bool JoinSwitcher::addJoinedBlock(const Block & block, bool) bool JoinSwitcher::switchJoin() { HashJoin * hash_join = assert_cast(join.get()); - BlocksList right_blocks = hash_join->releaseJoinedBlocks(); + BlocksList right_blocks = hash_join->releaseJoinedBlocks(true); /// Destroy old join & create new one. join = std::make_shared(table_join, right_sample_block); diff --git a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp index 5500c274c23..bac82d967f2 100644 --- a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp +++ b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp @@ -42,7 +42,7 @@ ASTPtr makeSubqueryTemplate() ParserTablesInSelectQueryElement parser(true); ASTPtr subquery_template = parseQuery(parser, "(select * from _t) as `--.s`", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); if (!subquery_template) - throw Exception("Cannot parse subquery template", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot parse subquery template"); return subquery_template; } @@ -98,10 +98,10 @@ public: name = table_name_it->second; it = table_columns.find(table_name_it->second); if (it == table_columns.end()) - throw Exception("Unknown qualified identifier: " + table_name, ErrorCodes::UNKNOWN_IDENTIFIER); + throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown qualified identifier: {}", table_name); } else - throw Exception("Unknown qualified identifier: " + table_name, ErrorCodes::UNKNOWN_IDENTIFIER); + throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown qualified identifier: {}", table_name); } for (const auto & column : it->second) @@ -165,7 +165,7 @@ private: has_asterisks = true; if (!qualified_asterisk->qualifier) - throw Exception("Logical error: qualified asterisk must have a qualifier", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: qualified asterisk must have a qualifier"); auto & identifier = qualified_asterisk->qualifier->as(); @@ -180,7 +180,7 @@ private: transformer->as()) IASTColumnsTransformer::transform(transformer, columns); else - throw Exception("Logical error: qualified asterisk must only have children of IASTColumnsTransformer type", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: qualified asterisk must only have children of IASTColumnsTransformer type"); } } } @@ -267,7 +267,7 @@ bool needRewrite(ASTSelectQuery & select, std::vectorchildren[i]->as(); if (!table) - throw Exception("Table expected", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Table expected"); if (table->table_expression) if (const auto * expression = table->table_expression->as()) @@ -276,7 +276,7 @@ bool needRewrite(ASTSelectQuery & select, std::vectortable_join && !table->array_join) - throw Exception("Joined table expected", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Joined table expected"); if (table->array_join) { @@ -286,7 +286,7 @@ bool needRewrite(ASTSelectQuery & select, std::vectortable_join->as(); if (join.kind == JoinKind::Comma) 
- throw Exception("COMMA to CROSS JOIN rewriter is not enabled or cannot rewrite query", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "COMMA to CROSS JOIN rewriter is not enabled or cannot rewrite query"); } if (num_tables - num_array_join <= 2) @@ -294,7 +294,7 @@ bool needRewrite(ASTSelectQuery & select, std::vector normalizeColumnNamesExtractNeeded( alias_ident_typed->restoreTable(); bool alias_equals_column_name = alias_ident->getColumnNameWithoutAlias() == ident->getColumnNameWithoutAlias(); if (!alias_equals_column_name) - throw Exception("Alias clashes with qualified column '" + ident->name() + "'", ErrorCodes::AMBIGUOUS_COLUMN_NAME); + throw Exception(ErrorCodes::AMBIGUOUS_COLUMN_NAME, "Alias clashes with qualified column '{}'", ident->name()); } } String short_name = ident->shortName(); @@ -690,7 +690,7 @@ void JoinToSubqueryTransformMatcher::visit(ASTSelectQuery & select, ASTPtr & ast if (table_expressions.size() != data.tables.size() || tables_count != data.tables.size()) - throw Exception("Inconsistent tables count in JOIN rewriter", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Inconsistent tables count in JOIN rewriter"); /// Replace * and t.* with columns in select expression list. { @@ -753,15 +753,15 @@ void JoinToSubqueryTransformMatcher::visit(ASTSelectQuery & select, ASTPtr & ast CheckAliasDependencyVisitor::Data check{data.aliases}; CheckAliasDependencyVisitor(check).visit(expr.second); if (check.dependency) - throw Exception("Cannot rewrite JOINs. Alias '" + expr.first + - "' used in ON section depends on another alias '" + check.dependency->name() + "'", - ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot rewrite JOINs. " + "Alias '{}' used in ON section depends on another alias '{}'", + expr.first, check.dependency->name()); } /// Check same name in aliases, USING and ON sections. Cannot push down alias to ON through USING cause of name masquerading. for (auto * ident : using_identifiers) if (on_aliases.contains(ident->name())) - throw Exception("Cannot rewrite JOINs. Alias '" + ident->name() + "' appears both in ON and USING", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot rewrite JOINs. Alias '{}' appears both in ON and USING", ident->name()); using_identifiers.clear(); /// Replace pushdowned expressions with aliases names in original expression lists. 
@@ -809,10 +809,10 @@ ASTPtr JoinToSubqueryTransformMatcher::replaceJoin(ASTPtr ast_left, ASTPtr ast_r const auto * left = ast_left->as(); const auto * right = ast_right->as(); if (!left || !right) - throw Exception("Two TablesInSelectQueryElements expected", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Two TablesInSelectQueryElements expected"); if (!right->table_join) - throw Exception("Table join expected", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Table join expected"); /// replace '_t' with pair of joined tables RewriteVisitor::Data visitor_data{ast_left, ast_right}; diff --git a/src/Interpreters/JoinUtils.cpp b/src/Interpreters/JoinUtils.cpp index a4ec64ab70e..b8d8dd5df74 100644 --- a/src/Interpreters/JoinUtils.cpp +++ b/src/Interpreters/JoinUtils.cpp @@ -324,17 +324,20 @@ ColumnRawPtrs materializeColumnsInplace(Block & block, const Names & names) return ptrs; } -ColumnRawPtrMap materializeColumnsInplaceMap(Block & block, const Names & names) +ColumnPtrMap materializeColumnsInplaceMap(const Block & block, const Names & names) { - ColumnRawPtrMap ptrs; + ColumnPtrMap ptrs; ptrs.reserve(names.size()); for (const auto & column_name : names) { - auto & column = block.getByName(column_name); - column.column = recursiveRemoveLowCardinality(column.column->convertToFullColumnIfConst()); - column.type = recursiveRemoveLowCardinality(column.type); - ptrs[column_name] = column.column.get(); + ColumnPtr column = block.getByName(column_name).column; + + column = column->convertToFullColumnIfConst(); + column = recursiveRemoveLowCardinality(column); + column = recursiveRemoveSparse(column); + + ptrs[column_name] = column; } return ptrs; @@ -529,24 +532,24 @@ bool typesEqualUpToNullability(DataTypePtr left_type, DataTypePtr right_type) JoinMask getColumnAsMask(const Block & block, const String & column_name) { if (column_name.empty()) - return JoinMask(true); + return JoinMask(true, block.rows()); const auto & src_col = block.getByName(column_name); DataTypePtr col_type = recursiveRemoveLowCardinality(src_col.type); if (isNothing(col_type)) - return JoinMask(false); + return JoinMask(false, block.rows()); if (const auto * const_cond = checkAndGetColumn(*src_col.column)) { - return JoinMask(const_cond->getBool(0)); + return JoinMask(const_cond->getBool(0), block.rows()); } ColumnPtr join_condition_col = recursiveRemoveLowCardinality(src_col.column->convertToFullColumnIfConst()); if (const auto * nullable_col = typeid_cast(join_condition_col.get())) { if (isNothing(assert_cast(*col_type).getNestedType())) - return JoinMask(false); + return JoinMask(false, block.rows()); /// Return nested column with NULL set to false const auto & nest_col = assert_cast(nullable_col->getNestedColumn()); @@ -639,9 +642,8 @@ Blocks scatterBlockByHash(const Strings & key_columns_names, const Block & block { if (num_shards == 0) throw Exception(ErrorCodes::LOGICAL_ERROR, "Number of shards must be positive"); - UNUSED(scatterBlockByHashPow2); - // if (likely(isPowerOf2(num_shards))) - // return scatterBlockByHashPow2(key_columns_names, block, num_shards); + if (likely(isPowerOf2(num_shards))) + return scatterBlockByHashPow2(key_columns_names, block, num_shards); return scatterBlockByHashGeneric(key_columns_names, block, num_shards); } diff --git a/src/Interpreters/JoinUtils.h b/src/Interpreters/JoinUtils.h index 36be71f2a91..f112ca22e5b 100644 --- a/src/Interpreters/JoinUtils.h +++ b/src/Interpreters/JoinUtils.h @@ -14,30 +14,34 @@ class TableJoin; class IColumn; 
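The JoinUtils.cpp hunk above resizes the mask returned by getColumnAsMask() to the block's row count, and the JoinUtils.h hunk that continues below drops JoinMask's constant-value shortcut in favour of an always-materialized column. A minimal stand-alone sketch of the new behaviour follows; a plain std::vector stands in for ColumnUInt8, and only the member names (hasData, isRowFiltered) and the two-argument constructor come from the diff.

    #include <cstddef>
    #include <vector>

    // Stand-in for the reworked JoinMask: the mask is either empty (hasData() == false,
    // meaning there is no condition column) or materialized to `size` rows.
    struct JoinMaskSketch
    {
        std::vector<unsigned char> column;   // models ColumnUInt8

        JoinMaskSketch() = default;                                        // JoinMask()
        JoinMaskSketch(bool value, size_t size) : column(size, value) {}   // JoinMask(bool, size_t)

        bool hasData() const { return !column.empty(); }
        bool isRowFiltered(size_t row) const { return !column[row]; }
    };

    // Callers now test hasData() where they used to test !isConstant(), as the
    // condtitionColumnToJoinable() change in the MergeJoin.cpp hunk further below shows.
    size_t countFilteredRows(const JoinMaskSketch & mask)
    {
        size_t filtered = 0;
        if (mask.hasData())
            for (size_t i = 0; i < mask.column.size(); ++i)
                filtered += mask.isRowFiltered(i) ? 1 : 0;
        return filtered;
    }

Materializing the mask up front lets isRowFiltered() read the column unconditionally, which is why the const_value member and its branch disappear in the removed lines.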
using ColumnRawPtrs = std::vector; +using ColumnPtrMap = std::unordered_map; using ColumnRawPtrMap = std::unordered_map; using UInt8ColumnDataPtr = const ColumnUInt8::Container *; namespace JoinCommon { -/// Store boolean column handling constant value without materializing -/// Behaves similar to std::variant, but provides more convenient specialized interface +/// Helper interface to work with mask from JOIN ON section class JoinMask { public: - explicit JoinMask(bool value) + explicit JoinMask() : column(nullptr) - , const_value(value) + {} + + explicit JoinMask(bool value, size_t size) + : column(ColumnUInt8::create(size, value)) {} explicit JoinMask(ColumnPtr col) : column(col) - , const_value(false) {} - bool isConstant() { return !column; } + bool hasData() + { + return column != nullptr; + } - /// Return data if mask is not constant UInt8ColumnDataPtr getData() { if (column) @@ -47,15 +51,11 @@ public: inline bool isRowFiltered(size_t row) const { - if (column) - return !assert_cast(*column).getData()[row]; - return !const_value; + return !assert_cast(*column).getData()[row]; } private: ColumnPtr column; - /// Used if column is null - bool const_value; }; @@ -71,7 +71,7 @@ ColumnPtr emptyNotNullableClone(const ColumnPtr & column); ColumnPtr materializeColumn(const Block & block, const String & name); Columns materializeColumns(const Block & block, const Names & names); ColumnRawPtrs materializeColumnsInplace(Block & block, const Names & names); -ColumnRawPtrMap materializeColumnsInplaceMap(Block & block, const Names & names); +ColumnPtrMap materializeColumnsInplaceMap(const Block & block, const Names & names); ColumnRawPtrs getRawPointers(const Columns & columns); void convertToFullColumnsInplace(Block & block); void convertToFullColumnsInplace(Block & block, const Names & names, bool change_type = true); diff --git a/src/Interpreters/JoinedTables.cpp b/src/Interpreters/JoinedTables.cpp index 1d8676cfc57..7c999803b44 100644 --- a/src/Interpreters/JoinedTables.cpp +++ b/src/Interpreters/JoinedTables.cpp @@ -143,8 +143,8 @@ private: match == IdentifierSemantic::ColumnMatch::DBAndTable) { if (rewritten) - throw Exception("Failed to rewrite distributed table names. Ambiguous column '" + identifier.name() + "'", - ErrorCodes::AMBIGUOUS_COLUMN_NAME); + throw Exception(ErrorCodes::AMBIGUOUS_COLUMN_NAME, "Failed to rewrite distributed table names. Ambiguous column '{}'", + identifier.name()); /// Table has an alias. So we set a new name qualified by table alias. IdentifierSemantic::setColumnLongName(identifier, table); rewritten = true; @@ -161,8 +161,8 @@ private: if (identifier.name() == table.table) { if (rewritten) - throw Exception("Failed to rewrite distributed table. Ambiguous column '" + identifier.name() + "'", - ErrorCodes::AMBIGUOUS_COLUMN_NAME); + throw Exception(ErrorCodes::AMBIGUOUS_COLUMN_NAME, "Failed to rewrite distributed table. 
Ambiguous column '{}'", + identifier.name()); identifier.setShortName(table.alias); rewritten = true; } @@ -241,7 +241,7 @@ bool JoinedTables::resolveTables() bool include_materialized_cols = include_all_columns || settings.asterisk_include_materialized_columns; tables_with_columns = getDatabaseAndTablesWithColumns(table_expressions, context, include_alias_cols, include_materialized_cols); if (tables_with_columns.size() != table_expressions.size()) - throw Exception("Unexpected tables count", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected tables count"); if (settings.joined_subquery_requires_alias && tables_with_columns.size() > 1) { @@ -250,9 +250,10 @@ bool JoinedTables::resolveTables() const auto & t = tables_with_columns[i]; if (t.table.table.empty() && t.table.alias.empty()) { - throw Exception("No alias for subquery or table function in JOIN (set joined_subquery_requires_alias=0 to disable restriction). While processing '" - + table_expressions[i]->formatForErrorMessage() + "'", - ErrorCodes::ALIAS_REQUIRED); + throw Exception(ErrorCodes::ALIAS_REQUIRED, + "No alias for subquery or table function " + "in JOIN (set joined_subquery_requires_alias=0 to disable restriction). " + "While processing '{}'", table_expressions[i]->formatForErrorMessage()); } } } diff --git a/src/Interpreters/Lemmatizers.cpp b/src/Interpreters/Lemmatizers.cpp index 5044aae083c..c24679de76e 100644 --- a/src/Interpreters/Lemmatizers.cpp +++ b/src/Interpreters/Lemmatizers.cpp @@ -57,17 +57,17 @@ Lemmatizers::Lemmatizers(const Poco::Util::AbstractConfiguration & config) const auto & lemm_path = config.getString(prefix + "." + key + ".path", ""); if (lemm_name.empty()) - throw Exception("Lemmatizer language in config is not specified here: " + prefix + "." + key + ".lang", - ErrorCodes::INVALID_CONFIG_PARAMETER); + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Lemmatizer language in config is not specified here: " + "{}.{}.lang", prefix, key); if (lemm_path.empty()) - throw Exception("Path to lemmatizer in config is not specified here: " + prefix + "." + key + ".path", - ErrorCodes::INVALID_CONFIG_PARAMETER); + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Path to lemmatizer in config is not specified here: {}.{}.path", + prefix, key); paths[lemm_name] = lemm_path; } else - throw Exception("Unknown element in config: " + prefix + "." 
+ key + ", must be 'lemmatizer'", - ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG, "Unknown element in config: {}.{}, must be 'lemmatizer'", + prefix, key); } } @@ -81,15 +81,13 @@ Lemmatizers::LemmPtr Lemmatizers::getLemmatizer(const String & name) if (paths.find(name) != paths.end()) { if (!std::filesystem::exists(paths[name])) - throw Exception("Incorrect path to lemmatizer: " + paths[name], - ErrorCodes::INVALID_CONFIG_PARAMETER); + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Incorrect path to lemmatizer: {}", paths[name]); lemmatizers[name] = std::make_shared(paths[name]); return lemmatizers[name]; } - throw Exception("Lemmatizer named: '" + name + "' is not found", - ErrorCodes::INVALID_CONFIG_PARAMETER); + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Lemmatizer named: '{}' is not found", name); } } diff --git a/src/Interpreters/LogicalExpressionsOptimizer.cpp b/src/Interpreters/LogicalExpressionsOptimizer.cpp index 67ca987d82b..02594269f08 100644 --- a/src/Interpreters/LogicalExpressionsOptimizer.cpp +++ b/src/Interpreters/LogicalExpressionsOptimizer.cpp @@ -156,8 +156,7 @@ void LogicalExpressionsOptimizer::collectDisjunctiveEqualityChains() { auto res = or_parent_map.insert(std::make_pair(function, ParentNodes{from_node})); if (!res.second) - throw Exception("LogicalExpressionsOptimizer: parent node information is corrupted", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "LogicalExpressionsOptimizer: parent node information is corrupted"); } } else @@ -332,8 +331,7 @@ void LogicalExpressionsOptimizer::cleanupOrExpressions() auto it = garbage_map.find(or_with_expression.or_function); if (it == garbage_map.end()) - throw Exception("LogicalExpressionsOptimizer: garbage map is corrupted", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "LogicalExpressionsOptimizer: garbage map is corrupted"); auto & first_erased = it->second; first_erased = std::remove_if(operands.begin(), first_erased, [&](const ASTPtr & operand) @@ -369,8 +367,7 @@ void LogicalExpressionsOptimizer::fixBrokenOrExpressions() { auto it = or_parent_map.find(or_function); if (it == or_parent_map.end()) - throw Exception("LogicalExpressionsOptimizer: parent node information is corrupted", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "LogicalExpressionsOptimizer: parent node information is corrupted"); auto & parents = it->second; auto it2 = column_to_position.find(or_function); @@ -379,7 +376,7 @@ void LogicalExpressionsOptimizer::fixBrokenOrExpressions() size_t position = it2->second; bool inserted = column_to_position.emplace(operands[0].get(), position).second; if (!inserted) - throw Exception("LogicalExpressionsOptimizer: internal error", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "LogicalExpressionsOptimizer: internal error"); column_to_position.erase(it2); } diff --git a/src/Interpreters/MergeJoin.cpp b/src/Interpreters/MergeJoin.cpp index 2d54accc76a..a5ab6b25d02 100644 --- a/src/Interpreters/MergeJoin.cpp +++ b/src/Interpreters/MergeJoin.cpp @@ -55,7 +55,7 @@ ColumnWithTypeAndName condtitionColumnToJoinable(const Block & block, const Stri if (!src_column_name.empty()) { auto join_mask = JoinCommon::getColumnAsMask(block, src_column_name); - if (!join_mask.isConstant()) + if (join_mask.hasData()) { for (size_t i = 0; i < res_size; ++i) null_map->getData()[i] = join_mask.isRowFiltered(i); @@ -123,7 +123,7 @@ int 
nullableCompareAt(const IColumn & left_column, const IColumn & right_column, Block extractMinMax(const Block & block, const Block & keys) { if (block.rows() == 0) - throw Exception("Unexpected empty block", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected empty block"); Block min_max = keys.cloneEmpty(); MutableColumns columns = min_max.mutateColumns(); @@ -227,7 +227,7 @@ public: { /// SortCursorImpl can work with permutation, but MergeJoinCursor can't. if (impl.permutation) - throw Exception("Logical error: MergeJoinCursor doesn't support permutation", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: MergeJoinCursor doesn't support permutation"); } size_t position() const { return impl.getRow(); } @@ -261,7 +261,7 @@ public: int intersect(const Block & min_max, const Names & key_names) { if (end() == 0 || min_max.rows() != 2) - throw Exception("Unexpected block size", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected block size"); size_t last_position = end() - 1; int first_vs_max = 0; @@ -488,25 +488,25 @@ MergeJoin::MergeJoin(std::shared_ptr table_join_, const Block & right case JoinStrictness::Any: case JoinStrictness::Semi: if (!is_left && !is_inner) - throw Exception("Not supported. MergeJoin supports SEMI and ANY variants only for LEFT and INNER JOINs.", - ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not supported. MergeJoin supports SEMI and ANY variants only for LEFT and INNER JOINs."); break; default: - throw Exception("Not supported. MergeJoin supports ALL, ANY and SEMI JOINs variants.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not supported. MergeJoin supports ALL, ANY and SEMI JOINs variants."); } if (!max_rows_in_right_block) - throw Exception("partial_merge_join_rows_in_right_blocks cannot be zero", ErrorCodes::PARAMETER_OUT_OF_BOUND); + throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, "partial_merge_join_rows_in_right_blocks cannot be zero"); if (max_files_to_merge < 2) - throw Exception("max_files_to_merge cannot be less than 2", ErrorCodes::PARAMETER_OUT_OF_BOUND); + throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, "max_files_to_merge cannot be less than 2"); if (!size_limits.hasLimits()) { size_limits.max_bytes = table_join->defaultMaxBytes(); if (!size_limits.max_bytes) - throw Exception("No limit for MergeJoin (max_rows_in_join, max_bytes_in_join or default_max_bytes_in_join have to be set)", - ErrorCodes::PARAMETER_OUT_OF_BOUND); + throw Exception(ErrorCodes::PARAMETER_OUT_OF_BOUND, + "No limit for MergeJoin (max_rows_in_join, max_bytes_in_join " + "or default_max_bytes_in_join have to be set)"); } if (!table_join->oneDisjunct()) diff --git a/src/Interpreters/MergeJoin.h b/src/Interpreters/MergeJoin.h index 770ca0409bf..8b5d884a0e6 100644 --- a/src/Interpreters/MergeJoin.h +++ b/src/Interpreters/MergeJoin.h @@ -1,7 +1,6 @@ #pragma once -#include - +#include #include #include #include @@ -72,7 +71,7 @@ private: using Cache = CacheBase, BlockByteWeight>; - mutable std::shared_mutex rwlock; + mutable SharedMutex rwlock; std::shared_ptr table_join; SizeLimits size_limits; SortDescription left_sort_description; diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index cec03863c69..c207309a274 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -247,8 +247,7 @@ bool 
isStorageTouchedByMutations( if (!block.rows()) return false; else if (block.rows() != 1) - throw Exception("count() expression returned " + toString(block.rows()) + " rows, not 1", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "count() expression returned {} rows, not 1", block.rows()); Block tmp_block; while (executor.pull(tmp_block)); @@ -276,7 +275,7 @@ ASTPtr getPartitionAndPredicateExpressionForMutationCommand( else if (storage_from_merge_tree_data_part) partition_id = storage_from_merge_tree_data_part->getPartitionIDFromQuery(command.partition, context); else - throw Exception("ALTER UPDATE/DELETE ... IN PARTITION is not supported for non-MergeTree tables", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "ALTER UPDATE/DELETE ... IN PARTITION is not supported for non-MergeTree tables"); partition_predicate_as_ast_func = makeASTFunction("equals", std::make_shared("_partition_id"), @@ -456,14 +455,14 @@ static void validateUpdateColumns( for (const auto & col : metadata_snapshot->getColumns().getMaterialized()) { if (col.name == column_name) - throw Exception("Cannot UPDATE materialized column " + backQuote(column_name), ErrorCodes::CANNOT_UPDATE_COLUMN); + throw Exception(ErrorCodes::CANNOT_UPDATE_COLUMN, "Cannot UPDATE materialized column {}", backQuote(column_name)); } - throw Exception("There is no column " + backQuote(column_name) + " in table", ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no column {} in table", backQuote(column_name)); } if (key_columns.contains(column_name)) - throw Exception("Cannot UPDATE key column " + backQuote(column_name), ErrorCodes::CANNOT_UPDATE_COLUMN); + throw Exception(ErrorCodes::CANNOT_UPDATE_COLUMN, "Cannot UPDATE key column {}", backQuote(column_name)); auto materialized_it = column_to_affected_materialized.find(column_name); if (materialized_it != column_to_affected_materialized.end()) @@ -471,9 +470,9 @@ static void validateUpdateColumns( for (const String & materialized : materialized_it->second) { if (key_columns.contains(materialized)) - throw Exception("Updated column " + backQuote(column_name) + " affects MATERIALIZED column " - + backQuote(materialized) + ", which is a key column. Cannot UPDATE it.", - ErrorCodes::CANNOT_UPDATE_COLUMN); + throw Exception(ErrorCodes::CANNOT_UPDATE_COLUMN, + "Updated column {} affects MATERIALIZED column {}, which is a key column. " + "Cannot UPDATE it.", backQuote(column_name), backQuote(materialized)); } } } @@ -512,10 +511,10 @@ static std::optional> getExpressionsOfUpdatedNestedSubcolumn void MutationsInterpreter::prepare(bool dry_run) { if (is_prepared) - throw Exception("MutationsInterpreter is already prepared. It is a bug.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "MutationsInterpreter is already prepared. 
It is a bug."); if (commands.empty()) - throw Exception("Empty mutation commands list", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty mutation commands list"); const ColumnsDescription & columns_desc = metadata_snapshot->getColumns(); const IndicesDescription & indices_desc = metadata_snapshot->getSecondaryIndices(); @@ -713,7 +712,7 @@ void MutationsInterpreter::prepare(bool dry_run) return index.name == command.index_name; }); if (it == std::cend(indices_desc)) - throw Exception("Unknown index: " + command.index_name, ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown index: {}", command.index_name); auto query = (*it).expression_list_ast->clone(); auto syntax_result = TreeRewriter(context).analyze(query, all_columns); @@ -802,7 +801,7 @@ void MutationsInterpreter::prepare(bool dry_run) read_columns.emplace_back(command.column_name); } else - throw Exception("Unknown mutation command type: " + DB::toString(command.type), ErrorCodes::UNKNOWN_MUTATION_COMMAND); + throw Exception(ErrorCodes::UNKNOWN_MUTATION_COMMAND, "Unknown mutation command type: {}", DB::toString(command.type)); } if (!read_columns.empty()) @@ -1178,7 +1177,7 @@ QueryPipelineBuilder MutationsInterpreter::addStreamsForLaterStages(const std::v { const auto & step = stage.expressions_chain.steps[i]; if (step->actions()->hasArrayJoin()) - throw Exception("arrayJoin is not allowed in mutations", ErrorCodes::UNEXPECTED_EXPRESSION); + throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "arrayJoin is not allowed in mutations"); if (i < stage.filter_column_names.size()) { /// Execute DELETEs. @@ -1239,7 +1238,7 @@ void MutationsInterpreter::validate() QueryPipelineBuilder MutationsInterpreter::execute() { if (!can_execute) - throw Exception("Cannot execute mutations interpreter because can_execute flag set to false", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot execute mutations interpreter because can_execute flag set to false"); QueryPlan plan; initQueryPlan(stages.front(), plan); diff --git a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp index 70773e2fffb..7ba7749e89b 100644 --- a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp +++ b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp @@ -78,7 +78,7 @@ NamesAndTypesList getColumnsList(const ASTExpressionList * columns_definition) const auto & declare_column = declare_column_ast->as(); if (!declare_column || !declare_column->data_type) - throw Exception("Missing type in definition of column.", ErrorCodes::UNKNOWN_TYPE); + throw Exception(ErrorCodes::UNKNOWN_TYPE, "Missing type in definition of column."); bool is_nullable = true; bool is_unsigned = false; @@ -147,7 +147,7 @@ NamesAndTypesList getColumnsList(const ASTExpressionList * columns_definition) static ColumnsDescription createColumnsDescription(const NamesAndTypesList & columns_name_and_type, const ASTExpressionList * columns_definition) { if (columns_name_and_type.size() != columns_definition->children.size()) - throw Exception("Columns of different size provided.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Columns of different size provided."); ColumnsDescription columns_description; @@ -337,7 +337,7 @@ static ASTPtr getPartitionPolicy(const NamesAndTypesList & primary_keys) WhichDataType which(type); if (which.isNullable()) - throw Exception("LOGICAL ERROR: MySQL primary key must be not null, it is a bug.", 
ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "LOGICAL ERROR: MySQL primary key must be not null, it is a bug."); if (which.isDate() || which.isDate32() || which.isDateTime() || which.isDateTime64()) { @@ -440,7 +440,7 @@ void InterpreterCreateImpl::validate(const InterpreterCreateImpl::TQuery & creat missing_columns_definition = false; } if (missing_columns_definition) - throw Exception("Missing definition of columns.", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED); + throw Exception(ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED, "Missing definition of columns."); } } @@ -473,8 +473,8 @@ ASTs InterpreterCreateImpl::getRewrittenQueries( ColumnsDescription columns_description = createColumnsDescription(columns_name_and_type, create_defines->columns); if (primary_keys.empty()) - throw Exception("The " + backQuoteIfNeed(mysql_database) + "." + backQuoteIfNeed(create_query.table) - + " cannot be materialized, because there is no primary keys.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "The {}.{} cannot be materialized, because there is no primary keys.", + backQuoteIfNeed(mysql_database), backQuoteIfNeed(create_query.table)); auto columns = std::make_shared(); @@ -572,7 +572,7 @@ ASTs InterpreterDropImpl::getRewrittenQueries( void InterpreterRenameImpl::validate(const InterpreterRenameImpl::TQuery & rename_query, ContextPtr /*context*/) { if (rename_query.exchange) - throw Exception("Cannot execute exchange for external ddl query.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot execute exchange for external ddl query."); } ASTs InterpreterRenameImpl::getRewrittenQueries( @@ -585,7 +585,7 @@ ASTs InterpreterRenameImpl::getRewrittenQueries( const auto & from_database = resolveDatabase(rename_element.from.database, mysql_database, mapped_to_database, context); if ((from_database == mapped_to_database || to_database == mapped_to_database) && to_database != from_database) - throw Exception("Cannot rename with other database for external ddl query.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot rename with other database for external ddl query."); if (from_database == mapped_to_database) { @@ -718,7 +718,7 @@ ASTs InterpreterAlterImpl::getRewrittenQueries( auto modify_columns = getColumnsList(alter_command->additional_columns); if (modify_columns.size() != 1) - throw Exception("It is a bug", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "It is a bug"); new_column_name = modify_columns.front().name; @@ -751,7 +751,7 @@ ASTs InterpreterAlterImpl::getRewrittenQueries( const auto & to_database = resolveDatabase(alter_command->new_database_name, mysql_database, mapped_to_database, context); if (to_database != mapped_to_database) - throw Exception("Cannot rename with other database for external ddl query.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot rename with other database for external ddl query."); /// For ALTER TABLE table_name RENAME TO new_table_name_1, RENAME TO new_table_name_2; /// We just need to generate RENAME TABLE table_name TO new_table_name_2; diff --git a/src/Interpreters/OptimizeIfChains.cpp b/src/Interpreters/OptimizeIfChains.cpp index d4b4d20bb9c..ba4c7bcd95f 100644 --- a/src/Interpreters/OptimizeIfChains.cpp +++ b/src/Interpreters/OptimizeIfChains.cpp @@ -61,13 +61,14 @@ ASTs OptimizeIfChainsVisitor::ifChain(const ASTPtr & child) { const auto * function_node = child->as(); if 
(!function_node || !function_node->arguments) - throw Exception("Unexpected AST for function 'if'", ErrorCodes::UNEXPECTED_AST_STRUCTURE); + throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "Unexpected AST for function 'if'"); const auto * function_args = function_node->arguments->as(); if (!function_args || function_args->children.size() != 3) - throw Exception("Wrong number of arguments for function 'if' (" + toString(function_args->children.size()) + " instead of 3)", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Wrong number of arguments for function 'if' ({} instead of 3)", + function_args->children.size()); const auto * else_arg = function_args->children[2]->as(); diff --git a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp index 824ad22bb12..13b6311a877 100644 --- a/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp +++ b/src/Interpreters/OptimizeIfWithConstantConditionVisitor.cpp @@ -39,7 +39,7 @@ static bool tryExtractConstValueFromCondition(const ASTPtr & condition, bool & v if (const auto * expr_list = function->arguments->as()) { if (expr_list->children.size() != 2) - throw Exception("Function CAST must have exactly two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function CAST must have exactly two arguments"); const ASTPtr & type_ast = expr_list->children.at(1); if (const auto * type_literal = type_ast->as()) @@ -85,12 +85,12 @@ void OptimizeIfWithConstantConditionVisitor::visit(ASTPtr & current_ast) } if (!function_node->arguments) - throw Exception("Wrong number of arguments for function 'if' (0 instead of 3)", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Wrong number of arguments for function 'if' (0 instead of 3)"); if (function_node->arguments->children.size() != 3) - throw Exception( - "Wrong number of arguments for function 'if' (" + toString(function_node->arguments->children.size()) + " instead of 3)", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Wrong number of arguments for function 'if' ({} instead of 3)", + function_node->arguments->children.size()); visit(function_node->arguments); const auto * args = function_node->arguments->as(); diff --git a/src/Interpreters/PredicateExpressionsOptimizer.cpp b/src/Interpreters/PredicateExpressionsOptimizer.cpp index fd77f651ff5..d9ea29fe1d8 100644 --- a/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -91,8 +91,9 @@ bool PredicateExpressionsOptimizer::tryRewritePredicatesToTables(ASTs & tables_e bool is_rewrite_tables = false; if (tables_element.size() != tables_predicates.size()) - throw Exception("Unexpected elements count in predicate push down: `set enable_optimize_predicate_expression = 0` to disable", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Unexpected elements count in predicate push down: " + "`set enable_optimize_predicate_expression = 0` to disable"); for (size_t index = tables_element.size(); index > 0; --index) { diff --git a/src/Interpreters/ProcessList.cpp b/src/Interpreters/ProcessList.cpp index 60298142d92..aab8cff71ad 100644 --- a/src/Interpreters/ProcessList.cpp +++ b/src/Interpreters/ProcessList.cpp @@ -78,7 +78,7 @@ 
ProcessList::insert(const String & query_, const IAST * ast, ContextMutablePtr q const Settings & settings = query_context->getSettingsRef(); if (client_info.current_query_id.empty()) - throw Exception("Query id cannot be empty", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Query id cannot be empty"); bool is_unlimited_query = isUnlimitedQuery(ast); @@ -92,7 +92,7 @@ ProcessList::insert(const String & query_, const IAST * ast, ContextMutablePtr q if (queue_max_wait_ms) LOG_WARNING(&Poco::Logger::get("ProcessList"), "Too many simultaneous queries, will wait {} ms.", queue_max_wait_ms); if (!queue_max_wait_ms || !have_space.wait_for(lock, std::chrono::milliseconds(queue_max_wait_ms), [&]{ return processes.size() < max_size; })) - throw Exception("Too many simultaneous queries. Maximum: " + toString(max_size), ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES); + throw Exception(ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES, "Too many simultaneous queries. Maximum: {}", max_size); } if (!is_unlimited_query) @@ -130,10 +130,8 @@ ProcessList::insert(const String & query_, const IAST * ast, ContextMutablePtr q if (!is_unlimited_query && settings.max_concurrent_queries_for_all_users && processes.size() >= settings.max_concurrent_queries_for_all_users) - throw Exception( - "Too many simultaneous queries for all users. Current: " + toString(processes.size()) - + ", maximum: " + settings.max_concurrent_queries_for_all_users.toString(), - ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES); + throw Exception(ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES, "Too many simultaneous queries for all users. " + "Current: {}, maximum: {}", processes.size(), settings.max_concurrent_queries_for_all_users.toString()); } /** Why we use current user? @@ -153,10 +151,11 @@ ProcessList::insert(const String & query_, const IAST * ast, ContextMutablePtr q { if (!is_unlimited_query && settings.max_concurrent_queries_for_user && user_process_list->second.queries.size() >= settings.max_concurrent_queries_for_user) - throw Exception("Too many simultaneous queries for user " + client_info.current_user - + ". Current: " + toString(user_process_list->second.queries.size()) - + ", maximum: " + settings.max_concurrent_queries_for_user.toString(), - ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES); + throw Exception(ErrorCodes::TOO_MANY_SIMULTANEOUS_QUERIES, + "Too many simultaneous queries for user {}. " + "Current: {}, maximum: {}", + client_info.current_user, user_process_list->second.queries.size(), + settings.max_concurrent_queries_for_user.toString()); auto running_query = user_process_list->second.queries.find(client_info.current_query_id); diff --git a/src/Interpreters/ProcessList.h b/src/Interpreters/ProcessList.h index 34edfc5a2e2..eae8b15c695 100644 --- a/src/Interpreters/ProcessList.h +++ b/src/Interpreters/ProcessList.h @@ -25,7 +25,6 @@ #include #include #include -#include #include #include diff --git a/src/Interpreters/QueryNormalizer.cpp b/src/Interpreters/QueryNormalizer.cpp index 921d004af94..4db61501d3d 100644 --- a/src/Interpreters/QueryNormalizer.cpp +++ b/src/Interpreters/QueryNormalizer.cpp @@ -33,7 +33,7 @@ public: : data(data_) { if (data.level > data.settings.max_ast_depth) - throw Exception("Normalized AST is too deep. Maximum: " + toString(data.settings.max_ast_depth), ErrorCodes::TOO_DEEP_AST); + throw Exception(ErrorCodes::TOO_DEEP_AST, "Normalized AST is too deep. 
Maximum: {}", data.settings.max_ast_depth); ++data.level; } @@ -83,7 +83,8 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) /// If it is an alias, but not a parent alias (for constructs like "SELECT column + 1 AS column"). auto it_alias = data.aliases.find(node.name()); if (!data.allow_self_aliases && current_alias == node.name()) - throw Exception(ErrorCodes::CYCLIC_ALIASES, "Self referencing of {} to {}. Cyclic alias", backQuote(current_alias), backQuote(node.name())); + throw Exception(ErrorCodes::CYCLIC_ALIASES, "Self referencing of {} to {}. Cyclic alias", + backQuote(current_alias), backQuote(node.name())); if (it_alias != data.aliases.end() && current_alias != node.name()) { @@ -101,7 +102,7 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) if (current_asts.contains(alias_node.get()) /// We have loop of multiple aliases || (node.name() == our_alias_or_name && our_name && node_alias == *our_name)) /// Our alias points to node.name, direct loop - throw Exception("Cyclic aliases", ErrorCodes::CYCLIC_ALIASES); + throw Exception(ErrorCodes::CYCLIC_ALIASES, "Cyclic aliases"); /// Let's replace it with the corresponding tree node. if (!node_alias.empty() && node_alias != our_alias_or_name) @@ -178,7 +179,7 @@ void QueryNormalizer::visitChildren(IAST * node, Data & data) if (func_node->tryGetQueryArgument()) { if (func_node->name != "view") - throw Exception("Query argument can only be used in the `view` TableFunction", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Query argument can only be used in the `view` TableFunction"); /// Don't go into query argument. return; } @@ -268,7 +269,7 @@ void QueryNormalizer::visit(ASTPtr & ast, Data & data) else if (auto * node_param = ast->as()) { if (!data.is_create_parameterized_view) - throw Exception("Query parameter " + backQuote(node_param->name) + " was not set", ErrorCodes::UNKNOWN_QUERY_PARAMETER); + throw Exception(ErrorCodes::UNKNOWN_QUERY_PARAMETER, "Query parameter {} was not set", backQuote(node_param->name)); } else if (auto * node_function = ast->as()) if (node_function->parameters) diff --git a/src/Interpreters/ReplaceQueryParameterVisitor.cpp b/src/Interpreters/ReplaceQueryParameterVisitor.cpp index 98e35d69ab7..6f7a0b83128 100644 --- a/src/Interpreters/ReplaceQueryParameterVisitor.cpp +++ b/src/Interpreters/ReplaceQueryParameterVisitor.cpp @@ -55,7 +55,7 @@ const String & ReplaceQueryParameterVisitor::getParamValue(const String & name) if (search != query_parameters.end()) return search->second; else - throw Exception("Substitution " + backQuote(name) + " is not set", ErrorCodes::UNKNOWN_QUERY_PARAMETER); + throw Exception(ErrorCodes::UNKNOWN_QUERY_PARAMETER, "Substitution {} is not set", backQuote(name)); } void ReplaceQueryParameterVisitor::visitQueryParameter(ASTPtr & ast) diff --git a/src/Interpreters/RequiredSourceColumnsVisitor.cpp b/src/Interpreters/RequiredSourceColumnsVisitor.cpp index 18cbfaee63f..1bcec02f0c0 100644 --- a/src/Interpreters/RequiredSourceColumnsVisitor.cpp +++ b/src/Interpreters/RequiredSourceColumnsVisitor.cpp @@ -21,19 +21,19 @@ namespace ErrorCodes std::vector RequiredSourceColumnsMatcher::extractNamesFromLambda(const ASTFunction & node) { if (node.arguments->children.size() != 2) - throw Exception("lambda requires two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "lambda requires two arguments"); const auto * lambda_args_tuple = 
node.arguments->children[0]->as(); if (!lambda_args_tuple || lambda_args_tuple->name != "tuple") - throw Exception("First argument of lambda must be a tuple", ErrorCodes::TYPE_MISMATCH); + throw Exception(ErrorCodes::TYPE_MISMATCH, "First argument of lambda must be a tuple"); std::vector names; for (auto & child : lambda_args_tuple->arguments->children) { const auto * identifier = child->as(); if (!identifier) - throw Exception("lambda argument declarations must be identifiers", ErrorCodes::TYPE_MISMATCH); + throw Exception(ErrorCodes::TYPE_MISMATCH, "lambda argument declarations must be identifiers"); names.push_back(identifier->name()); } @@ -171,7 +171,7 @@ void RequiredSourceColumnsMatcher::visit(const ASTIdentifier & node, const ASTPt { // FIXME(ilezhankin): shouldn't ever encounter if (node.name().empty()) - throw Exception("Expected not empty name", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Expected not empty name"); if (!data.private_aliases.contains(node.name())) data.addColumnIdentifier(node); @@ -211,7 +211,7 @@ void RequiredSourceColumnsMatcher::visit(const ASTArrayJoin & node, const ASTPtr { ASTPtr expression_list = node.expression_list; if (!expression_list || expression_list->children.empty()) - throw Exception("Expected not empty expression_list", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Expected not empty expression_list"); std::vector out; diff --git a/src/Interpreters/RowRefs.cpp b/src/Interpreters/RowRefs.cpp index 0e553ef145e..2d4f807ad46 100644 --- a/src/Interpreters/RowRefs.cpp +++ b/src/Interpreters/RowRefs.cpp @@ -238,7 +238,7 @@ AsofRowRefs createAsofRowRef(TypeIndex type, ASOFJoinInequality inequality) result = std::make_unique>(); break; default: - throw Exception("Invalid ASOF Join order", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid ASOF Join order"); } }; @@ -265,7 +265,7 @@ std::optional SortedLookupVectorBase::getTypeSize(const IColumn & aso DISPATCH(DateTime64) #undef DISPATCH - throw Exception("ASOF join not supported for type: " + std::string(asof_column.getFamilyName()), ErrorCodes::BAD_TYPE_OF_FIELD); + throw Exception(ErrorCodes::BAD_TYPE_OF_FIELD, "ASOF join not supported for type: {}", std::string(asof_column.getFamilyName())); } } diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index b6f120edc6c..5c72e24c577 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -107,7 +107,7 @@ public: if (it == sessions.end()) { if (throw_if_not_found) - throw Exception("Session not found.", ErrorCodes::SESSION_NOT_FOUND); + throw Exception(ErrorCodes::SESSION_NOT_FOUND, "Session not found."); /// Create a new session from current context. 
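Nearly every hunk in this patch migrates call sites from string-concatenated messages with the error code passed last to the code-first form that takes a format string and arguments. A minimal standalone sketch of that pattern, built directly on the fmt library rather than ClickHouse's real Exception class (the class name and the error-code value below are illustrative only):

#include <fmt/core.h>
#include <stdexcept>
#include <string>
#include <utility>

// Hypothetical stand-in for the code-first exception constructor: the message is
// rendered from a format string, so call sites no longer concatenate strings by hand.
class CodedException : public std::runtime_error
{
public:
    template <typename... Args>
    CodedException(int code_, fmt::format_string<Args...> fmt, Args &&... args)
        : std::runtime_error(fmt::format(fmt, std::forward<Args>(args)...)), code(code_)
    {
    }

    int code;
};

int main()
{
    try
    {
        int max_size = 100;
        // Reads like the new call sites: error code first, then "{}" placeholders.
        throw CodedException(42, "Too many simultaneous queries. Maximum: {}", max_size);
    }
    catch (const CodedException & e)
    {
        fmt::print("code {}: {}\n", e.code, e.what());
    }
    return 0;
}

With recent fmt versions the arguments are checked against the format string at compile time, and the message text stays in one piece instead of being split across `+` concatenations.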
auto context = Context::createCopy(global_context); @@ -129,7 +129,7 @@ public: LOG_TEST(log, "Reuse session from storage with session_id: {}, user_id: {}", key.second, key.first); if (!session.unique()) - throw Exception("Session is locked by a concurrent client.", ErrorCodes::SESSION_IS_LOCKED); + throw Exception(ErrorCodes::SESSION_IS_LOCKED, "Session is locked by a concurrent client."); return {session, false}; } } @@ -311,7 +311,7 @@ void Session::authenticate(const String & user_name, const String & password, co void Session::authenticate(const Credentials & credentials_, const Poco::Net::SocketAddress & address_) { if (session_context) - throw Exception("If there is a session context it must be created after authentication", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "If there is a session context it must be created after authentication"); auto address = address_; if ((address == Poco::Net::SocketAddress{}) && (prepared_client_info->interface == ClientInfo::Interface::LOCAL)) @@ -362,11 +362,11 @@ const ClientInfo & Session::getClientInfo() const ContextMutablePtr Session::makeSessionContext() { if (session_context) - throw Exception("Session context already exists", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Session context already exists"); if (query_context_created) - throw Exception("Session context must be created before any query context", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Session context must be created before any query context"); if (!user_id) - throw Exception("Session context must be created after authentication", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Session context must be created after authentication"); LOG_DEBUG(log, "{} Creating session context with user_id: {}", toString(auth_id), toString(*user_id)); @@ -394,11 +394,11 @@ ContextMutablePtr Session::makeSessionContext() ContextMutablePtr Session::makeSessionContext(const String & session_name_, std::chrono::steady_clock::duration timeout_, bool session_check_) { if (session_context) - throw Exception("Session context already exists", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Session context already exists"); if (query_context_created) - throw Exception("Session context must be created before any query context", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Session context must be created before any query context"); if (!user_id) - throw Exception("Session context must be created after authentication", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Session context must be created after authentication"); LOG_DEBUG(log, "{} Creating named session context with name: {}, user_id: {}", toString(auth_id), session_name_, toString(*user_id)); @@ -453,7 +453,7 @@ std::shared_ptr Session::getSessionLog() const ContextMutablePtr Session::makeQueryContextImpl(const ClientInfo * client_info_to_copy, ClientInfo * client_info_to_move) const { if (!user_id && getClientInfo().interface != ClientInfo::Interface::TCP_INTERSERVER) - throw Exception("Session context must be created after authentication", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Session context must be created after authentication"); /// We can create a query context either from a session context or from a global context. 
 bool from_session_context = static_cast(session_context);
diff --git a/src/Interpreters/Set.cpp b/src/Interpreters/Set.cpp
index e75232aa0f5..75bb05f8346 100644
--- a/src/Interpreters/Set.cpp
+++ b/src/Interpreters/Set.cpp
@@ -176,10 +176,10 @@ bool Set::insertFromBlock(const ColumnsWithTypeAndName & columns)
 bool Set::insertFromBlock(const Columns & columns)
 {
-    std::lock_guard lock(rwlock);
+    std::lock_guard lock(rwlock);
     if (data.empty())
-        throw Exception("Method Set::setHeader must be called before Set::insertFromBlock", ErrorCodes::LOGICAL_ERROR);
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Method Set::setHeader must be called before Set::insertFromBlock");
     ColumnRawPtrs key_columns;
     key_columns.reserve(keys_size);
@@ -242,7 +242,7 @@ ColumnPtr Set::execute(const ColumnsWithTypeAndName & columns, bool negative) co
     size_t num_key_columns = columns.size();
     if (0 == num_key_columns)
-        throw Exception("Logical error: no columns passed to Set::execute method.", ErrorCodes::LOGICAL_ERROR);
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: no columns passed to Set::execute method.");
     auto res = ColumnUInt8::create();
     ColumnUInt8::Container & vec_res = res->getData();
@@ -416,9 +416,9 @@ bool Set::areTypesEqual(size_t set_type_idx, const DataTypePtr & other_type) con
 void Set::checkTypesEqual(size_t set_type_idx, const DataTypePtr & other_type) const
 {
     if (!this->areTypesEqual(set_type_idx, other_type))
-        throw Exception("Types of column " + toString(set_type_idx + 1) + " in section IN don't match: "
-            + other_type->getName() + " on the left, "
-            + data_types[set_type_idx]->getName() + " on the right", ErrorCodes::TYPE_MISMATCH);
+        throw Exception(ErrorCodes::TYPE_MISMATCH, "Types of column {} in section IN don't match: "
+            "{} on the left, {} on the right", toString(set_type_idx + 1),
+            other_type->getName(), data_types[set_type_idx]->getName());
 }
 MergeTreeSetIndex::MergeTreeSetIndex(const Columns & set_elements, std::vector && indexes_mapping_)
diff --git a/src/Interpreters/Set.h b/src/Interpreters/Set.h
index bafb0dcea7a..00eff614c7c 100644
--- a/src/Interpreters/Set.h
+++ b/src/Interpreters/Set.h
@@ -1,6 +1,5 @@
 #pragma once
-#include
 #include
 #include
 #include
@@ -8,6 +7,7 @@
 #include
 #include
+#include
 #include
@@ -131,7 +131,7 @@ private:
     /** Protects work with the set in the functions `insertFromBlock` and `execute`.
      * These functions can be called simultaneously from different threads only when using StorageSet,
      */
-    mutable std::shared_mutex rwlock;
+    mutable SharedMutex rwlock;
     template
     void insertFromBlockImpl(
diff --git a/src/Interpreters/SetVariants.cpp b/src/Interpreters/SetVariants.cpp
index f1fdc6c4095..cd9148a01cf 100644
--- a/src/Interpreters/SetVariants.cpp
+++ b/src/Interpreters/SetVariants.cpp
@@ -119,7 +119,7 @@ typename SetVariantsTemplate::Type SetVariantsTemplate::choose
     /// Pack if possible all the keys along with information about which key values are nulls
     /// into a fixed 16- or 32-byte blob.
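The SetVariants comment above describes packing all fixed-size keys plus a null bitmap into a 16- or 32-byte blob; the check that follows compares the key bytes plus the bitmap size against those thresholds. A small self-contained sketch of that width selection (function and parameter names are made up for illustration, and the null-map size is taken as a parameter rather than derived from KeysNullMap):

#include <cstddef>
#include <cstdio>

enum class SetKeysType { nullable_keys128, nullable_keys256, hashed };

// Mirror of the check below: if the concatenated fixed-size keys plus the null
// bitmap fit into 16 (resp. 32) bytes, use the packed 128/256-bit key layout,
// otherwise fall back to a hashed representation.
SetKeysType choosePackedLayout(std::size_t keys_bytes, std::size_t null_map_bytes)
{
    if (keys_bytes + null_map_bytes <= 16)
        return SetKeysType::nullable_keys128;
    if (keys_bytes + null_map_bytes <= 32)
        return SetKeysType::nullable_keys256;
    return SetKeysType::hashed;
}

int main()
{
    // e.g. three UInt32 keys (12 bytes) plus 1 byte of null flags -> 128-bit packed keys
    std::printf("%d\n", static_cast<int>(choosePackedLayout(12, 1)));
    return 0;
}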
if (keys_bytes > (std::numeric_limits::max() - std::tuple_size>::value)) - throw Exception{"Aggregator: keys sizes overflow", ErrorCodes::LOGICAL_ERROR}; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Aggregator: keys sizes overflow"); if ((std::tuple_size>::value + keys_bytes) <= 16) return Type::nullable_keys128; if ((std::tuple_size>::value + keys_bytes) <= 32) @@ -146,7 +146,7 @@ typename SetVariantsTemplate::Type SetVariantsTemplate::choose return Type::keys128; if (size_of_field == 32) return Type::keys256; - throw Exception("Logical error: numeric column has sizeOfField not in 1, 2, 4, 8, 16, 32.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: numeric column has sizeOfField not in 1, 2, 4, 8, 16, 32."); } /// If the keys fit in N bits, we will use a hash table for N-bit-packed keys diff --git a/src/Interpreters/SetVariants.h b/src/Interpreters/SetVariants.h index d6be996effb..71187c9f109 100644 --- a/src/Interpreters/SetVariants.h +++ b/src/Interpreters/SetVariants.h @@ -136,20 +136,17 @@ class BaseStateKeysFixed protected: void init(const ColumnRawPtrs &) { - throw Exception{"Internal error: calling init() for non-nullable" - " keys is forbidden", ErrorCodes::LOGICAL_ERROR}; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Internal error: calling init() for non-nullable keys is forbidden"); } const ColumnRawPtrs & getActualColumns() const { - throw Exception{"Internal error: calling getActualColumns() for non-nullable" - " keys is forbidden", ErrorCodes::LOGICAL_ERROR}; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Internal error: calling getActualColumns() for non-nullable keys is forbidden"); } KeysNullMap createBitmap(size_t) const { - throw Exception{"Internal error: calling createBitmap() for non-nullable keys" - " is forbidden", ErrorCodes::LOGICAL_ERROR}; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Internal error: calling createBitmap() for non-nullable keys is forbidden"); } }; diff --git a/src/Interpreters/SquashingTransform.cpp b/src/Interpreters/SquashingTransform.cpp index 593b141e550..4ed0dddc191 100644 --- a/src/Interpreters/SquashingTransform.cpp +++ b/src/Interpreters/SquashingTransform.cpp @@ -110,7 +110,7 @@ bool SquashingTransform::isEnoughSize(const Block & block) if (!rows) rows = column->size(); else if (rows != column->size()) - throw Exception("Sizes of columns doesn't match", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); + throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Sizes of columns doesn't match"); bytes += column->byteSize(); } diff --git a/src/Interpreters/StorageID.cpp b/src/Interpreters/StorageID.cpp index 70dea02ccc5..b3a504d7ef4 100644 --- a/src/Interpreters/StorageID.cpp +++ b/src/Interpreters/StorageID.cpp @@ -40,7 +40,7 @@ StorageID::StorageID(const ASTPtr & node) else if (const auto * simple_query = dynamic_cast(node.get())) *this = StorageID(*simple_query); else - throw Exception("Unexpected AST", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected AST"); } String StorageID::getTableName() const @@ -53,7 +53,7 @@ String StorageID::getDatabaseName() const { assertNotEmpty(); if (database_name.empty()) - throw Exception("Database name is empty", ErrorCodes::UNKNOWN_DATABASE); + throw Exception(ErrorCodes::UNKNOWN_DATABASE, "Database name is empty"); return database_name; } diff --git a/src/Interpreters/StorageID.h b/src/Interpreters/StorageID.h index 68c83f753b5..147d50b4e4f 100644 --- a/src/Interpreters/StorageID.h +++ b/src/Interpreters/StorageID.h @@ -79,9 
+79,9 @@ struct StorageID { // Can be triggered by user input, e.g. SELECT joinGetOrNull('', 'num', 500) if (empty()) - throw Exception("Both table name and UUID are empty", ErrorCodes::UNKNOWN_TABLE); + throw Exception(ErrorCodes::UNKNOWN_TABLE, "Both table name and UUID are empty"); if (table_name.empty() && !database_name.empty()) - throw Exception("Table name is empty, but database name is not", ErrorCodes::UNKNOWN_TABLE); + throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table name is empty, but database name is not"); } /// Avoid implicit construction of empty StorageID. However, it's needed for deferred initialization. diff --git a/src/Interpreters/SubstituteColumnOptimizer.cpp b/src/Interpreters/SubstituteColumnOptimizer.cpp index da738d3db1e..d98491aaf9e 100644 --- a/src/Interpreters/SubstituteColumnOptimizer.cpp +++ b/src/Interpreters/SubstituteColumnOptimizer.cpp @@ -242,7 +242,7 @@ void SubstituteColumnOptimizer::perform() { auto * list = select_query->refSelect()->as(); if (!list) - throw Exception("List of selected columns must be ASTExpressionList", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "List of selected columns must be ASTExpressionList"); for (ASTPtr & ast : list->children) ast->setAlias(ast->getAliasOrColumnName()); diff --git a/src/Interpreters/SynonymsExtensions.cpp b/src/Interpreters/SynonymsExtensions.cpp index 7979c849975..8715b321a43 100644 --- a/src/Interpreters/SynonymsExtensions.cpp +++ b/src/Interpreters/SynonymsExtensions.cpp @@ -35,8 +35,7 @@ public: { std::ifstream file(path); if (!file.is_open()) - throw Exception("Cannot find synonyms extension at: " + path, - ErrorCodes::INVALID_CONFIG_PARAMETER); + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Cannot find synonyms extension at: {}", path); String line; while (std::getline(file, line)) @@ -104,24 +103,24 @@ SynonymsExtensions::SynonymsExtensions(const Poco::Util::AbstractConfiguration & const auto & ext_type = config.getString(prefix + "." + key + ".type", ""); if (ext_name.empty()) - throw Exception("Extension name in config is not specified here: " + prefix + "." + key + ".name", - ErrorCodes::INVALID_CONFIG_PARAMETER); + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Extension name in config is not specified here: {}.{}.name", + prefix, key); if (ext_path.empty()) - throw Exception("Extension path in config is not specified here: " + prefix + "." + key + ".path", - ErrorCodes::INVALID_CONFIG_PARAMETER); + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Extension path in config is not specified here: {}.{}.path", + prefix, key); if (ext_type.empty()) - throw Exception("Extension type in config is not specified here: " + prefix + "." + key + ".type", - ErrorCodes::INVALID_CONFIG_PARAMETER); + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Extension type in config is not specified here: {}.{}.type", + prefix, key); if (ext_type != "plain" && ext_type != "wordnet") - throw Exception("Unknown extension type in config: " + prefix + "." + key + ".type, must be 'plain' or 'wordnet'", - ErrorCodes::INVALID_CONFIG_PARAMETER); + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Unknown extension type in config: " + "{}.{}.type, must be 'plain' or 'wordnet'", prefix, key); info[ext_name].path = ext_path; info[ext_name].type = ext_type; } else - throw Exception("Unknown element in config: " + prefix + "." 
+ key + ", must be 'extension'", - ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG, "Unknown element in config: {}.{}, must be 'extension'", + prefix, key); } } @@ -141,13 +140,12 @@ SynonymsExtensions::ExtPtr SynonymsExtensions::getExtension(const String & name) else if (ext_info.type == "wordnet") extensions[name] = std::make_shared(ext_info.path); else - throw Exception("Unknown extension type: " + ext_info.type, ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown extension type: {}", ext_info.type); return extensions[name]; } - throw Exception("Extension named: '" + name + "' is not found", - ErrorCodes::INVALID_CONFIG_PARAMETER); + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Extension named: '{}' is not found", name); } } diff --git a/src/Interpreters/SystemLog.cpp b/src/Interpreters/SystemLog.cpp index b74f550f697..f9343b7889d 100644 --- a/src/Interpreters/SystemLog.cpp +++ b/src/Interpreters/SystemLog.cpp @@ -136,17 +136,15 @@ std::shared_ptr createSystemLog( if (config.has(config_prefix + ".engine")) { if (config.has(config_prefix + ".partition_by")) - throw Exception("If 'engine' is specified for system table, " - "PARTITION BY parameters should be specified directly inside 'engine' and 'partition_by' setting doesn't make sense", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "If 'engine' is specified for system table, PARTITION BY parameters should " + "be specified directly inside 'engine' and 'partition_by' setting doesn't make sense"); if (config.has(config_prefix + ".ttl")) - throw Exception("If 'engine' is specified for system table, " - "TTL parameters should be specified directly inside 'engine' and 'ttl' setting doesn't make sense", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "If 'engine' is specified for system table, " + "TTL parameters should be specified directly inside 'engine' and 'ttl' setting doesn't make sense"); if (config.has(config_prefix + ".storage_policy")) - throw Exception("If 'engine' is specified for system table, SETTINGS storage_policy = '...' " - "should be specified directly inside 'engine' and 'storage_policy' setting doesn't make sense", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "If 'engine' is specified for system table, SETTINGS storage_policy = '...' 
" + "should be specified directly inside 'engine' and 'storage_policy' setting doesn't make sense"); engine = config.getString(config_prefix + ".engine"); } else diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index 78218ac59a5..7ea7a265263 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -141,7 +141,7 @@ void TableJoin::addDisjunct() clauses.emplace_back(); if (getStorageJoin() && clauses.size() > 1) - throw Exception("StorageJoin with ORs is not supported", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "StorageJoin with ORs is not supported"); } void TableJoin::addOnKeys(ASTPtr & left_table_ast, ASTPtr & right_table_ast) @@ -490,11 +490,11 @@ void TableJoin::inferJoinKeyCommonType(const LeftNamesAndTypes & left, const Rig if (strictness() == JoinStrictness::Asof) { if (clauses.size() != 1) - throw DB::Exception("ASOF join over multiple keys is not supported", ErrorCodes::NOT_IMPLEMENTED); + throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "ASOF join over multiple keys is not supported"); auto asof_key_type = right_types.find(clauses.back().key_names_right.back()); if (asof_key_type != right_types.end() && asof_key_type->second->isNullable()) - throw DB::Exception("ASOF join over right table Nullable column is not implemented", ErrorCodes::NOT_IMPLEMENTED); + throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "ASOF join over right table Nullable column is not implemented"); } forAllKeys(clauses, [&](const auto & left_key_name, const auto & right_key_name) diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h index 9d03c9bd57b..84390adc0df 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -198,7 +198,7 @@ public: : size_limits(limits) , default_max_bytes(0) , join_use_nulls(use_nulls) - , join_algorithm(JoinAlgorithm::HASH) + , join_algorithm(JoinAlgorithm::DEFAULT) { clauses.emplace_back().key_names_right = key_names_right; table_join.kind = kind; diff --git a/src/Interpreters/TableOverrideUtils.cpp b/src/Interpreters/TableOverrideUtils.cpp index 58e885380bf..afff5ac0111 100644 --- a/src/Interpreters/TableOverrideUtils.cpp +++ b/src/Interpreters/TableOverrideUtils.cpp @@ -106,7 +106,9 @@ void TableOverrideAnalyzer::analyze(const StorageInMemoryMetadata & metadata, Re if (auto col_default = metadata.columns.getDefault(found->name)) existing_default_kind = col_default->kind; if (existing_default_kind != override_default_kind) - throw Exception(ErrorCodes::INVALID_TABLE_OVERRIDE, "column {}: modifying default specifier is not allowed", backQuote(override_column->name)); + throw Exception(ErrorCodes::INVALID_TABLE_OVERRIDE, + "column {}: modifying default specifier is not allowed", + backQuote(override_column->name)); result.modified_columns.push_back({found->name, override_type}); /// TODO: validate that the original type can be converted to the overridden type } diff --git a/src/Interpreters/TablesStatus.cpp b/src/Interpreters/TablesStatus.cpp index 5d94624be85..005a4515c3a 100644 --- a/src/Interpreters/TablesStatus.cpp +++ b/src/Interpreters/TablesStatus.cpp @@ -35,9 +35,7 @@ void TableStatus::read(ReadBuffer & in) void TablesStatusRequest::write(WriteBuffer & out, UInt64 server_protocol_revision) const { if (server_protocol_revision < DBMS_MIN_REVISION_WITH_TABLES_STATUS) - throw Exception( - "Logical error: method TablesStatusRequest::write is called for unsupported server revision", - ErrorCodes::LOGICAL_ERROR); + throw 
Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: method TablesStatusRequest::write is called for unsupported server revision"); writeVarUInt(tables.size(), out); for (const auto & table_name : tables) @@ -50,15 +48,13 @@ void TablesStatusRequest::write(WriteBuffer & out, UInt64 server_protocol_revisi void TablesStatusRequest::read(ReadBuffer & in, UInt64 client_protocol_revision) { if (client_protocol_revision < DBMS_MIN_REVISION_WITH_TABLES_STATUS) - throw Exception( - "method TablesStatusRequest::read is called for unsupported client revision", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "method TablesStatusRequest::read is called for unsupported client revision"); size_t size = 0; readVarUInt(size, in); if (size > DEFAULT_MAX_STRING_SIZE) - throw Exception("Too large collection size.", ErrorCodes::TOO_LARGE_ARRAY_SIZE); + throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large collection size."); for (size_t i = 0; i < size; ++i) { @@ -72,9 +68,7 @@ void TablesStatusRequest::read(ReadBuffer & in, UInt64 client_protocol_revision) void TablesStatusResponse::write(WriteBuffer & out, UInt64 client_protocol_revision) const { if (client_protocol_revision < DBMS_MIN_REVISION_WITH_TABLES_STATUS) - throw Exception( - "method TablesStatusResponse::write is called for unsupported client revision", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "method TablesStatusResponse::write is called for unsupported client revision"); writeVarUInt(table_states_by_id.size(), out); for (const auto & kv: table_states_by_id) @@ -91,15 +85,13 @@ void TablesStatusResponse::write(WriteBuffer & out, UInt64 client_protocol_revis void TablesStatusResponse::read(ReadBuffer & in, UInt64 server_protocol_revision) { if (server_protocol_revision < DBMS_MIN_REVISION_WITH_TABLES_STATUS) - throw Exception( - "method TablesStatusResponse::read is called for unsupported server revision", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "method TablesStatusResponse::read is called for unsupported server revision"); size_t size = 0; readVarUInt(size, in); if (size > DEFAULT_MAX_STRING_SIZE) - throw Exception("Too large collection size.", ErrorCodes::TOO_LARGE_ARRAY_SIZE); + throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large collection size."); for (size_t i = 0; i < size; ++i) { diff --git a/src/Interpreters/TemporaryDataOnDisk.cpp b/src/Interpreters/TemporaryDataOnDisk.cpp index 9e9389451b7..e183124cadf 100644 --- a/src/Interpreters/TemporaryDataOnDisk.cpp +++ b/src/Interpreters/TemporaryDataOnDisk.cpp @@ -52,13 +52,13 @@ TemporaryFileStream & TemporaryDataOnDisk::createStream(const Block & header, si else if (volume) return createStreamToRegularFile(header, max_file_size); - throw Exception("TemporaryDataOnDiskScope has no cache and no volume", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "TemporaryDataOnDiskScope has no cache and no volume"); } TemporaryFileStream & TemporaryDataOnDisk::createStreamToCacheFile(const Block & header, size_t max_file_size) { if (!file_cache) - throw Exception("TemporaryDataOnDiskScope has no cache", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "TemporaryDataOnDiskScope has no cache"); auto holder = file_cache->set(FileSegment::Key::random(), 0, std::max(10_MiB, max_file_size), CreateFileSegmentSettings(FileSegmentKind::Temporary, /* unbounded */ true)); @@ -70,14 +70,14 @@ TemporaryFileStream & 
TemporaryDataOnDisk::createStreamToCacheFile(const Block & TemporaryFileStream & TemporaryDataOnDisk::createStreamToRegularFile(const Block & header, size_t max_file_size) { if (!volume) - throw Exception("TemporaryDataOnDiskScope has no volume", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "TemporaryDataOnDiskScope has no volume"); DiskPtr disk; if (max_file_size > 0) { auto reservation = volume->reserve(max_file_size); if (!reservation) - throw Exception("Not enough space on temporary disk", ErrorCodes::NOT_ENOUGH_SPACE); + throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Not enough space on temporary disk"); disk = reservation->getDisk(); } else @@ -131,7 +131,7 @@ struct TemporaryFileStream::OutputWriter size_t write(const Block & block) { if (finalized) - throw Exception("Cannot write to finalized stream", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot write to finalized stream"); size_t written_bytes = out_writer.write(block); num_rows += block.rows(); return written_bytes; @@ -140,7 +140,7 @@ struct TemporaryFileStream::OutputWriter void flush() { if (finalized) - throw Exception("Cannot flush finalized stream", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot flush finalized stream"); out_compressed_buf.next(); out_buf->next(); @@ -233,7 +233,7 @@ TemporaryFileStream::TemporaryFileStream(FileSegmentsHolder && segments_, const size_t TemporaryFileStream::write(const Block & block) { if (!out_writer) - throw Exception("Writing has been finished", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Writing has been finished"); updateAllocAndCheck(); size_t bytes_written = out_writer->write(block); @@ -243,7 +243,7 @@ size_t TemporaryFileStream::write(const Block & block) void TemporaryFileStream::flush() { if (!out_writer) - throw Exception("Writing has been finished", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Writing has been finished"); out_writer->flush(); } diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index 4b757e0be7e..816b03f3a0e 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -151,12 +151,12 @@ void ThreadStatus::attachQuery(const ThreadGroupStatusPtr & thread_group_, bool if (thread_state == ThreadState::AttachedToQuery) { if (check_detached) - throw Exception("Can't attach query to the thread, it is already attached", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't attach query to the thread, it is already attached"); return; } if (!thread_group_) - throw Exception("Attempt to attach to nullptr thread group", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to attach to nullptr thread group"); setupState(thread_group_); } diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 36691885459..aeb912ddfbb 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -125,8 +125,7 @@ void TranslateQualifiedNamesMatcher::visit(ASTIdentifier & identifier, ASTPtr &, if (data.unknownColumn(table_pos, identifier)) { String table_name = data.tables[table_pos].table.getQualifiedNamePrefix(false); - throw Exception("There's no column '" + identifier.name() + "' in table '" + table_name + "'", - ErrorCodes::UNKNOWN_IDENTIFIER); + throw 
Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "There's no column '{}' in table '{}'", identifier.name(), table_name); } IdentifierSemantic::setMembership(identifier, table_pos); @@ -159,7 +158,7 @@ void TranslateQualifiedNamesMatcher::visit(ASTFunction & node, const ASTPtr &, D void TranslateQualifiedNamesMatcher::visit(const ASTQualifiedAsterisk & node, const ASTPtr &, Data & data) { if (!node.qualifier) - throw Exception("Logical error: qualified asterisk must have a qualifier", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: qualified asterisk must have a qualifier"); /// @note it could contain table alias as table name. DatabaseAndTableWithAlias db_and_table(node.qualifier); @@ -168,7 +167,7 @@ void TranslateQualifiedNamesMatcher::visit(const ASTQualifiedAsterisk & node, co if (db_and_table.satisfies(known_table.table, true)) return; - throw Exception("Unknown qualified identifier: " + node.qualifier->getAliasOrColumnName(), ErrorCodes::UNKNOWN_IDENTIFIER); + throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown qualified identifier: {}", node.qualifier->getAliasOrColumnName()); } void TranslateQualifiedNamesMatcher::visit(ASTTableJoin & join, const ASTPtr & , Data & data) @@ -218,7 +217,7 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt if (child->as() || child->as() || child->as()) { if (tables_with_columns.empty()) - throw Exception("An asterisk cannot be replaced with empty columns.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "An asterisk cannot be replaced with empty columns."); has_asterisk = true; } else if (const auto * qa = child->as()) @@ -349,8 +348,8 @@ void TranslateQualifiedNamesMatcher::extractJoinUsingColumns(ASTPtr ast, Data & { String alias = key->tryGetAlias(); if (alias.empty()) - throw Exception("Wrong key in USING. Expected identifier or alias, got: " + key->getID(), - ErrorCodes::UNSUPPORTED_JOIN_KEYS); + throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Wrong key in USING. Expected identifier or alias, got: {}", + key->getID()); data.join_using_columns.insert(alias); } } diff --git a/src/Interpreters/TreeCNFConverter.cpp b/src/Interpreters/TreeCNFConverter.cpp index 8812e90a5f0..d036c6728fe 100644 --- a/src/Interpreters/TreeCNFConverter.cpp +++ b/src/Interpreters/TreeCNFConverter.cpp @@ -49,7 +49,7 @@ void splitMultiLogic(ASTPtr & node) if (func && (func->name == "and" || func->name == "or")) { if (func->arguments->children.size() < 2) - throw Exception("Bad AND or OR function. Expected at least 2 arguments", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Bad AND or OR function. Expected at least 2 arguments"); if (func->arguments->children.size() > 2) { @@ -82,7 +82,7 @@ void traversePushNot(ASTPtr & node, bool add_negation) if (add_negation) { if (func->arguments->children.size() != 2) - throw Exception("Bad AND or OR function. Expected at least 2 arguments", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Bad AND or OR function. Expected at least 2 arguments"); /// apply De Morgan's Law node = makeASTFunction( @@ -98,7 +98,7 @@ void traversePushNot(ASTPtr & node, bool add_negation) else if (func && func->name == "not") { if (func->arguments->children.size() != 1) - throw Exception("Bad NOT function. Expected 1 argument", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Bad NOT function. 
Expected 1 argument"); /// delete NOT node = func->arguments->children[0]->clone(); @@ -189,7 +189,7 @@ void traverseCNF(const ASTPtr & node, CNFQuery::AndGroup & and_group, CNFQuery:: else if (func && func->name == "not") { if (func->arguments->children.size() != 1) - throw Exception("Bad NOT function. Expected 1 argument", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Bad NOT function. Expected 1 argument"); or_group.insert(CNFQuery::AtomicFormula{true, func->arguments->children.front()}); } else diff --git a/src/Interpreters/TreeOptimizer.cpp b/src/Interpreters/TreeOptimizer.cpp index 6a8c9dc7dbd..a63d3349e08 100644 --- a/src/Interpreters/TreeOptimizer.cpp +++ b/src/Interpreters/TreeOptimizer.cpp @@ -434,7 +434,7 @@ void optimizeMonotonousFunctionsInOrderBy(ASTSelectQuery * select_query, Context auto * order_by_element = child->as(); if (!order_by_element || order_by_element->children.empty()) - throw Exception("Bad ORDER BY expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE); + throw Exception(ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE, "Bad ORDER BY expression AST"); if (order_by_element->with_fill) return; @@ -513,7 +513,7 @@ void optimizeRedundantFunctionsInOrderBy(const ASTSelectQuery * select_query, Co auto * order_by_element = child->as(); if (!order_by_element || order_by_element->children.empty()) - throw Exception("Bad ORDER BY expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE); + throw Exception(ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE, "Bad ORDER BY expression AST"); if (order_by_element->with_fill) return; @@ -747,7 +747,7 @@ void TreeOptimizer::apply(ASTPtr & query, TreeRewriterResult & result, auto * select_query = query->as(); if (!select_query) - throw Exception("Select analyze for not select asts.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Select analyze for not select asts."); if (settings.optimize_functions_to_subcolumns && result.storage_snapshot && result.storage->supportsSubcolumns()) optimizeFunctionsToSubcolumns(query, result.storage_snapshot->metadata); diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 349855987a0..3dfc22f20ff 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -208,73 +208,8 @@ struct CustomizeAggregateFunctionsMoveSuffixData } }; -struct FuseSumCountAggregates -{ - std::vector sums {}; - std::vector counts {}; - std::vector avgs {}; - - void addFuncNode(ASTFunction * func) - { - if (func->name == "sum") - sums.push_back(func); - else if (func->name == "count") - counts.push_back(func); - else - { - assert(func->name == "avg"); - avgs.push_back(func); - } - } - - bool canBeFused() const - { - // Need at least two different kinds of functions to fuse. - if (sums.empty() && counts.empty()) - return false; - if (sums.empty() && avgs.empty()) - return false; - if (counts.empty() && avgs.empty()) - return false; - return true; - } -}; - -struct FuseSumCountAggregatesVisitorData -{ - using TypeToVisit = ASTFunction; - - std::unordered_map fuse_map; - - void visit(ASTFunction & func, ASTPtr &) - { - if (func.name == "sum" || func.name == "avg" || func.name == "count") - { - if (func.arguments->children.empty()) - return; - - // Probably we can extend it to match count() for non-nullable argument - // to sum/avg with any other argument. Now we require strict match. 
- const auto argument = func.arguments->children.at(0)->getColumnName(); - auto it = fuse_map.find(argument); - if (it != fuse_map.end()) - { - it->second.addFuncNode(&func); - } - else - { - FuseSumCountAggregates funcs{}; - funcs.addFuncNode(&func); - fuse_map[argument] = funcs; - } - } - } -}; - using CustomizeAggregateFunctionsOrNullVisitor = InDepthNodeVisitor, true>; using CustomizeAggregateFunctionsMoveOrNullVisitor = InDepthNodeVisitor, true>; -using FuseSumCountAggregatesVisitor = InDepthNodeVisitor, true>; - struct ExistsExpressionData { @@ -373,53 +308,7 @@ void translateQualifiedNames(ASTPtr & query, const ASTSelectQuery & select_query /// This may happen after expansion of COLUMNS('regexp'). if (select_query.select()->children.empty()) - throw Exception("Empty list of columns in SELECT query", ErrorCodes::EMPTY_LIST_OF_COLUMNS_QUERIED); -} - -// Replaces one avg/sum/count function with an appropriate expression with -// sumCount(). -void replaceWithSumCount(String column_name, ASTFunction & func) -{ - auto func_base = makeASTFunction("sumCount", std::make_shared(column_name)); - auto exp_list = std::make_shared(); - if (func.name == "sum" || func.name == "count") - { - /// Rewrite "sum" to sumCount().1, rewrite "count" to sumCount().2 - UInt8 idx = (func.name == "sum" ? 1 : 2); - func.name = "tupleElement"; - exp_list->children.push_back(func_base); - exp_list->children.push_back(std::make_shared(idx)); - } - else - { - /// Rewrite "avg" to sumCount().1 / sumCount().2 - auto new_arg1 = makeASTFunction("tupleElement", func_base, std::make_shared(UInt8(1))); - auto new_arg2 = makeASTFunction("CAST", - makeASTFunction("tupleElement", func_base, std::make_shared(static_cast(2))), - std::make_shared("Float64")); - - func.name = "divide"; - exp_list->children.push_back(new_arg1); - exp_list->children.push_back(new_arg2); - } - func.arguments = exp_list; - func.children.push_back(func.arguments); -} - -void fuseSumCountAggregates(std::unordered_map & fuse_map) -{ - for (auto & it : fuse_map) - { - if (it.second.canBeFused()) - { - for (auto & func: it.second.sums) - replaceWithSumCount(it.first, *func); - for (auto & func: it.second.avgs) - replaceWithSumCount(it.first, *func); - for (auto & func: it.second.counts) - replaceWithSumCount(it.first, *func); - } - } + throw Exception(ErrorCodes::EMPTY_LIST_OF_COLUMNS_QUERIED, "Empty list of columns in SELECT query"); } bool hasArrayJoin(const ASTPtr & ast) @@ -616,7 +505,7 @@ void getArrayJoinedColumns(ASTPtr & query, TreeRewriterResult & result, const AS if (result.array_join_result_to_source.empty()) { if (select_query->arrayJoinExpressionList().first->children.empty()) - throw DB::Exception("ARRAY JOIN requires an argument", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw DB::Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "ARRAY JOIN requires an argument"); ASTPtr expr = select_query->arrayJoinExpressionList().first->children.at(0); String source_name = expr->getColumnName(); @@ -641,7 +530,7 @@ void getArrayJoinedColumns(ASTPtr & query, TreeRewriterResult & result, const AS } } if (!found) - throw Exception("No columns in nested table " + source_name, ErrorCodes::EMPTY_NESTED_TABLE); + throw Exception(ErrorCodes::EMPTY_NESTED_TABLE, "No columns in nested table {}", source_name); } } } @@ -662,8 +551,8 @@ void setJoinStrictness(ASTSelectQuery & select_query, JoinStrictness join_defaul else if (join_default_strictness == JoinStrictness::All) table_join.strictness = JoinStrictness::All; else - throw Exception("Expected 
ANY or ALL in JOIN section, because setting (join_default_strictness) is empty", - DB::ErrorCodes::EXPECTED_ALL_OR_ANY); + throw Exception(DB::ErrorCodes::EXPECTED_ALL_OR_ANY, + "Expected ANY or ALL in JOIN section, because setting (join_default_strictness) is empty"); } if (old_any) @@ -681,7 +570,7 @@ void setJoinStrictness(ASTSelectQuery & select_query, JoinStrictness join_defaul else { if (table_join.strictness == JoinStrictness::Any && table_join.kind == JoinKind::Full) - throw Exception("ANY FULL JOINs are not implemented", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "ANY FULL JOINs are not implemented"); } analyzed_join->getTableJoin() = table_join; @@ -1307,7 +1196,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( { auto * select_query = query->as(); if (!select_query) - throw Exception("Select analyze for not select asts.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Select analyze for not select asts."); size_t subquery_depth = select_options.subquery_depth; bool remove_duplicates = select_options.remove_duplicates; @@ -1471,7 +1360,7 @@ TreeRewriterResultPtr TreeRewriter::analyze( bool is_create_parameterized_view) const { if (query->as()) - throw Exception("Not select analyze for select asts.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Not select analyze for select asts."); const auto & settings = getContext()->getSettingsRef(); @@ -1544,17 +1433,6 @@ void TreeRewriter::normalize( CustomizeGlobalNotInVisitor(data_global_not_null_in).visit(query); } - // Try to fuse sum/avg/count with identical arguments to one sumCount call, - // if we have at least two different functions. E.g. we will replace sum(x) - // and count(x) with sumCount(x).1 and sumCount(x).2, and sumCount() will - // be calculated only once because of CSE. 
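The removed comment above (and the check removed just below) belonged to the sum/avg/count fusion: the old replaceWithSumCount rewrote sum(x) to sumCount(x).1, count(x) to sumCount(x).2, and avg(x) to their quotient, so sumCount() was computed once. A small standalone sketch of the equivalence that rewrite relied on, in plain C++ rather than AST rewriting (names are illustrative):

#include <cstdio>
#include <utility>
#include <vector>

// sumCount(x) computes the sum and the row count in one pass; the removed
// rewrite expressed sum/count/avg through its two tuple elements.
std::pair<double, std::size_t> sumCount(const std::vector<double> & xs)
{
    double sum = 0.0;
    for (double x : xs)
        sum += x;
    return {sum, xs.size()};
}

int main()
{
    std::vector<double> xs{1.0, 2.0, 4.0};
    auto [sum, count] = sumCount(xs);                  // sumCount(x)
    double avg = sum / static_cast<double>(count);     // avg(x) == sumCount(x).1 / sumCount(x).2
    std::printf("sum=%g count=%zu avg=%g\n", sum, count, avg);
    return 0;
}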
- if (settings.optimize_fuse_sum_count_avg && settings.optimize_syntax_fuse_functions) - { - FuseSumCountAggregatesVisitor::Data data; - FuseSumCountAggregatesVisitor(data).visit(query); - fuseSumCountAggregates(data.fuse_map); - } - /// Rewrite all aggregate functions to add -OrNull suffix to them if (settings.aggregate_functions_null_for_empty) { diff --git a/src/Interpreters/addTypeConversionToAST.cpp b/src/Interpreters/addTypeConversionToAST.cpp index 2f766880253..65feff30f4a 100644 --- a/src/Interpreters/addTypeConversionToAST.cpp +++ b/src/Interpreters/addTypeConversionToAST.cpp @@ -41,7 +41,7 @@ ASTPtr addTypeConversionToAST(ASTPtr && ast, const String & type_name, const Nam for (const auto & action : actions->getActions()) if (action.node->type == ActionsDAG::ActionType::ARRAY_JOIN) - throw Exception("Unsupported default value that requires ARRAY JOIN action", ErrorCodes::THERE_IS_NO_DEFAULT_VALUE); + throw Exception(ErrorCodes::THERE_IS_NO_DEFAULT_VALUE, "Unsupported default value that requires ARRAY JOIN action"); auto block = actions->getSampleBlock(); diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index 00b01781007..9e4f543db43 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -86,7 +86,7 @@ Field convertIntToDecimalType(const Field & from, const DataTypeDecimal & typ { From value = from.get(); if (!type.canStoreWhole(value)) - throw Exception("Number is too big to place in " + type.getName(), ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Number is too big to place in {}", type.getName()); T scaled_value = type.getScaleMultiplier() * T(static_cast(value)); return DecimalField(scaled_value, type.getScale()); @@ -114,7 +114,7 @@ Field convertFloatToDecimalType(const Field & from, const DataTypeDecimal & t { From value = from.get(); if (!type.canStoreWhole(value)) - throw Exception("Number is too big to place in " + type.getName(), ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Number is too big to place in {}", type.getName()); //String sValue = convertFieldToString(from); //int fromScale = sValue.length()- sValue.find('.') - 1; @@ -321,8 +321,8 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID size_t dst_tuple_size = type_tuple->getElements().size(); if (dst_tuple_size != src_tuple_size) - throw Exception("Bad size of tuple in IN or VALUES section. Expected size: " - + toString(dst_tuple_size) + ", actual size: " + toString(src_tuple_size), ErrorCodes::TYPE_MISMATCH); + throw Exception(ErrorCodes::TYPE_MISMATCH, "Bad size of tuple in IN or VALUES section. 
" + "Expected size: {}, actual size: {}", dst_tuple_size, src_tuple_size); Tuple res(dst_tuple_size); bool have_unconvertible_element = false; @@ -401,7 +401,7 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID const auto & name = src.get().name; if (agg_func_type->getName() != name) - throw Exception("Cannot convert " + name + " to " + agg_func_type->getName(), ErrorCodes::TYPE_MISMATCH); + throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot convert {} to {}", name, agg_func_type->getName()); return src; } diff --git a/src/Interpreters/evaluateConstantExpression.cpp b/src/Interpreters/evaluateConstantExpression.cpp index a5cdbf78070..6aa89426916 100644 --- a/src/Interpreters/evaluateConstantExpression.cpp +++ b/src/Interpreters/evaluateConstantExpression.cpp @@ -86,12 +86,14 @@ std::pair> evaluateConstantExpression(co expr_for_constant_folding->execute(block_with_constants); if (!block_with_constants || block_with_constants.rows() == 0) - throw Exception("Logical error: empty block after evaluation of constant expression for IN, VALUES or LIMIT or aggregate function parameter", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Logical error: empty block after evaluation " + "of constant expression for IN, VALUES or LIMIT or aggregate function parameter"); if (!block_with_constants.has(name)) throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Element of set in IN, VALUES or LIMIT or aggregate function parameter is not a constant expression (result column not found): {}", name); + "Element of set in IN, VALUES or LIMIT or aggregate function parameter " + "is not a constant expression (result column not found): {}", name); const ColumnWithTypeAndName & result = block_with_constants.getByName(name); const IColumn & result_column = *result.column; @@ -99,7 +101,8 @@ std::pair> evaluateConstantExpression(co /// Expressions like rand() or now() are not constant if (!isColumnConst(result_column)) throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Element of set in IN, VALUES or LIMIT or aggregate function parameter is not a constant expression (result column is not const): {}", name); + "Element of set in IN, VALUES or LIMIT or aggregate function parameter " + "is not a constant expression (result column is not const): {}", name); return std::make_pair(result_column[0], result.type); } diff --git a/src/Interpreters/executeDDLQueryOnCluster.cpp b/src/Interpreters/executeDDLQueryOnCluster.cpp index 454474dde2b..bd10af953ff 100644 --- a/src/Interpreters/executeDDLQueryOnCluster.cpp +++ b/src/Interpreters/executeDDLQueryOnCluster.cpp @@ -68,18 +68,18 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, ContextPtr context, auto * query = dynamic_cast(query_ptr.get()); if (!query) { - throw Exception("Distributed execution is not supported for such DDL queries", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Distributed execution is not supported for such DDL queries"); } if (!context->getSettingsRef().allow_distributed_ddl) - throw Exception("Distributed DDL queries are prohibited for the user", ErrorCodes::QUERY_IS_PROHIBITED); + throw Exception(ErrorCodes::QUERY_IS_PROHIBITED, "Distributed DDL queries are prohibited for the user"); if (const auto * query_alter = query_ptr->as()) { for (const auto & command : query_alter->command_list->children) { if (!isSupportedAlterType(command->as().type)) - throw Exception("Unsupported type of ALTER query", ErrorCodes::NOT_IMPLEMENTED); + throw 
Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported type of ALTER query"); } } @@ -100,7 +100,7 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, ContextPtr context, /// Enumerate hosts which will be used to send query. auto addresses = cluster->filterAddressesByShardOrReplica(params.only_shard_num, params.only_replica_num); if (addresses.empty()) - throw Exception("No hosts defined to execute distributed DDL query", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "No hosts defined to execute distributed DDL query"); std::vector hosts; hosts.reserve(addresses.size()); @@ -133,7 +133,7 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, ContextPtr context, assert(use_local_default_database || !host_default_databases.empty()); if (use_local_default_database && !host_default_databases.empty()) - throw Exception("Mixed local default DB and shard default DB in DDL query", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Mixed local default DB and shard default DB in DDL query"); if (use_local_default_database) { diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index a5fd2a121b0..2fcb9c41da7 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -707,10 +708,52 @@ static std::tuple executeQueryImpl( if (OpenTelemetry::CurrentContext().isTraceEnabled()) { auto * raw_interpreter_ptr = interpreter.get(); - std::string class_name(demangle(typeid(*raw_interpreter_ptr).name())); + String class_name(demangle(typeid(*raw_interpreter_ptr).name())); span = std::make_unique(class_name + "::execute()"); } + res = interpreter->execute(); + + /// Try to read (SELECT) query result from query result cache (if it is enabled) + auto query_result_cache = context->getQueryResultCache(); + bool read_result_from_query_result_cache = false; /// a query must not read from *and* write to the query result cache at the same time + if (query_result_cache != nullptr + && (settings.enable_experimental_query_result_cache || settings.enable_experimental_query_result_cache_passive_usage) + && res.pipeline.pulling()) + { + QueryResultCache::Key key( + ast, res.pipeline.getHeader(), + std::make_optional(context->getUserName()), + std::chrono::system_clock::now() + std::chrono::seconds(settings.query_result_cache_ttl)); + QueryResultCache::Reader reader = query_result_cache->createReader(key); + if (reader.hasCacheEntryForKey()) + { + res.pipeline = QueryPipeline(reader.getPipe()); + read_result_from_query_result_cache = true; + } + } + + /// Try to write (SELECT) query result into query result cache (if it is enabled) + if (!read_result_from_query_result_cache + && query_result_cache != nullptr + && settings.enable_experimental_query_result_cache + && res.pipeline.pulling() + && (!astContainsNonDeterministicFunctions(ast, context) || settings.query_result_cache_store_results_of_queries_with_nondeterministic_functions)) + { + QueryResultCache::Key key( + ast, res.pipeline.getHeader(), + settings.query_result_cache_share_between_users ? 
std::nullopt : std::make_optional(context->getUserName()), + std::chrono::system_clock::now() + std::chrono::seconds(settings.query_result_cache_ttl)); + + const size_t num_query_runs = query_result_cache->recordQueryRun(key); + if (num_query_runs > settings.query_result_cache_min_query_runs) + { + auto stream_in_query_result_cache_transform = std::make_shared(res.pipeline.getHeader(), query_result_cache, key, + std::chrono::milliseconds(context->getSettings().query_result_cache_min_query_duration.totalMilliseconds())); + res.pipeline.streamIntoQueryResultCache(stream_in_query_result_cache_transform); + } + } + } } @@ -858,6 +901,8 @@ static std::tuple executeQueryImpl( auto finish_callback = [elem, context, ast, + enable_experimental_query_result_cache = settings.enable_experimental_query_result_cache, + query_result_cache_store_results_of_queries_with_nondeterministic_functions = settings.query_result_cache_store_results_of_queries_with_nondeterministic_functions, log_queries, log_queries_min_type = settings.log_queries_min_type, log_queries_min_query_duration_ms = settings.log_queries_min_query_duration_ms.totalMilliseconds(), @@ -867,6 +912,16 @@ static std::tuple executeQueryImpl( pulling_pipeline = pipeline.pulling(), query_span](QueryPipeline & query_pipeline) mutable { + /// Write query result into query result cache (if enabled) + auto query_result_cache = context->getQueryResultCache(); + if (query_result_cache != nullptr + && pulling_pipeline + && enable_experimental_query_result_cache + && (!astContainsNonDeterministicFunctions(ast, context) || query_result_cache_store_results_of_queries_with_nondeterministic_functions)) + { + query_pipeline.finalizeWriteInQueryResultCache(); + } + QueryStatusPtr process_list_elem = context->getProcessListElement(); if (process_list_elem) @@ -1199,7 +1254,7 @@ void executeQuery( if (ast_query_with_output && ast_query_with_output->out_file) { if (!allow_into_outfile) - throw Exception("INTO OUTFILE is not allowed", ErrorCodes::INTO_OUTFILE_NOT_ALLOWED); + throw Exception(ErrorCodes::INTO_OUTFILE_NOT_ALLOWED, "INTO OUTFILE is not allowed"); const auto & out_file = typeid_cast(*ast_query_with_output->out_file).value.safeGet(); diff --git a/src/Interpreters/getClusterName.cpp b/src/Interpreters/getClusterName.cpp index dc3e9b41628..ea053d356d2 100644 --- a/src/Interpreters/getClusterName.cpp +++ b/src/Interpreters/getClusterName.cpp @@ -21,7 +21,7 @@ std::string getClusterName(const IAST & node) { auto name = tryGetClusterName(node); if (!name) - throw Exception("Illegal expression instead of cluster name.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal expression instead of cluster name."); return std::move(name).value(); } diff --git a/src/Interpreters/getHeaderForProcessingStage.cpp b/src/Interpreters/getHeaderForProcessingStage.cpp index 48acfb5512a..d40cbd9366e 100644 --- a/src/Interpreters/getHeaderForProcessingStage.cpp +++ b/src/Interpreters/getHeaderForProcessingStage.cpp @@ -132,7 +132,7 @@ Block getHeaderForProcessingStage( return InterpreterSelectQuery(query, context, std::move(pipe), SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); } } - throw Exception("Logical Error: unknown processed stage.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical Error: unknown processed stage."); } } diff --git a/src/Interpreters/interpretSubquery.cpp b/src/Interpreters/interpretSubquery.cpp index cc38ea76d5d..5f00be07fa5 100644 --- 
a/src/Interpreters/interpretSubquery.cpp +++ b/src/Interpreters/interpretSubquery.cpp @@ -50,7 +50,7 @@ std::shared_ptr interpretSubquery( const auto * table = table_expression->as(); if (!subquery && !table && !function) - throw Exception("Table expression is undefined, Method: ExpressionAnalyzer::interpretSubquery." , ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Table expression is undefined, Method: ExpressionAnalyzer::interpretSubquery."); /** The subquery in the IN / JOIN section does not have any restrictions on the maximum size of the result. * Because the result of this query is not the result of the entire query. diff --git a/src/Interpreters/misc.h b/src/Interpreters/misc.h index cae2691ca1f..c009808de3f 100644 --- a/src/Interpreters/misc.h +++ b/src/Interpreters/misc.h @@ -42,8 +42,9 @@ inline bool checkFunctionIsInOrGlobalInOperator(const ASTFunction & func) { size_t num_arguments = func.arguments->children.size(); if (num_arguments != 2) - throw Exception("Wrong number of arguments passed to function in. Expected: 2, passed: " + std::to_string(num_arguments), - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Wrong number of arguments passed to function in. Expected: 2, passed: {}", + num_arguments); return true; } diff --git a/src/Interpreters/parseColumnsListForTableFunction.cpp b/src/Interpreters/parseColumnsListForTableFunction.cpp index c44259a3ccc..9e6326b431a 100644 --- a/src/Interpreters/parseColumnsListForTableFunction.cpp +++ b/src/Interpreters/parseColumnsListForTableFunction.cpp @@ -83,7 +83,7 @@ ColumnsDescription parseColumnsListFromString(const std::string & structure, con auto * columns_list = dynamic_cast(columns_list_raw.get()); if (!columns_list) - throw Exception("Could not cast AST to ASTExpressionList", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not cast AST to ASTExpressionList"); auto columns = InterpreterCreateQuery::getColumnsDescription(*columns_list, context, false); auto validation_settings = DataTypeValidationSettings(context->getSettingsRef()); diff --git a/src/Interpreters/sortBlock.cpp b/src/Interpreters/sortBlock.cpp index 4343e8c7fc6..2ae5edc43b9 100644 --- a/src/Interpreters/sortBlock.cpp +++ b/src/Interpreters/sortBlock.cpp @@ -103,10 +103,8 @@ ColumnsWithSortDescriptions getColumnsWithSortDescription(const Block & block, c if (isCollationRequired(sort_column_description)) { if (!column->isCollationSupported()) - throw Exception( - "Collations could be specified only for String, LowCardinality(String), Nullable(String) or for Array or Tuple, " - "containing them.", - ErrorCodes::BAD_COLLATION); + throw Exception(ErrorCodes::BAD_COLLATION, "Collations could be specified only for String, LowCardinality(String), " + "Nullable(String) or for Array or Tuple, containing them."); } result.emplace_back(ColumnWithSortDescription{column, sort_column_description, isColumnConst(*column)}); diff --git a/src/Interpreters/threadPoolCallbackRunner.h b/src/Interpreters/threadPoolCallbackRunner.h index c488a54c1ec..7990c538c03 100644 --- a/src/Interpreters/threadPoolCallbackRunner.h +++ b/src/Interpreters/threadPoolCallbackRunner.h @@ -10,16 +10,16 @@ namespace DB { /// High-order function to run callbacks (functions with 'void()' signature) somewhere asynchronously. 
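The comment above introduces the callback-runner abstraction, and the hunk just below templatizes the callback type (defaulting to std::function<Result()>), switches the priority to a signed integer, and adds scheduleFromThreadPool. A minimal standalone sketch of that calling convention, assuming a detached std::thread instead of ClickHouse's ThreadPool and ignoring the priority value (all names below are illustrative, not the real API):

#include <cstdint>
#include <functional>
#include <future>
#include <string>
#include <thread>
#include <utility>

// Same shape as the patched ThreadPoolCallbackRunner: a callable that takes a
// callback plus a signed priority and returns a std::future for the result.
template <typename Result, typename Callback = std::function<Result()>>
using CallbackRunner = std::function<std::future<Result>(Callback &&, int64_t /* priority */)>;

template <typename Result, typename Callback = std::function<Result()>>
CallbackRunner<Result, Callback> makeDetachedRunner(std::string /* thread_name */)
{
    return [](Callback && callback, int64_t /* priority */) -> std::future<Result>
    {
        // Wrap the callback so its result is delivered through a future,
        // then run it on a detached thread (the real runner uses the pool).
        auto task = std::make_shared<std::packaged_task<Result()>>(std::forward<Callback>(callback));
        auto future = task->get_future();
        std::thread([task]() mutable { (*task)(); }).detach();
        return future;
    };
}

int main()
{
    auto runner = makeDetachedRunner<int>("demo");
    auto result = runner([] { return 42; }, /* priority */ 0);
    return result.get() == 42 ? 0 : 1;
}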
-template <typename Result> -using ThreadPoolCallbackRunner = std::function<std::future<Result>(std::function<Result()> &&, size_t priority)>; +template <typename Result, typename Callback = std::function<Result()>> +using ThreadPoolCallbackRunner = std::function<std::future<Result>(Callback &&, int64_t priority)>; /// Creates CallbackRunner that runs every callback with 'pool->scheduleOrThrow()'. -template <typename Result> -ThreadPoolCallbackRunner<Result> threadPoolCallbackRunner(ThreadPool & pool, const std::string & thread_name) +template <typename Result, typename Callback = std::function<Result()>> +ThreadPoolCallbackRunner<Result, Callback> threadPoolCallbackRunner(ThreadPool & pool, const std::string & thread_name) { - return [pool = &pool, thread_group = CurrentThread::getGroup(), thread_name](std::function<Result()> && callback, size_t priority) mutable -> std::future<Result> + return [pool = &pool, thread_group = CurrentThread::getGroup(), thread_name](Callback && callback, int64_t priority) mutable -> std::future<Result> { - auto task = std::make_shared<std::packaged_task<Result()>>([thread_group, thread_name, callback = std::move(callback)]() -> Result + auto task = std::make_shared<std::packaged_task<Result()>>([thread_group, thread_name, callback = std::move(callback)]() mutable -> Result { if (thread_group) CurrentThread::attachTo(thread_group); @@ -43,4 +43,11 @@ ThreadPoolCallbackRunner<Result> threadPoolCallbackRunner(ThreadPool & pool, con }; } +template <typename Result, typename T> +std::future<Result> scheduleFromThreadPool(T && task, ThreadPool & pool, const std::string & thread_name, int64_t priority = 0) +{ + auto schedule = threadPoolCallbackRunner<Result, T>(pool, thread_name); + return schedule(std::move(task), priority); +} + }
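For illustration, a minimal usage sketch of the reworked runner and the new scheduleFromThreadPool helper; the existing ThreadPool instance, the function name runExample and the thread name "ExampleWorker" are assumptions, not part of the patch:

    #include <future>
    #include <Common/ThreadPool.h>
    #include <Interpreters/threadPoolCallbackRunner.h>

    /// Schedule a void() callback on an existing ThreadPool and wait for it.
    void runExample(ThreadPool & pool)
    {
        /// Build a reusable runner; Callback defaults to std::function<void()>.
        auto runner = DB::threadPoolCallbackRunner<void>(pool, "ExampleWorker");
        std::future<void> done = runner([] { /* do some work */ }, /* priority */ 0);
        done.get();

        /// One-off scheduling via the helper added in this hunk.
        DB::scheduleFromThreadPool<void>([] { /* do some work */ }, pool, "ExampleWorker").get();
    }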
diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp index 80801278963..5d347446d37 100644 --- a/src/Parsers/ASTAlterQuery.cpp +++ b/src/Parsers/ASTAlterQuery.cpp @@ -487,7 +487,7 @@ void ASTAlterCommand::formatImpl(const FormatSettings & settings, FormatState & rename_to->formatImpl(settings, state, frame); } else - throw Exception("Unexpected type of ALTER", ErrorCodes::UNEXPECTED_AST_STRUCTURE); + throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "Unexpected type of ALTER"); } bool ASTAlterQuery::isOneCommandTypeOnly(const ASTAlterCommand::Type & type) const diff --git a/src/Parsers/ASTColumnsTransformers.cpp b/src/Parsers/ASTColumnsTransformers.cpp index f3bbeb6167b..3bed3428d38 100644 --- a/src/Parsers/ASTColumnsTransformers.cpp +++ b/src/Parsers/ASTColumnsTransformers.cpp @@ -263,9 +263,8 @@ void ASTColumnsExceptTransformer::transform(ASTs & nodes) const std::for_each(expected_columns.begin(), expected_columns.end(), [&](String x) { expected_columns_str += (" " + x) ; }); - throw Exception( - "Columns transformer EXCEPT expects following column(s) :" + expected_columns_str, - ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "Columns transformer EXCEPT expects following column(s) :{}", + expected_columns_str); } } @@ -377,9 +376,8 @@ void ASTColumnsReplaceTransformer::transform(ASTs & nodes) const { auto & replacement = replace_child->as(); if (replace_map.find(replacement.name) != replace_map.end()) - throw Exception( - "Expressions in columns transformer REPLACE should not contain the same replacement more than once", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Expressions in columns transformer REPLACE should not contain the same replacement more than once"); replace_map.emplace(replacement.name, replacement.expr); } @@ -419,9 +417,8 @@ void ASTColumnsReplaceTransformer::transform(ASTs & nodes) const expected_columns += ", "; expected_columns += elem.first; } - throw Exception( - "Columns transformer REPLACE expects following column(s) : " + expected_columns, - ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "Columns transformer REPLACE expects following column(s) : {}", + expected_columns); } } diff --git a/src/Parsers/ASTDropQuery.cpp b/src/Parsers/ASTDropQuery.cpp index 11c1dd4c47a..93a4b547025 100644 --- a/src/Parsers/ASTDropQuery.cpp +++ b/src/Parsers/ASTDropQuery.cpp @@ -21,7 +21,7 @@ String ASTDropQuery::getID(char delim) const else if (kind == ASTDropQuery::Kind::Truncate) return "TruncateQuery" + (delim + getDatabase()) + delim + getTable(); else - throw Exception("Not supported kind of drop query.", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Not supported kind of drop query."); } ASTPtr ASTDropQuery::clone() const @@ -42,7 +42,7 @@ void ASTDropQuery::formatQueryImpl(const FormatSettings & settings, FormatState else if (kind == ASTDropQuery::Kind::Truncate) settings.ostr << "TRUNCATE "; else - throw Exception("Not supported kind of drop query.", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Not supported kind of drop query."); if (temporary) settings.ostr << "TEMPORARY "; diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index 4ac4bb6144e..fccef01a2bc 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -367,7 +367,7 @@ namespace void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr) const { if (name == "view") - throw Exception("Table function view cannot be used as an expression", ErrorCodes::UNEXPECTED_EXPRESSION); + throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "Table function view cannot be used as an expression"); /// If function can be converted to literal it will be parsed as literal after formatting. /// In distributed query it may lead to mismathed column names. @@ -1020,7 +1020,9 @@ String getFunctionName(const IAST * ast) String res; if (tryGetFunctionNameInto(ast, res)) return res; - throw Exception(ast ? queryToString(*ast) + " is not an function" : "AST node is nullptr", ErrorCodes::UNEXPECTED_AST_STRUCTURE); + if (ast) + throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "{} is not an function", queryToString(*ast)); + throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "AST node is nullptr"); } std::optional tryGetFunctionName(const IAST * ast) diff --git a/src/Parsers/ASTIdentifier.cpp b/src/Parsers/ASTIdentifier.cpp index 8651a52f2c1..042b4d9085d 100644 --- a/src/Parsers/ASTIdentifier.cpp +++ b/src/Parsers/ASTIdentifier.cpp @@ -255,7 +255,9 @@ String getIdentifierName(const IAST * ast) String res; if (tryGetIdentifierNameInto(ast, res)) return res; - throw Exception(ast ?
queryToString(*ast) + " is not an identifier" : "AST node is nullptr", ErrorCodes::UNEXPECTED_AST_STRUCTURE); + if (ast) + throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "{} is not an identifier", queryToString(*ast)); + throw Exception(ErrorCodes::UNEXPECTED_AST_STRUCTURE, "AST node is nullptr"); } std::optional tryGetIdentifierName(const IAST * ast) diff --git a/src/Parsers/ASTInsertQuery.cpp b/src/Parsers/ASTInsertQuery.cpp index dceec83e763..ecb2d4e331b 100644 --- a/src/Parsers/ASTInsertQuery.cpp +++ b/src/Parsers/ASTInsertQuery.cpp @@ -160,7 +160,7 @@ static void tryFindInputFunctionImpl(const ASTPtr & ast, ASTPtr & input_function if (table_function_ast->name == "input") { if (input_function) - throw Exception("You can use 'input()' function only once per request.", ErrorCodes::INVALID_USAGE_OF_INPUT); + throw Exception(ErrorCodes::INVALID_USAGE_OF_INPUT, "You can use 'input()' function only once per request."); input_function = ast; } } diff --git a/src/Parsers/ASTProjectionSelectQuery.cpp b/src/Parsers/ASTProjectionSelectQuery.cpp index 9b85fcb2dac..da3d9286f0a 100644 --- a/src/Parsers/ASTProjectionSelectQuery.cpp +++ b/src/Parsers/ASTProjectionSelectQuery.cpp @@ -115,7 +115,7 @@ void ASTProjectionSelectQuery::setExpression(Expression expr, ASTPtr && ast) ASTPtr & ASTProjectionSelectQuery::getExpression(Expression expr) { if (!positions.contains(expr)) - throw Exception("Get expression before set", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Get expression before set"); return children[positions[expr]]; } diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index 838b2664eb3..fe8ebacec15 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -258,7 +258,7 @@ static const ASTArrayJoin * getFirstArrayJoin(const ASTSelectQuery & select) if (!array_join) array_join = tables_element.array_join->as(); else - throw Exception("Support for more than one ARRAY JOIN in query is not implemented", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Support for more than one ARRAY JOIN in query is not implemented"); } } @@ -283,7 +283,7 @@ static const ASTTablesInSelectQueryElement * getFirstTableJoin(const ASTSelectQu if (!joined_table) joined_table = &tables_element; else - throw Exception("Multiple JOIN does not support the query.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Multiple JOIN does not support the query."); } } @@ -460,7 +460,7 @@ void ASTSelectQuery::setExpression(Expression expr, ASTPtr && ast) ASTPtr & ASTSelectQuery::getExpression(Expression expr) { if (!positions.contains(expr)) - throw Exception("Get expression before set", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Get expression before set"); return children[positions[expr]]; } diff --git a/src/Parsers/ASTSystemQuery.h b/src/Parsers/ASTSystemQuery.h index ae08fe464ad..499807df7df 100644 --- a/src/Parsers/ASTSystemQuery.h +++ b/src/Parsers/ASTSystemQuery.h @@ -25,6 +25,7 @@ public: DROP_INDEX_MARK_CACHE, DROP_INDEX_UNCOMPRESSED_CACHE, DROP_MMAP_CACHE, + DROP_QUERY_RESULT_CACHE, #if USE_EMBEDDED_COMPILER DROP_COMPILED_EXPRESSION_CACHE, #endif diff --git a/src/Parsers/Access/ASTCreateUserQuery.cpp b/src/Parsers/Access/ASTCreateUserQuery.cpp index a59b5dd472c..b0d4aef38b8 100644 --- a/src/Parsers/Access/ASTCreateUserQuery.cpp +++ b/src/Parsers/Access/ASTCreateUserQuery.cpp @@ -90,7 +90,7 @@ namespace case AuthenticationType::NO_PASSWORD: 
[[fallthrough]]; case AuthenticationType::MAX: - throw Exception("AST: Unexpected authentication type " + toString(auth_type), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "AST: Unexpected authentication type {}", toString(auth_type)); } if (password && !settings.show_secrets) diff --git a/src/Parsers/Access/ASTGrantQuery.cpp b/src/Parsers/Access/ASTGrantQuery.cpp index 1d15fc272cf..f92541ec672 100644 --- a/src/Parsers/Access/ASTGrantQuery.cpp +++ b/src/Parsers/Access/ASTGrantQuery.cpp @@ -116,9 +116,9 @@ void ASTGrantQuery::formatImpl(const FormatSettings & settings, FormatState &, F << (settings.hilite ? IAST::hilite_none : ""); if (!access_rights_elements.sameOptions()) - throw Exception("Elements of an ASTGrantQuery are expected to have the same options", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Elements of an ASTGrantQuery are expected to have the same options"); if (!access_rights_elements.empty() && access_rights_elements[0].is_partial_revoke && !is_revoke) - throw Exception("A partial revoke should be revoked, not granted", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "A partial revoke should be revoked, not granted"); bool grant_option = !access_rights_elements.empty() && access_rights_elements[0].grant_option; formatOnCluster(settings); @@ -136,7 +136,9 @@ void ASTGrantQuery::formatImpl(const FormatSettings & settings, FormatState &, F { roles->format(settings); if (!access_rights_elements.empty()) - throw Exception("ASTGrantQuery can contain either roles or access rights elements to grant or revoke, not both of them", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "ASTGrantQuery can contain either roles or access rights elements " + "to grant or revoke, not both of them"); } else formatElementsWithoutOptions(access_rights_elements, settings); diff --git a/src/Parsers/Access/ASTRowPolicyName.cpp b/src/Parsers/Access/ASTRowPolicyName.cpp index 280713fe9d9..4edfa61f10e 100644 --- a/src/Parsers/Access/ASTRowPolicyName.cpp +++ b/src/Parsers/Access/ASTRowPolicyName.cpp @@ -34,7 +34,7 @@ void ASTRowPolicyName::replaceEmptyDatabase(const String & current_database) void ASTRowPolicyNames::formatImpl(const FormatSettings & settings, FormatState &, FormatStateStacked) const { if (full_names.empty()) - throw Exception("No names of row policies in AST", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "No names of row policies in AST"); bool same_short_name = true; if (full_names.size() > 1) diff --git a/src/Parsers/Access/ParserCreateQuotaQuery.cpp b/src/Parsers/Access/ParserCreateQuotaQuery.cpp index b86f0a6a572..a67051be398 100644 --- a/src/Parsers/Access/ParserCreateQuotaQuery.cpp +++ b/src/Parsers/Access/ParserCreateQuotaQuery.cpp @@ -71,8 +71,7 @@ namespace String all_types_str; for (auto kt : collections::range(QuotaKeyType::MAX)) all_types_str += String(all_types_str.empty() ? "" : ", ") + "'" + QuotaKeyTypeInfo::get(kt).name + "'"; - String msg = "Quota cannot be keyed by '" + name + "'. Expected one of the following identifiers: " + all_types_str; - throw Exception(msg, ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Quota cannot be keyed by '{}'. 
Expected one of the following identifiers: {}", name, all_types_str); }); } diff --git a/src/Parsers/Access/ParserGrantQuery.cpp b/src/Parsers/Access/ParserGrantQuery.cpp index 43e1cedd34d..2211969c61e 100644 --- a/src/Parsers/Access/ParserGrantQuery.cpp +++ b/src/Parsers/Access/ParserGrantQuery.cpp @@ -168,13 +168,13 @@ namespace return false; if (!element.any_column) - throw Exception(old_flags.toString() + " cannot be granted on the column level", ErrorCodes::INVALID_GRANT); + throw Exception(ErrorCodes::INVALID_GRANT, "{} cannot be granted on the column level", old_flags.toString()); else if (!element.any_table) - throw Exception(old_flags.toString() + " cannot be granted on the table level", ErrorCodes::INVALID_GRANT); + throw Exception(ErrorCodes::INVALID_GRANT, "{} cannot be granted on the table level", old_flags.toString()); else if (!element.any_database) - throw Exception(old_flags.toString() + " cannot be granted on the database level", ErrorCodes::INVALID_GRANT); + throw Exception(ErrorCodes::INVALID_GRANT, "{} cannot be granted on the database level", old_flags.toString()); else - throw Exception(old_flags.toString() + " cannot be granted", ErrorCodes::INVALID_GRANT); + throw Exception(ErrorCodes::INVALID_GRANT, "{} cannot be granted", old_flags.toString()); }); } @@ -281,9 +281,9 @@ bool ParserGrantQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) parseOnCluster(pos, expected, cluster); if (grant_option && roles) - throw Exception("GRANT OPTION should be specified for access types", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "GRANT OPTION should be specified for access types"); if (admin_option && !elements.empty()) - throw Exception("ADMIN OPTION should be specified for roles", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "ADMIN OPTION should be specified for roles"); if (grant_option) { diff --git a/src/Parsers/Access/ParserShowCreateAccessEntityQuery.cpp b/src/Parsers/Access/ParserShowCreateAccessEntityQuery.cpp index aaa65450d20..15622062961 100644 --- a/src/Parsers/Access/ParserShowCreateAccessEntityQuery.cpp +++ b/src/Parsers/Access/ParserShowCreateAccessEntityQuery.cpp @@ -150,7 +150,7 @@ bool ParserShowCreateAccessEntityQuery::parseImpl(Pos & pos, ASTPtr & node, Expe break; } case AccessEntityType::MAX: - throw Exception("Type " + toString(type) + " is not implemented in SHOW CREATE query", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Type {} is not implemented in SHOW CREATE query", toString(type)); } auto query = std::make_shared(); diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 231897605e0..855e452e3c7 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -121,7 +121,7 @@ bool ParserSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) const auto & explain_query = explain_node->as(); if (explain_query.getTableFunction() || explain_query.getTableOverride()) - throw Exception("EXPLAIN in a subquery cannot have a table function or table override", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "EXPLAIN in a subquery cannot have a table function or table override"); /// Replace subquery `(EXPLAIN SELECT ...)` /// with `(SELECT * FROM viewExplain("", "", SELECT ...))` @@ -132,7 +132,7 @@ bool ParserSubquery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (ASTPtr settings_ast = explain_query.getSettings()) { if 
(!settings_ast->as()) - throw Exception("EXPLAIN settings must be a SET query", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "EXPLAIN settings must be a SET query"); settings_str = queryToString(settings_ast); } @@ -868,7 +868,9 @@ bool ParserNumber::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (str_end == buf.c_str() + buf.size() && errno != ERANGE) { if (float_value < 0) - throw Exception("Logical error: token number cannot begin with minus, but parsed float number is less than zero.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Logical error: token number cannot begin with minus, " + "but parsed float number is less than zero."); if (negative) float_value = -float_value; diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 2e20a68f9b1..054a22a0c3a 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -933,8 +933,8 @@ public: && contents_begin[9] >= '0' && contents_begin[9] <= '9') { std::string contents_str(contents_begin, contents_end - contents_begin); - throw Exception("Argument of function toDate is unquoted: toDate(" + contents_str + "), must be: toDate('" + contents_str + "')" - , ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Argument of function toDate is unquoted: " + "toDate({}), must be: toDate('{}')" , contents_str, contents_str); } if (allow_function_parameters && !parameters && ParserToken(TokenType::OpeningRoundBracket).ignore(pos, expected)) diff --git a/src/Parsers/FunctionParameterValuesVisitor.cpp b/src/Parsers/FunctionParameterValuesVisitor.cpp index 31ba7ac4f86..3692a4c73e5 100644 --- a/src/Parsers/FunctionParameterValuesVisitor.cpp +++ b/src/Parsers/FunctionParameterValuesVisitor.cpp @@ -58,7 +58,7 @@ private: { const auto * cast_expression = assert_cast(function->arguments.get()); if (cast_expression->children.size() != 2) - throw Exception("Function CAST must have exactly two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Function CAST must have exactly two arguments"); if (const auto * cast_literal = cast_expression->children[0]->as()) { parameter_values[identifier->name()] = convertFieldToString(cast_literal->value); diff --git a/src/Parsers/IAST.cpp b/src/Parsers/IAST.cpp index 064bcc9a59e..869c0969dd6 100644 --- a/src/Parsers/IAST.cpp +++ b/src/Parsers/IAST.cpp @@ -108,7 +108,7 @@ size_t IAST::checkSize(size_t max_size) const res += child->checkSize(max_size); if (res > max_size) - throw Exception("AST is too big. Maximum: " + toString(max_size), ErrorCodes::TOO_BIG_AST); + throw Exception(ErrorCodes::TOO_BIG_AST, "AST is too big. Maximum: {}", max_size); return res; } @@ -156,7 +156,7 @@ size_t IAST::checkDepthImpl(size_t max_depth) const stack.pop_back(); if (top.second >= max_depth) - throw Exception("AST is too deep. Maximum: " + toString(max_depth), ErrorCodes::TOO_DEEP_AST); + throw Exception(ErrorCodes::TOO_DEEP_AST, "AST is too deep. 
Maximum: {}", max_depth); res = std::max(res, top.second); @@ -218,8 +218,9 @@ void IAST::FormatSettings::writeIdentifier(const String & name) const case IdentifierQuotingStyle::None: { if (always_quote_identifiers) - throw Exception("Incompatible arguments: always_quote_identifiers = true && identifier_quoting_style == IdentifierQuotingStyle::None", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Incompatible arguments: always_quote_identifiers = true && " + "identifier_quoting_style == IdentifierQuotingStyle::None"); writeString(name, ostr); break; } @@ -258,7 +259,7 @@ void IAST::dumpTree(WriteBuffer & ostr, size_t indent) const writeChar('\n', ostr); for (const auto & child : children) { - if (!child) throw Exception("Can't dump nullptr child", ErrorCodes::UNKNOWN_ELEMENT_IN_AST); + if (!child) throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_AST, "Can't dump nullptr child"); child->dumpTree(ostr, indent + 1); } } diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h index fd987d4b48e..c1520a6fca7 100644 --- a/src/Parsers/IAST.h +++ b/src/Parsers/IAST.h @@ -48,12 +48,12 @@ public: virtual void appendColumnName(WriteBuffer &) const { - throw Exception("Trying to get name of not a column: " + getID(), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to get name of not a column: {}", getID()); } virtual void appendColumnNameWithoutAlias(WriteBuffer &) const { - throw Exception("Trying to get name of not a column: " + getID(), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to get name of not a column: {}", getID()); } /** Get the alias, if any, or the canonical name of the column, if it is not. */ @@ -65,7 +65,7 @@ public: /** Set the alias. */ virtual void setAlias(const String & /*to*/) { - throw Exception("Can't set alias of " + getColumnName(), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't set alias of {}", getColumnName()); } /** Get the text that identifies this element. 
*/ @@ -119,7 +119,7 @@ public: T * casted = dynamic_cast(child.get()); if (!casted) - throw Exception("Could not cast AST subtree", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not cast AST subtree"); children.push_back(child); field = casted; @@ -129,11 +129,11 @@ public: void replace(T * & field, const ASTPtr & child) { if (!child) - throw Exception("Trying to replace AST subtree with nullptr", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to replace AST subtree with nullptr"); T * casted = dynamic_cast(child.get()); if (!casted) - throw Exception("Could not cast AST subtree", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Could not cast AST subtree"); for (ASTPtr & current_child : children) { @@ -145,7 +145,7 @@ public: } } - throw Exception("AST subtree not found in children", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "AST subtree not found in children"); } template @@ -169,7 +169,7 @@ public: }); if (child == children.end()) - throw Exception("AST subtree not found in children", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "AST subtree not found in children"); children.erase(child); field = nullptr; @@ -237,7 +237,7 @@ public: virtual void formatImpl(const FormatSettings & /*settings*/, FormatState & /*state*/, FormatStateStacked /*frame*/) const { - throw Exception("Unknown element in AST: " + getID(), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown element in AST: {}", getID()); } // A simple way to add some user-readable context to an error message. diff --git a/src/Parsers/IParser.h b/src/Parsers/IParser.h index 466cdf7a4b1..d5fdf6b7eaa 100644 --- a/src/Parsers/IParser.h +++ b/src/Parsers/IParser.h @@ -70,15 +70,14 @@ public: { ++depth; if (unlikely(max_depth > 0 && depth > max_depth)) - throw Exception( - "Maximum parse depth (" + std::to_string(max_depth) + ") exceeded. Consider rising max_parser_depth parameter.", - ErrorCodes::TOO_DEEP_RECURSION); + throw Exception(ErrorCodes::TOO_DEEP_RECURSION, "Maximum parse depth ({}) exceeded. 
" + "Consider rising max_parser_depth parameter.", max_depth); } ALWAYS_INLINE void decreaseDepth() { if (unlikely(depth == 0)) - throw Exception("Logical error in parser: incorrect calculation of parse depth", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error in parser: incorrect calculation of parse depth"); --depth; } }; diff --git a/src/Parsers/Kusto/ParserKQLOperators.cpp b/src/Parsers/Kusto/ParserKQLOperators.cpp index f8e4f9eaab0..1575cffcc39 100644 --- a/src/Parsers/Kusto/ParserKQLOperators.cpp +++ b/src/Parsers/Kusto/ParserKQLOperators.cpp @@ -20,7 +20,7 @@ String KQLOperators::genHasAnyAllOpExpr(std::vector &tokens, IParser::Po ++token_pos; if (!s_lparen.ignore(token_pos, expected)) - throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); auto haystack = tokens.back(); @@ -55,7 +55,7 @@ String KQLOperators::genInOpExpr(IParser::Pos &token_pos, String kql_op, String ++token_pos; if (!s_lparen.ignore(token_pos, expected)) - throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); --token_pos; --token_pos; @@ -115,7 +115,7 @@ String KQLOperators::genHaystackOpExpr(std::vector &tokens,IParser::Pos new_expr = ch_op +"(" + tokens.back() +", concat('" + left_wildcards + left_space + "', " + tmp_arg +", '"+ right_space + right_wildcards + "'))"; } else - throw Exception("Syntax error near " + kql_op, ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error near {}", kql_op); tokens.pop_back(); return new_expr; } @@ -135,7 +135,7 @@ bool KQLOperators::convert(std::vector &tokens,IParser::Pos &pos) { ++pos; if (pos->isEnd() || pos->type == TokenType::PipeMark || pos->type == TokenType::Semicolon) - throw Exception("Invalid negative operator", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Invalid negative operator"); op ="!"+String(pos->begin,pos->end); } else if (token == "matches") diff --git a/src/Parsers/MySQL/ASTAlterCommand.h b/src/Parsers/MySQL/ASTAlterCommand.h index 933a9700c70..f097ed71219 100644 --- a/src/Parsers/MySQL/ASTAlterCommand.h +++ b/src/Parsers/MySQL/ASTAlterCommand.h @@ -78,7 +78,7 @@ public: protected: void formatImpl(const FormatSettings & /*settings*/, FormatState & /*state*/, FormatStateStacked /*frame*/) const override { - throw Exception("Method formatImpl is not supported by MySQLParser::ASTAlterCommand.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method formatImpl is not supported by MySQLParser::ASTAlterCommand."); } }; diff --git a/src/Parsers/MySQL/ASTAlterQuery.h b/src/Parsers/MySQL/ASTAlterQuery.h index a6987acb327..161e5e40086 100644 --- a/src/Parsers/MySQL/ASTAlterQuery.h +++ b/src/Parsers/MySQL/ASTAlterQuery.h @@ -30,7 +30,7 @@ public: protected: void formatImpl(const FormatSettings & /*settings*/, FormatState & /*state*/, FormatStateStacked /*frame*/) const override { - throw Exception("Method formatImpl is not supported by MySQLParser::ASTAlterQuery.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method formatImpl is not supported by MySQLParser::ASTAlterQuery."); } }; diff --git a/src/Parsers/MySQL/ASTCreateDefines.h b/src/Parsers/MySQL/ASTCreateDefines.h index 95fb7716f3a..3d2a79568ab 100644 --- a/src/Parsers/MySQL/ASTCreateDefines.h +++ b/src/Parsers/MySQL/ASTCreateDefines.h @@ -29,7 +29,7 @@ public: 
protected: void formatImpl(const FormatSettings & /*settings*/, FormatState & /*state*/, FormatStateStacked /*frame*/) const override { - throw Exception("Method formatImpl is not supported by MySQLParser::ASTCreateDefines.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method formatImpl is not supported by MySQLParser::ASTCreateDefines."); } }; diff --git a/src/Parsers/MySQL/ASTCreateQuery.h b/src/Parsers/MySQL/ASTCreateQuery.h index ceacdd2cd41..d01bed6b7d6 100644 --- a/src/Parsers/MySQL/ASTCreateQuery.h +++ b/src/Parsers/MySQL/ASTCreateQuery.h @@ -34,7 +34,7 @@ public: protected: void formatImpl(const FormatSettings & /*settings*/, FormatState & /*state*/, FormatStateStacked /*frame*/) const override { - throw Exception("Method formatImpl is not supported by MySQLParser::ASTCreateQuery.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method formatImpl is not supported by MySQLParser::ASTCreateQuery."); } }; diff --git a/src/Parsers/MySQL/ASTDeclareColumn.h b/src/Parsers/MySQL/ASTDeclareColumn.h index 6f9f50225a2..30e16b7b84c 100644 --- a/src/Parsers/MySQL/ASTDeclareColumn.h +++ b/src/Parsers/MySQL/ASTDeclareColumn.h @@ -28,7 +28,7 @@ public: protected: void formatImpl(const FormatSettings & /*settings*/, FormatState & /*state*/, FormatStateStacked /*frame*/) const override { - throw Exception("Method formatImpl is not supported by MySQLParser::ASTDeclareColumn.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method formatImpl is not supported by MySQLParser::ASTDeclareColumn."); } }; diff --git a/src/Parsers/MySQL/ASTDeclareConstraint.h b/src/Parsers/MySQL/ASTDeclareConstraint.h index 8b0153bcd53..82f2597bc4d 100644 --- a/src/Parsers/MySQL/ASTDeclareConstraint.h +++ b/src/Parsers/MySQL/ASTDeclareConstraint.h @@ -28,7 +28,7 @@ public: protected: void formatImpl(const FormatSettings & /*settings*/, FormatState & /*state*/, FormatStateStacked /*frame*/) const override { - throw Exception("Method formatImpl is not supported by MySQLParser::ASTDeclareConstraint.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method formatImpl is not supported by MySQLParser::ASTDeclareConstraint."); } }; diff --git a/src/Parsers/MySQL/ASTDeclareIndex.h b/src/Parsers/MySQL/ASTDeclareIndex.h index faa8f1378dd..5399c394137 100644 --- a/src/Parsers/MySQL/ASTDeclareIndex.h +++ b/src/Parsers/MySQL/ASTDeclareIndex.h @@ -32,7 +32,7 @@ public: protected: void formatImpl(const FormatSettings & /*settings*/, FormatState & /*state*/, FormatStateStacked /*frame*/) const override { - throw Exception("Method formatImpl is not supported by MySQLParser::ASTDeclareIndex.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method formatImpl is not supported by MySQLParser::ASTDeclareIndex."); } }; diff --git a/src/Parsers/MySQL/ASTDeclareOption.h b/src/Parsers/MySQL/ASTDeclareOption.h index c493c49c61b..6e248b647c9 100644 --- a/src/Parsers/MySQL/ASTDeclareOption.h +++ b/src/Parsers/MySQL/ASTDeclareOption.h @@ -40,7 +40,7 @@ public: protected: void formatImpl(const FormatSettings & /*settings*/, FormatState & /*state*/, FormatStateStacked /*frame*/) const override { - throw Exception("Method formatImpl is not supported by MySQLParser::ASTDeclareOptions.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method formatImpl is not supported by MySQLParser::ASTDeclareOptions."); } }; diff --git a/src/Parsers/MySQL/ASTDeclarePartition.h 
b/src/Parsers/MySQL/ASTDeclarePartition.h index 232fea57561..f2c4103a1ba 100644 --- a/src/Parsers/MySQL/ASTDeclarePartition.h +++ b/src/Parsers/MySQL/ASTDeclarePartition.h @@ -30,7 +30,7 @@ public: protected: void formatImpl(const FormatSettings & /*settings*/, FormatState & /*state*/, FormatStateStacked /*frame*/) const override { - throw Exception("Method formatImpl is not supported by MySQLParser::ASTDeclarePartition.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method formatImpl is not supported by MySQLParser::ASTDeclarePartition."); } }; diff --git a/src/Parsers/MySQL/ASTDeclarePartitionOptions.h b/src/Parsers/MySQL/ASTDeclarePartitionOptions.h index 9e29a5cbbae..cee2d449291 100644 --- a/src/Parsers/MySQL/ASTDeclarePartitionOptions.h +++ b/src/Parsers/MySQL/ASTDeclarePartitionOptions.h @@ -32,7 +32,7 @@ public: protected: void formatImpl(const FormatSettings & /*settings*/, FormatState & /*state*/, FormatStateStacked /*frame*/) const override { - throw Exception("Method formatImpl is not supported by MySQLParser::ASTDeclarePartitionOptions.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method formatImpl is not supported by MySQLParser::ASTDeclarePartitionOptions."); } }; diff --git a/src/Parsers/MySQL/ASTDeclareReference.h b/src/Parsers/MySQL/ASTDeclareReference.h index c003bd7a16c..b04bf52e51c 100644 --- a/src/Parsers/MySQL/ASTDeclareReference.h +++ b/src/Parsers/MySQL/ASTDeclareReference.h @@ -46,7 +46,7 @@ public: protected: void formatImpl(const FormatSettings & /*settings*/, FormatState & /*state*/, FormatStateStacked /*frame*/) const override { - throw Exception("Method formatImpl is not supported by MySQLParser::ASTDeclareReference.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method formatImpl is not supported by MySQLParser::ASTDeclareReference."); } }; diff --git a/src/Parsers/MySQL/ASTDeclareSubPartition.h b/src/Parsers/MySQL/ASTDeclareSubPartition.h index 4f00a39c99a..82a1a5a3b0b 100644 --- a/src/Parsers/MySQL/ASTDeclareSubPartition.h +++ b/src/Parsers/MySQL/ASTDeclareSubPartition.h @@ -27,7 +27,7 @@ public: protected: void formatImpl(const FormatSettings & /*settings*/, FormatState & /*state*/, FormatStateStacked /*frame*/) const override { - throw Exception("Method formatImpl is not supported by MySQLParser::ASTDeclareSubPartition.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method formatImpl is not supported by MySQLParser::ASTDeclareSubPartition."); } }; diff --git a/src/Parsers/MySQL/ASTDropQuery.h b/src/Parsers/MySQL/ASTDropQuery.h index ff95277ae5e..742cf6ba421 100644 --- a/src/Parsers/MySQL/ASTDropQuery.h +++ b/src/Parsers/MySQL/ASTDropQuery.h @@ -47,7 +47,7 @@ public: protected: void formatImpl(const FormatSettings & /*settings*/, FormatState & /*state*/, FormatStateStacked /*frame*/) const override { - throw Exception("Method formatImpl is not supported by MySQLParser::ASTDropQuery.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method formatImpl is not supported by MySQLParser::ASTDropQuery."); } }; diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 0d522b192e4..208737b5bda 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -684,7 +684,7 @@ bool ParserCreateTableQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expe if (!query->storage) query->set(query->storage, std::make_shared()); else if 
(query->storage->primary_key) - throw Exception("Multiple primary keys are not allowed.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Multiple primary keys are not allowed."); query->storage->primary_key = query->columns_list->primary_key; } diff --git a/src/Parsers/ParserExternalDDLQuery.cpp b/src/Parsers/ParserExternalDDLQuery.cpp index 5d6874f524d..01e8fff4f3c 100644 --- a/src/Parsers/ParserExternalDDLQuery.cpp +++ b/src/Parsers/ParserExternalDDLQuery.cpp @@ -62,16 +62,16 @@ bool ParserExternalDDLQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expect /// Syntax error is ignored, so we need to convert the error code for parsing failure if (ParserKeyword("ALTER TABLE").ignore(pos)) - throw Exception("Cannot parse MySQL alter query.", ErrorCodes::MYSQL_SYNTAX_ERROR); + throw Exception(ErrorCodes::MYSQL_SYNTAX_ERROR, "Cannot parse MySQL alter query."); if (ParserKeyword("RENAME TABLE").ignore(pos)) - throw Exception("Cannot parse MySQL rename query.", ErrorCodes::MYSQL_SYNTAX_ERROR); + throw Exception(ErrorCodes::MYSQL_SYNTAX_ERROR, "Cannot parse MySQL rename query."); if (ParserKeyword("DROP TABLE").ignore(pos) || ParserKeyword("TRUNCATE").ignore(pos)) - throw Exception("Cannot parse MySQL drop query.", ErrorCodes::MYSQL_SYNTAX_ERROR); + throw Exception(ErrorCodes::MYSQL_SYNTAX_ERROR, "Cannot parse MySQL drop query."); if (ParserKeyword("CREATE TABLE").ignore(pos) || ParserKeyword("CREATE TEMPORARY TABLE").ignore(pos)) - throw Exception("Cannot parse MySQL create query.", ErrorCodes::MYSQL_SYNTAX_ERROR); + throw Exception(ErrorCodes::MYSQL_SYNTAX_ERROR, "Cannot parse MySQL create query."); } #endif } diff --git a/src/Parsers/ParserInsertQuery.cpp b/src/Parsers/ParserInsertQuery.cpp index 7f8a8d59fd0..9d01cda98a2 100644 --- a/src/Parsers/ParserInsertQuery.cpp +++ b/src/Parsers/ParserInsertQuery.cpp @@ -198,11 +198,9 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (allow_settings_after_format_in_insert && s_settings.ignore(pos, expected)) { if (settings_ast) - throw Exception("You have SETTINGS before and after FORMAT, " - "this is not allowed. " - "Consider switching to SETTINGS before FORMAT " - "and disable allow_settings_after_format_in_insert.", - ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, + "You have SETTINGS before and after FORMAT, this is not allowed. " + "Consider switching to SETTINGS before FORMAT and disable allow_settings_after_format_in_insert."); /// Settings are written like SET query, so parse them with ParserSetQuery ParserSetQuery parser_settings(true); @@ -230,14 +228,14 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) /// If format name is followed by ';' (end of query symbol) there is no data to insert. 
if (data < end && *data == ';') - throw Exception("You have excessive ';' symbol before data for INSERT.\n" + throw Exception(ErrorCodes::SYNTAX_ERROR, "You have excessive ';' symbol before data for INSERT.\n" "Example:\n\n" "INSERT INTO t (x, y) FORMAT TabSeparated\n" ";\tHello\n" "2\tWorld\n" "\n" "Note that there is no ';' just after format name, " - "you need to put at least one whitespace symbol before the data.", ErrorCodes::SYNTAX_ERROR); + "you need to put at least one whitespace symbol before the data."); while (data < end && (*data == ' ' || *data == '\t' || *data == '\f')) ++data; diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp index 107db51f869..17b082a2ddb 100644 --- a/src/Parsers/ParserSelectQuery.cpp +++ b/src/Parsers/ParserSelectQuery.cpp @@ -324,7 +324,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } if (limit_with_ties_occured && distinct_on_expression_list) - throw Exception("Can not use WITH TIES alongside LIMIT BY/DISTINCT ON", ErrorCodes::LIMIT_BY_WITH_TIES_IS_NOT_SUPPORTED); + throw Exception(ErrorCodes::LIMIT_BY_WITH_TIES_IS_NOT_SUPPORTED, "Can not use WITH TIES alongside LIMIT BY/DISTINCT ON"); if (s_by.ignore(pos, expected)) { @@ -332,10 +332,10 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) /// But there are other kind of queries like LIMIT n BY smth LIMIT m WITH TIES which are allowed. /// So we have to ignore WITH TIES exactly in LIMIT BY state. if (limit_with_ties_occured) - throw Exception("Can not use WITH TIES alongside LIMIT BY/DISTINCT ON", ErrorCodes::LIMIT_BY_WITH_TIES_IS_NOT_SUPPORTED); + throw Exception(ErrorCodes::LIMIT_BY_WITH_TIES_IS_NOT_SUPPORTED, "Can not use WITH TIES alongside LIMIT BY/DISTINCT ON"); if (distinct_on_expression_list) - throw Exception("Can not use DISTINCT ON alongside LIMIT BY", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Can not use DISTINCT ON alongside LIMIT BY"); limit_by_length = limit_length; limit_by_offset = limit_offset; @@ -347,7 +347,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } if (top_length && limit_length) - throw Exception("Can not use TOP and LIMIT together", ErrorCodes::TOP_AND_LIMIT_TOGETHER); + throw Exception(ErrorCodes::TOP_AND_LIMIT_TOGETHER, "Can not use TOP and LIMIT together"); } else if (s_offset.ignore(pos, expected)) { @@ -360,7 +360,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (s_row.ignore(pos, expected)) { if (s_rows.ignore(pos, expected)) - throw Exception("Can not use ROW and ROWS together", ErrorCodes::ROW_AND_ROWS_TOGETHER); + throw Exception(ErrorCodes::ROW_AND_ROWS_TOGETHER, "Can not use ROW and ROWS together"); offset_with_fetch_maybe = true; } else if (s_rows.ignore(pos, expected)) @@ -372,12 +372,12 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { /// OFFSET FETCH clause must exists with "ORDER BY" if (!order_expression_list) - throw Exception("Can not use OFFSET FETCH clause without ORDER BY", ErrorCodes::OFFSET_FETCH_WITHOUT_ORDER_BY); + throw Exception(ErrorCodes::OFFSET_FETCH_WITHOUT_ORDER_BY, "Can not use OFFSET FETCH clause without ORDER BY"); if (s_first.ignore(pos, expected)) { if (s_next.ignore(pos, expected)) - throw Exception("Can not use FIRST and NEXT together", ErrorCodes::FIRST_AND_NEXT_TOGETHER); + throw Exception(ErrorCodes::FIRST_AND_NEXT_TOGETHER, "Can not use FIRST and NEXT together"); } else if 
(!s_next.ignore(pos, expected)) return false; @@ -388,7 +388,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (s_row.ignore(pos, expected)) { if (s_rows.ignore(pos, expected)) - throw Exception("Can not use ROW and ROWS together", ErrorCodes::ROW_AND_ROWS_TOGETHER); + throw Exception(ErrorCodes::ROW_AND_ROWS_TOGETHER, "Can not use ROW and ROWS together"); } else if (!s_rows.ignore(pos, expected)) return false; @@ -452,7 +452,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) /// WITH TIES was used without ORDER BY if (!order_expression_list && select_query->limit_with_ties) - throw Exception("Can not use WITH TIES without ORDER BY", ErrorCodes::WITH_TIES_WITHOUT_ORDER_BY); + throw Exception(ErrorCodes::WITH_TIES_WITHOUT_ORDER_BY, "Can not use WITH TIES without ORDER BY"); /// SETTINGS key1 = value1, key2 = value2, ... if (s_settings.ignore(pos, expected)) diff --git a/src/Parsers/ParserTablesInSelectQuery.cpp b/src/Parsers/ParserTablesInSelectQuery.cpp index 2247167c66e..617ab7816d4 100644 --- a/src/Parsers/ParserTablesInSelectQuery.cpp +++ b/src/Parsers/ParserTablesInSelectQuery.cpp @@ -190,11 +190,11 @@ bool ParserTablesInSelectQueryElement::parseImpl(Pos & pos, ASTPtr & node, Expec if (table_join->strictness != JoinStrictness::Unspecified && table_join->kind == JoinKind::Cross) - throw Exception("You must not specify ANY or ALL for CROSS JOIN.", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "You must not specify ANY or ALL for CROSS JOIN."); if ((table_join->strictness == JoinStrictness::Semi || table_join->strictness == JoinStrictness::Anti) && (table_join->kind != JoinKind::Left && table_join->kind != JoinKind::Right)) - throw Exception("SEMI|ANTI JOIN should be LEFT or RIGHT.", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "SEMI|ANTI JOIN should be LEFT or RIGHT."); if (!ParserKeyword("JOIN").ignore(pos, expected)) return false; diff --git a/src/Parsers/obfuscateQueries.cpp b/src/Parsers/obfuscateQueries.cpp index f8697b862c7..09267148c79 100644 --- a/src/Parsers/obfuscateQueries.cpp +++ b/src/Parsers/obfuscateQueries.cpp @@ -602,7 +602,7 @@ std::string_view obfuscateWord(std::string_view src, WordMap & obfuscate_map, Wo { /// Prevent using too many nouns if (obfuscate_map.size() * 2 > nouns.size()) - throw Exception("Too many unique identifiers in queries", ErrorCodes::TOO_MANY_TEMPORARY_COLUMNS); + throw Exception(ErrorCodes::TOO_MANY_TEMPORARY_COLUMNS, "Too many unique identifiers in queries"); std::string_view & mapped = obfuscate_map[src]; if (!mapped.empty()) diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 2484bd093ec..95269f70bcc 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -341,7 +341,8 @@ JoinTreeQueryPlan buildQueryPlanForTableExpression(const QueryTreeNodePtr & tabl } else { - throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected table, table function, query or union. Actual {}", table_expression->formatASTForErrorMessage()); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected table, table function, query or union. 
Actual {}", + table_expression->formatASTForErrorMessage()); } if (from_stage == QueryProcessingStage::FetchColumns) diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp index f6152e324c9..4546e84a02b 100644 --- a/src/Planner/PlannerJoins.cpp +++ b/src/Planner/PlannerJoins.cpp @@ -725,7 +725,8 @@ std::shared_ptr chooseJoinAlgorithm(std::shared_ptr & table_jo if (table_join->isEnabledAlgorithm(JoinAlgorithm::AUTO)) return std::make_shared(table_join, right_table_expression_header); - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Can't execute any of specified algorithms for specified strictness/kind and right storage type"); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Can't execute any of specified algorithms for specified strictness/kind and right storage type"); } } diff --git a/src/Planner/Utils.cpp b/src/Planner/Utils.cpp index fa6bd774960..221e683f4e2 100644 --- a/src/Planner/Utils.cpp +++ b/src/Planner/Utils.cpp @@ -236,8 +236,9 @@ bool queryHasArrayJoinInJoinTree(const QueryTreeNodePtr & query_node) default: { throw Exception(ErrorCodes::LOGICAL_ERROR, - "Unexpected node type for table expression. Expected table, table function, query, union, join or array join. Actual {}", - join_tree_node_to_process->getNodeTypeName()); + "Unexpected node type for table expression. " + "Expected table, table function, query, union, join or array join. Actual {}", + join_tree_node_to_process->getNodeTypeName()); } } } @@ -301,8 +302,9 @@ bool queryHasWithTotalsInAnySubqueryInJoinTree(const QueryTreeNodePtr & query_no default: { throw Exception(ErrorCodes::LOGICAL_ERROR, - "Unexpected node type for table expression. Expected table, table function, query, union, join or array join. Actual {}", - join_tree_node_to_process->getNodeTypeName()); + "Unexpected node type for table expression. " + "Expected table, table function, query, union, join or array join. 
Actual {}", + join_tree_node_to_process->getNodeTypeName()); } } } diff --git a/src/Processors/Chunk.cpp b/src/Processors/Chunk.cpp index 61bd118636d..bbfa1683cf6 100644 --- a/src/Processors/Chunk.cpp +++ b/src/Processors/Chunk.cpp @@ -70,8 +70,8 @@ void Chunk::checkNumRowsIsConsistent() { auto & column = columns[i]; if (column->size() != num_rows) - throw Exception("Invalid number of rows in Chunk column " + column->getName()+ " position " + toString(i) + ": expected " + - toString(num_rows) + ", got " + toString(column->size()), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid number of rows in Chunk column {}: expected {}, got {}", + column->getName()+ " position " + toString(i), toString(num_rows), toString(column->size())); } } @@ -108,8 +108,8 @@ void Chunk::addColumn(ColumnPtr column) if (empty()) num_rows = column->size(); else if (column->size() != num_rows) - throw Exception("Invalid number of rows in Chunk column " + column->getName()+ ": expected " + - toString(num_rows) + ", got " + toString(column->size()), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid number of rows in Chunk column {}, got {}", + column->getName()+ ": expected " + toString(num_rows), toString(column->size())); columns.emplace_back(std::move(column)); } @@ -133,11 +133,11 @@ void Chunk::addColumn(size_t position, ColumnPtr column) void Chunk::erase(size_t position) { if (columns.empty()) - throw Exception("Chunk is empty", ErrorCodes::POSITION_OUT_OF_BOUND); + throw Exception(ErrorCodes::POSITION_OUT_OF_BOUND, "Chunk is empty"); if (position >= columns.size()) - throw Exception("Position " + toString(position) + " out of bound in Chunk::erase(), max position = " - + toString(columns.size() - 1), ErrorCodes::POSITION_OUT_OF_BOUND); + throw Exception(ErrorCodes::POSITION_OUT_OF_BOUND, "Position {} out of bound in Chunk::erase(), max position = {}", + toString(position), toString(columns.size() - 1)); columns.erase(columns.begin() + position); } diff --git a/src/Processors/Executors/ExecutingGraph.cpp b/src/Processors/Executors/ExecutingGraph.cpp index cd94ca7ceae..f84efabdee1 100644 --- a/src/Processors/Executors/ExecutingGraph.cpp +++ b/src/Processors/Executors/ExecutingGraph.cpp @@ -392,10 +392,34 @@ bool ExecutingGraph::updateNode(uint64_t pid, Queue & queue, Queue & async_queue void ExecutingGraph::cancel() { - std::lock_guard guard(processors_mutex); - for (auto & processor : *processors) - processor->cancel(); - cancelled = true; + std::exception_ptr exception_ptr; + + { + std::lock_guard guard(processors_mutex); + for (auto & processor : *processors) + { + try + { + processor->cancel(); + } + catch (...) + { + if (!exception_ptr) + exception_ptr = std::current_exception(); + + /// Log any exception since: + /// a) they are pretty rare (the only that I know is from + /// RemoteQueryExecutor) + /// b) there can be exception during query execution, and in this + /// case, this exception can be ignored (not showed to the user). 
+ tryLogCurrentException("ExecutingGraph"); + } + } + cancelled = true; + } + + if (exception_ptr) + std::rethrow_exception(exception_ptr); } } diff --git a/src/Processors/Executors/ExecutorTasks.cpp b/src/Processors/Executors/ExecutorTasks.cpp index d5c2bfe7399..e61d225a968 100644 --- a/src/Processors/Executors/ExecutorTasks.cpp +++ b/src/Processors/Executors/ExecutorTasks.cpp @@ -41,7 +41,7 @@ void ExecutorTasks::tryWakeUpAnyOtherThreadWithTasks(ExecutionThreadContext & se thread_to_wake = threads_queue.popAny(); if (thread_to_wake >= use_threads) - throw Exception("Non-empty queue without allocated thread", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Non-empty queue without allocated thread"); lock.unlock(); executor_contexts[thread_to_wake]->wakeUp(); @@ -89,7 +89,7 @@ void ExecutorTasks::tryGetTask(ExecutionThreadContext & context) { if (finished) return; - throw Exception("Empty task was returned from async task queue", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty task was returned from async task queue"); } context.setTask(static_cast(res.data)); diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index ad504e65f94..f1e044e470b 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -187,7 +187,7 @@ void PipelineExecutor::finalizeExecution() } if (!all_processors_finished) - throw Exception("Pipeline stuck. Current state:\n" + dumpPipeline(), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Pipeline stuck. Current state:\n{}", dumpPipeline()); } void PipelineExecutor::executeSingleThread(size_t thread_num) diff --git a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp index 5799fbcc5d8..fbbf8c119ce 100644 --- a/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp +++ b/src/Processors/Executors/PullingAsyncPipelineExecutor.cpp @@ -175,20 +175,35 @@ bool PullingAsyncPipelineExecutor::pull(Block & block, uint64_t milliseconds) void PullingAsyncPipelineExecutor::cancel() { + if (!data) + return; + /// Cancel execution if it wasn't finished. - if (data && !data->is_finished && data->executor) - data->executor->cancel(); + try + { + if (!data->is_finished && data->executor) + data->executor->cancel(); + } + catch (...) + { + /// Store the exception only if there was no exception during query execution, + /// since only one exception can be re-thrown. + if (!data->has_exception) + { + data->exception = std::current_exception(); + data->has_exception = true; + } + } /// The following code is needed to rethrow exception from PipelineExecutor. /// It could have been thrown from pull(), but we will not likely call it again. /// Join thread here to wait for possible exception. - if (data && data->thread.joinable()) + if (data->thread.joinable()) data->thread.join(); /// Rethrow exception to not swallow it in destructor.
- if (data) - data->rethrowExceptionIfHas(); + data->rethrowExceptionIfHas(); } Chunk PullingAsyncPipelineExecutor::getTotals() diff --git a/src/Processors/Executors/TasksQueue.h b/src/Processors/Executors/TasksQueue.h index 542e15eb482..bb8996fc1a6 100644 --- a/src/Processors/Executors/TasksQueue.h +++ b/src/Processors/Executors/TasksQueue.h @@ -25,7 +25,7 @@ public: size_t getAnyThreadWithTasks(size_t from_thread = 0) { if (num_tasks == 0) - throw Exception("TaskQueue is empty", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "TaskQueue is empty"); for (size_t i = 0; i < queues.size(); ++i) { @@ -37,7 +37,7 @@ public: from_thread = 0; } - throw Exception("TaskQueue is empty", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "TaskQueue is empty"); } Task * pop(size_t thread_num) diff --git a/src/Processors/Executors/ThreadsQueue.h b/src/Processors/Executors/ThreadsQueue.h index 5aca8147706..9ccdf6bcc09 100644 --- a/src/Processors/Executors/ThreadsQueue.h +++ b/src/Processors/Executors/ThreadsQueue.h @@ -36,7 +36,7 @@ struct ThreadsQueue void push(size_t thread) { if (unlikely(has(thread))) - throw Exception("Can't push thread because it is already in threads queue", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't push thread because it is already in threads queue"); swapThreads(thread, stack[stack_size]); ++stack_size; @@ -45,7 +45,7 @@ struct ThreadsQueue void pop(size_t thread) { if (unlikely(!has(thread))) - throw Exception("Can't pop thread because it is not in threads queue", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't pop thread because it is not in threads queue"); --stack_size; swapThreads(thread, stack[stack_size]); @@ -54,7 +54,7 @@ struct ThreadsQueue size_t popAny() { if (unlikely(stack_size == 0)) - throw Exception("Can't pop from empty queue", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't pop from empty queue"); --stack_size; return stack[stack_size]; diff --git a/src/Processors/Formats/IRowInputFormat.cpp b/src/Processors/Formats/IRowInputFormat.cpp index f7374111a30..81c818e3334 100644 --- a/src/Processors/Formats/IRowInputFormat.cpp +++ b/src/Processors/Formats/IRowInputFormat.cpp @@ -114,7 +114,7 @@ Chunk IRowInputFormat::generate() { size_t column_size = columns[column_idx]->size(); if (column_size == 0) - throw Exception("Unexpected empty column", ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS); + throw Exception(ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS, "Unexpected empty column"); block_missing_values.setBit(column_idx, column_size - 1); } } @@ -245,7 +245,7 @@ Chunk IRowInputFormat::generate() void IRowInputFormat::syncAfterError() { - throw Exception("Method syncAfterError is not implemented for input format", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method syncAfterError is not implemented for input format"); } void IRowInputFormat::resetParser() diff --git a/src/Processors/Formats/IRowOutputFormat.cpp b/src/Processors/Formats/IRowOutputFormat.cpp index ac44dbc0157..3c0692945d4 100644 --- a/src/Processors/Formats/IRowOutputFormat.cpp +++ b/src/Processors/Formats/IRowOutputFormat.cpp @@ -40,7 +40,7 @@ void IRowOutputFormat::consumeTotals(DB::Chunk chunk) auto num_rows = chunk.getNumRows(); if (num_rows != 1) - throw Exception("Got " + toString(num_rows) + " in totals chunk, expected 1", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got {} in totals chunk, 
expected 1", num_rows); const auto & columns = chunk.getColumns(); @@ -57,7 +57,7 @@ void IRowOutputFormat::consumeExtremes(DB::Chunk chunk) auto num_rows = chunk.getNumRows(); const auto & columns = chunk.getColumns(); if (num_rows != 2) - throw Exception("Got " + toString(num_rows) + " in extremes chunk, expected 2", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got {} in extremes chunk, expected 2", num_rows); writeBeforeExtremes(); writeMinExtreme(columns, 0); diff --git a/src/Processors/Formats/ISchemaReader.cpp b/src/Processors/Formats/ISchemaReader.cpp index 4599cdb8748..48cb093f0ab 100644 --- a/src/Processors/Formats/ISchemaReader.cpp +++ b/src/Processors/Formats/ISchemaReader.cpp @@ -21,12 +21,14 @@ void checkFinalInferredType(DataTypePtr & type, const String & name, const Forma { if (!default_type) throw Exception( - ErrorCodes::ONLY_NULLS_WHILE_READING_SCHEMA, - "Cannot determine type for column '{}' by first {} rows of data, most likely this column contains only Nulls or empty " - "Arrays/Maps. You can specify the type for this column using setting schema_inference_hints. " - "If your data contains complex JSON objects, try enabling one of the settings allow_experimental_object_type/input_format_json_read_objects_as_strings", - name, - rows_read); + ErrorCodes::ONLY_NULLS_WHILE_READING_SCHEMA, + "Cannot determine type for column '{}' by first {} rows " + "of data, most likely this column contains only Nulls or empty " + "Arrays/Maps. You can specify the type for this column using setting schema_inference_hints. " + "If your data contains complex JSON objects, try enabling one " + "of the settings allow_experimental_object_type/input_format_json_read_objects_as_strings", + name, + rows_read); type = default_type; } diff --git a/src/Processors/Formats/ISchemaReader.h b/src/Processors/Formats/ISchemaReader.h index 9f3f4d880ef..edc5c6068c3 100644 --- a/src/Processors/Formats/ISchemaReader.h +++ b/src/Processors/Formats/ISchemaReader.h @@ -12,6 +12,7 @@ namespace DB namespace ErrorCodes { extern const int TYPE_MISMATCH; + extern const int INCORRECT_DATA; } /// Base class for schema inference for the data in some specific format. 
@@ -176,6 +177,25 @@ void chooseResultColumnType( } } +template +void chooseResultColumnTypes( + SchemaReader & schema_reader, + DataTypes & types, + DataTypes & new_types, + const DataTypePtr & default_type, + const std::vector & column_names, + size_t row) +{ + if (types.size() != new_types.size()) + throw Exception(ErrorCodes::INCORRECT_DATA, "Rows have different amount of values"); + + if (types.size() != column_names.size()) + throw Exception(ErrorCodes::INCORRECT_DATA, "The number of column names {} differs from the number of types {}", column_names.size(), types.size()); + + for (size_t i = 0; i != types.size(); ++i) + chooseResultColumnType(schema_reader, types[i], new_types[i], default_type, column_names[i], row); +} + void checkFinalInferredType(DataTypePtr & type, const String & name, const FormatSettings & settings, const DataTypePtr & default_type, size_t rows_read); Strings splitColumnNames(const String & column_names_str); diff --git a/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp b/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp index ebd9783b4fd..96ed2a7021e 100644 --- a/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp +++ b/src/Processors/Formats/Impl/ArrowBufferedStreams.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -95,6 +96,12 @@ arrow::Result> RandomAccessFileFromSeekableReadBu return buffer; } +arrow::Future> RandomAccessFileFromSeekableReadBuffer::ReadAsync(const arrow::io::IOContext &, int64_t position, int64_t nbytes) +{ + /// Just a stub to to avoid using internal arrow thread pool + return arrow::Future>::MakeFinished(ReadAt(position, nbytes)); +} + arrow::Status RandomAccessFileFromSeekableReadBuffer::Seek(int64_t position) { seekable_in.seek(position, SEEK_SET); diff --git a/src/Processors/Formats/Impl/ArrowBufferedStreams.h b/src/Processors/Formats/Impl/ArrowBufferedStreams.h index dc69b5a50fa..325975a7cfe 100644 --- a/src/Processors/Formats/Impl/ArrowBufferedStreams.h +++ b/src/Processors/Formats/Impl/ArrowBufferedStreams.h @@ -62,6 +62,11 @@ public: arrow::Result> Read(int64_t nbytes) override; + /// Override async reading to avoid using internal arrow thread pool. + /// In our code we don't use async reading, so implementation is sync, + /// we just call ReadAt and return future with ready value. + arrow::Future> ReadAsync(const arrow::io::IOContext&, int64_t position, int64_t nbytes) override; + arrow::Status Seek(int64_t position) override; private: diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index 52c868d4e0c..8b9e309aa1f 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -386,7 +386,9 @@ static ColumnWithTypeAndName readColumnWithIndexesDataImpl(std::shared_ptrlength(); ++i) { if (data[i] < 0 || data[i] >= dict_size) - throw Exception(ErrorCodes::INCORRECT_DATA, "Index {} in Dictionary column is out of bounds, dictionary size is {}", Int64(data[i]), UInt64(dict_size)); + throw Exception(ErrorCodes::INCORRECT_DATA, + "Index {} in Dictionary column is out of bounds, dictionary size is {}", + Int64(data[i]), UInt64(dict_size)); } /// If dictionary type is not nullable and arrow dictionary contains default type @@ -734,13 +736,15 @@ static ColumnWithTypeAndName readColumnFromArrowColumn( } throw Exception( - ErrorCodes::UNKNOWN_TYPE, - "Unsupported {} type '{}' of an input column '{}'. 
If it happens during schema inference and you want to skip columns with " - "unsupported types, you can enable setting input_format_{}_skip_columns_with_unsupported_types_in_schema_inference", - format_name, - arrow_column->type()->name(), - column_name, - boost::algorithm::to_lower_copy(format_name)); + ErrorCodes::UNKNOWN_TYPE, + "Unsupported {} type '{}' of an input column '{}'. " + "If it happens during schema inference and you want to skip columns with " + "unsupported types, you can enable setting input_format_{}" + "_skip_columns_with_unsupported_types_in_schema_inference", + format_name, + arrow_column->type()->name(), + column_name, + boost::algorithm::to_lower_copy(format_name)); } } } diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index 8c6cd8bd91b..fa437b16ba7 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -146,7 +146,7 @@ static void insertNumber(IColumn & column, WhichDataType type, T value) assert_cast &>(column).insertValue(static_cast(value)); break; default: - throw Exception("Type is not compatible with Avro", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Type is not compatible with Avro"); } } @@ -519,7 +519,7 @@ AvroDeserializer::SkipFn AvroDeserializer::createSkipFn(avro::NodePtr root_node) auto index = decoder.decodeUnionIndex(); if (index >= union_skip_fns.size()) { - throw Exception("Union index out of boundary", ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Union index out of boundary"); } union_skip_fns[index](decoder); }; @@ -575,7 +575,7 @@ AvroDeserializer::SkipFn AvroDeserializer::createSkipFn(avro::NodePtr root_node) }; } default: - throw Exception("Unsupported Avro type " + root_node->name().fullname() + " (" + toString(int(root_node->type())) + ")", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unsupported Avro type {} ({})", root_node->name().fullname(), int(root_node->type())); } } @@ -719,7 +719,7 @@ AvroDeserializer::AvroDeserializer(const Block & header, avro::ValidSchema schem const auto & schema_root = schema.root(); if (schema_root->type() != avro::AVRO_RECORD) { - throw Exception("Root schema must be a record", ErrorCodes::TYPE_MISMATCH); + throw Exception(ErrorCodes::TYPE_MISMATCH, "Root schema must be a record"); } column_found.resize(header.columns()); @@ -731,7 +731,7 @@ AvroDeserializer::AvroDeserializer(const Block & header, avro::ValidSchema schem { if (!column_found[i]) { - throw Exception("Field " + header.getByPosition(i).name + " not found in Avro schema", ErrorCodes::THERE_IS_NO_COLUMN); + throw Exception(ErrorCodes::THERE_IS_NO_COLUMN, "Field {} not found in Avro schema", header.getByPosition(i).name); } } } @@ -782,7 +782,7 @@ public: : base_url(base_url_), schema_cache(schema_cache_max_size) { if (base_url.empty()) - throw Exception("Empty Schema Registry URL", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Empty Schema Registry URL"); } avro::ValidSchema getSchema(uint32_t id) @@ -889,7 +889,7 @@ static uint32_t readConfluentSchemaId(ReadBuffer & in) if (e.code() == ErrorCodes::CANNOT_READ_ALL_DATA) { /* empty or incomplete message without Avro Confluent magic number or schema id */ - throw Exception("Missing AvroConfluent magic byte or schema identifier.", ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Missing AvroConfluent magic byte or 
schema identifier."); } else throw; @@ -897,8 +897,8 @@ static uint32_t readConfluentSchemaId(ReadBuffer & in) if (magic != 0x00) { - throw Exception("Invalid magic byte before AvroConfluent schema identifier." - " Must be zero byte, found " + std::to_string(int(magic)) + " instead", ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid magic byte before AvroConfluent schema identifier. " + "Must be zero byte, found {} instead", int(magic)); } return schema_id; @@ -977,7 +977,7 @@ NamesAndTypesList AvroSchemaReader::readSchema() } if (root_node->type() != avro::Type::AVRO_RECORD) - throw Exception("Root schema must be a record", ErrorCodes::TYPE_MISMATCH); + throw Exception(ErrorCodes::TYPE_MISMATCH, "Root schema must be a record"); NamesAndTypesList names_and_types; for (int i = 0; i != static_cast(root_node->leaves()); ++i) diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.h b/src/Processors/Formats/Impl/AvroRowInputFormat.h index 4525d7d33b0..96370b8c4c7 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.h +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.h @@ -103,7 +103,7 @@ private: auto index = decoder.decodeUnionIndex(); if (index >= actions.size()) { - throw Exception("Union index out of boundary", ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Union index out of boundary"); } actions[index].execute(columns, decoder, ext); break; diff --git a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp index 7efe2a999b4..3f29469c883 100644 --- a/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowOutputFormat.cpp @@ -384,7 +384,7 @@ AvroSerializer::SchemaWithSerializeFn AvroSerializer::createSchemaWithSerializeF default: break; } - throw Exception("Type " + data_type->getName() + " is not supported for Avro output", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Type {} is not supported for Avro output", data_type->getName()); } @@ -438,7 +438,7 @@ static avro::Codec getCodec(const std::string & codec_name) if (codec_name == "snappy") return avro::Codec::SNAPPY_CODEC; #endif - throw Exception("Avro codec " + codec_name + " is not available", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Avro codec {} is not available", codec_name); } AvroRowOutputFormat::AvroRowOutputFormat( diff --git a/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp index 84d84756bd0..349b063fabe 100644 --- a/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp @@ -146,7 +146,8 @@ static void readAndInsertInteger(ReadBuffer & in, IColumn & column, const DataTy } else { - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert BSON {} into column with type {}", getBSONTypeName(bson_type), data_type->getName()); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert BSON {} into column with type {}", + getBSONTypeName(bson_type), data_type->getName()); } } @@ -154,7 +155,8 @@ template static void readAndInsertDouble(ReadBuffer & in, IColumn & column, const DataTypePtr & data_type, BSONType bson_type) { if (bson_type != BSONType::DOUBLE) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert BSON {} into column with type {}", getBSONTypeName(bson_type), data_type->getName()); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert BSON {} 
into column with type {}", + getBSONTypeName(bson_type), data_type->getName()); Float64 value; readBinary(value, in); @@ -165,7 +167,8 @@ template static void readAndInsertSmallDecimal(ReadBuffer & in, IColumn & column, const DataTypePtr & data_type, BSONType bson_type) { if (bson_type != expected_bson_type) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert BSON {} into column with type {}", getBSONTypeName(bson_type), data_type->getName()); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert BSON {} into column with type {}", + getBSONTypeName(bson_type), data_type->getName()); DecimalType value; readBinary(value, in); @@ -186,12 +189,14 @@ template static void readAndInsertBigInteger(ReadBuffer & in, IColumn & column, const DataTypePtr & data_type, BSONType bson_type) { if (bson_type != BSONType::BINARY) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert BSON {} into column with type {}", getBSONTypeName(bson_type), data_type->getName()); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert BSON {} into column with type {}", + getBSONTypeName(bson_type), data_type->getName()); auto size = readBSONSize(in); auto subtype = getBSONBinarySubtype(readBSONType(in)); if (subtype != BSONBinarySubtype::BINARY) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert BSON Binary subtype {} into column with type {}", getBSONBinarySubtypeName(subtype), data_type->getName()); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert BSON Binary subtype {} into column with type {}", + getBSONBinarySubtypeName(subtype), data_type->getName()); using ValueType = typename ColumnType::ValueType; @@ -216,7 +221,7 @@ static void readAndInsertStringImpl(ReadBuffer & in, IColumn & column, size_t si auto & fixed_string_column = assert_cast(column); size_t n = fixed_string_column.getN(); if (size > n) - throw Exception("Too large string for FixedString column", ErrorCodes::TOO_LARGE_STRING_SIZE); + throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "Too large string for FixedString column"); auto & data = fixed_string_column.getChars(); @@ -375,18 +380,20 @@ void BSONEachRowRowInputFormat::readTuple(IColumn & column, const DataTypePtr & auto try_get_index = data_type_tuple->tryGetPositionByName(name.toString()); if (!try_get_index) throw Exception( - ErrorCodes::INCORRECT_DATA, - "Cannot parse tuple column with type {} from BSON array/embedded document field: tuple doesn't have element with name \"{}\"", - data_type->getName(), - name); + ErrorCodes::INCORRECT_DATA, + "Cannot parse tuple column with type {} from BSON array/embedded document field: " + "tuple doesn't have element with name \"{}\"", + data_type->getName(), + name); index = *try_get_index; } if (index >= data_type_tuple->getElements().size()) throw Exception( - ErrorCodes::INCORRECT_DATA, - "Cannot parse tuple column with type {} from BSON array/embedded document field: the number of fields BSON document exceeds the number of fields in tuple", - data_type->getName()); + ErrorCodes::INCORRECT_DATA, + "Cannot parse tuple column with type {} from BSON array/embedded document field: " + "the number of fields BSON document exceeds the number of fields in tuple", + data_type->getName()); readField(tuple_column.getColumn(index), data_type_tuple->getElement(index), nested_bson_type); ++read_nested_columns; @@ -396,11 +403,12 @@ void BSONEachRowRowInputFormat::readTuple(IColumn & column, const DataTypePtr & if (read_nested_columns != data_type_tuple->getElements().size()) throw Exception( - 
ErrorCodes::INCORRECT_DATA, - "Cannot parse tuple column with type {} from BSON array/embedded document field, the number of fields in tuple and BSON document doesn't match: {} != {}", - data_type->getName(), - data_type_tuple->getElements().size(), - read_nested_columns); + ErrorCodes::INCORRECT_DATA, + "Cannot parse tuple column with type {} from BSON array/embedded document field, " + "the number of fields in tuple and BSON document doesn't match: {} != {}", + data_type->getName(), + data_type_tuple->getElements().size(), + read_nested_columns); } void BSONEachRowRowInputFormat::readMap(IColumn & column, const DataTypePtr & data_type, BSONType bson_type) @@ -411,7 +419,9 @@ void BSONEachRowRowInputFormat::readMap(IColumn & column, const DataTypePtr & da const auto * data_type_map = assert_cast(data_type.get()); const auto & key_data_type = data_type_map->getKeyType(); if (!isStringOrFixedString(key_data_type)) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Only maps with String key type are supported in BSON, got key type: {}", key_data_type->getName()); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Only maps with String key type are supported in BSON, got key type: {}", + key_data_type->getName()); const auto & value_data_type = data_type_map->getValueType(); auto & column_map = assert_cast(column); @@ -446,7 +456,9 @@ bool BSONEachRowRowInputFormat::readField(IColumn & column, const DataTypePtr & } if (!format_settings.null_as_default) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert BSON Null value into non-nullable column with type {}", getBSONTypeName(bson_type), data_type->getName()); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Cannot insert BSON Null value into non-nullable column with type {}", + getBSONTypeName(bson_type), data_type->getName()); column.insertDefault(); return false; diff --git a/src/Processors/Formats/Impl/BSONEachRowRowOutputFormat.cpp b/src/Processors/Formats/Impl/BSONEachRowRowOutputFormat.cpp index c9530d4ba81..211021b0d78 100644 --- a/src/Processors/Formats/Impl/BSONEachRowRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/BSONEachRowRowOutputFormat.cpp @@ -229,7 +229,9 @@ size_t BSONEachRowRowOutputFormat::countBSONFieldSize(const IColumn & column, co const auto & map_type = assert_cast(*data_type); if (!isStringOrFixedString(map_type.getKeyType())) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Only maps with String key type are supported in BSON, got key type: {}", map_type.getKeyType()->getName()); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Only maps with String key type are supported in BSON, got key type: {}", + map_type.getKeyType()->getName()); const auto & value_type = map_type.getValueType(); const auto & map_column = assert_cast(column); @@ -452,7 +454,9 @@ void BSONEachRowRowOutputFormat::serializeField(const IColumn & column, const Da { const auto & map_type = assert_cast(*data_type); if (!isStringOrFixedString(map_type.getKeyType())) - throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Only maps with String key type are supported in BSON, got key type: {}", map_type.getKeyType()->getName()); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, + "Only maps with String key type are supported in BSON, got key type: {}", + map_type.getKeyType()->getName()); const auto & value_type = map_type.getValueType(); const auto & map_column = assert_cast(column); diff --git a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp index a41cf687b39..a4f779076eb 100644 --- 
a/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/BinaryRowInputFormat.cpp @@ -90,7 +90,8 @@ void BinaryFormatReader::skipTypes() void BinaryFormatReader::skipField(size_t file_column) { if (file_column >= read_data_types.size()) - throw Exception(ErrorCodes::CANNOT_SKIP_UNKNOWN_FIELD, "Cannot skip unknown field in RowBinaryWithNames format, because it's type is unknown"); + throw Exception(ErrorCodes::CANNOT_SKIP_UNKNOWN_FIELD, + "Cannot skip unknown field in RowBinaryWithNames format, because it's type is unknown"); Field field; read_data_types[file_column]->getDefaultSerialization()->deserializeBinary(field, *in, format_settings); } diff --git a/src/Processors/Formats/Impl/BinaryRowInputFormat.h b/src/Processors/Formats/Impl/BinaryRowInputFormat.h index 7e600c5b3dd..3d3d80f1043 100644 --- a/src/Processors/Formats/Impl/BinaryRowInputFormat.h +++ b/src/Processors/Formats/Impl/BinaryRowInputFormat.h @@ -8,11 +8,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int NOT_IMPLEMENTED; -} - class ReadBuffer; /** A stream for inputting data in a binary line-by-line format. @@ -59,11 +54,6 @@ public: BinaryWithNamesAndTypesSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_); private: - DataTypes readRowAndGetDataTypes() override - { - throw Exception{ErrorCodes::NOT_IMPLEMENTED, "Method readRowAndGetDataTypes is not implemented"}; - } - BinaryFormatReader reader; }; diff --git a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp index 8319ef65e17..f529c6e9363 100644 --- a/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp +++ b/src/Processors/Formats/Impl/CHColumnToArrowColumn.cpp @@ -179,7 +179,7 @@ namespace DB if (need_rescale) { if (common::mulOverflow(value, rescale_multiplier, value)) - throw Exception("Decimal math overflow", ErrorCodes::DECIMAL_OVERFLOW); + throw Exception(ErrorCodes::DECIMAL_OVERFLOW, "Decimal math overflow"); } status = builder.Append(value); } @@ -295,8 +295,7 @@ namespace DB case TypeIndex::UInt64: return extractIndexesImpl(column, start, end, shift); default: - throw Exception(fmt::format("Indexes column must be ColumnUInt, got {}.", column->getName()), - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Indexes column must be ColumnUInt, got {}.", column->getName()); } } @@ -641,8 +640,7 @@ namespace DB case TypeIndex::UInt64: return arrow::int64(); default: - throw Exception(fmt::format("Indexes column for getUniqueIndex must be ColumnUInt, got {}.", indexes_column->getName()), - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Indexes column for getUniqueIndex must be ColumnUInt, got {}.", indexes_column->getName()); } } diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index 1f1bf99739a..20b0c204c8e 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -14,6 +14,7 @@ namespace DB { + namespace ErrorCodes { extern const int BAD_ARGUMENTS; @@ -21,6 +22,19 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } +namespace +{ + void checkBadDelimiter(char delimiter) + { + constexpr std::string_view bad_delimiters = " \t\"'.UL"; + if (bad_delimiters.find(delimiter) != std::string_view::npos) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "CSV format may not work correctly with delimiter '{}'. 
Try use CustomSeparated format instead", + delimiter); + } +} + CSVRowInputFormat::CSVRowInputFormat( const Block & header_, ReadBuffer & in_, @@ -29,13 +43,13 @@ CSVRowInputFormat::CSVRowInputFormat( bool with_types_, const FormatSettings & format_settings_) : CSVRowInputFormat( - header_, in_, params_, with_names_, with_types_, format_settings_, std::make_unique(in_, format_settings_)) + header_, std::make_shared(in_), params_, with_names_, with_types_, format_settings_) { } CSVRowInputFormat::CSVRowInputFormat( const Block & header_, - ReadBuffer & in_, + std::shared_ptr in_, const Params & params_, bool with_names_, bool with_types_, @@ -43,25 +57,49 @@ CSVRowInputFormat::CSVRowInputFormat( std::unique_ptr format_reader_) : RowInputFormatWithNamesAndTypes( header_, - in_, + *in_, params_, false, with_names_, with_types_, format_settings_, - std::move(format_reader_)) + std::move(format_reader_), + format_settings_.csv.try_detect_header), + buf(std::move(in_)) { - const String bad_delimiters = " \t\"'.UL"; - if (bad_delimiters.find(format_settings.csv.delimiter) != String::npos) - throw Exception( - String("CSV format may not work correctly with delimiter '") + format_settings.csv.delimiter - + "'. Try use CustomSeparated format instead.", - ErrorCodes::BAD_ARGUMENTS); + checkBadDelimiter(format_settings_.csv.delimiter); +} + +CSVRowInputFormat::CSVRowInputFormat( + const Block & header_, + std::shared_ptr in_, + const Params & params_, + bool with_names_, + bool with_types_, + const FormatSettings & format_settings_) + : RowInputFormatWithNamesAndTypes( + header_, + *in_, + params_, + false, + with_names_, + with_types_, + format_settings_, + std::make_unique(*in_, format_settings_), + format_settings_.csv.try_detect_header), + buf(std::move(in_)) +{ + checkBadDelimiter(format_settings_.csv.delimiter); } void CSVRowInputFormat::syncAfterError() { - skipToNextLineOrEOF(*in); + skipToNextLineOrEOF(*buf); +} + +void CSVRowInputFormat::setReadBuffer(ReadBuffer & in_) +{ + buf->setSubBuffer(in_); } static void skipEndOfLine(ReadBuffer & in) @@ -86,7 +124,7 @@ static void skipEndOfLine(ReadBuffer & in) ErrorCodes::INCORRECT_DATA); } else if (!in.eof()) - throw Exception("Expected end of line", ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Expected end of line"); } /// Skip `whitespace` symbols allowed in CSV. 
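The constructor changes above give the CSV reader ownership of a PeekableReadBuffer, which lines up with the new csv.try_detect_header setting: the reader has to be able to look at the first rows, decide whether they form a header, and, if they do not, let them be parsed again as ordinary data. A conceptual sketch of that read-then-rewind idea, using plain std::istream instead of ClickHouse's PeekableReadBuffer and a deliberately toy heuristic:

#include <iostream>
#include <sstream>
#include <string>

// Toy heuristic for this sketch only: treat a digit-free first row as a header.
// The detection added in this diff works differently (it compares types inferred
// from the first rows); this is just the smallest thing that illustrates the flow.
bool looksLikeHeader(const std::string & first_row)
{
    return first_row.find_first_of("0123456789") == std::string::npos;
}

int main()
{
    std::istringstream in("name,age\nalice,30\nbob,41\n");

    const auto checkpoint = in.tellg();   // remember where the data starts
    std::string first_row;
    std::getline(in, first_row);

    if (!looksLikeHeader(first_row))
        in.seekg(checkpoint);             // no header: rewind so row 1 is parsed as data

    std::string row;
    while (std::getline(in, row))
        std::cout << row << '\n';         // rows that would go to the regular row parser
}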
@@ -96,51 +134,51 @@ static inline void skipWhitespacesAndTabs(ReadBuffer & in) ++in.position(); } -CSVFormatReader::CSVFormatReader(ReadBuffer & in_, const FormatSettings & format_settings_) : FormatWithNamesAndTypesReader(in_, format_settings_) +CSVFormatReader::CSVFormatReader(PeekableReadBuffer & buf_, const FormatSettings & format_settings_) : FormatWithNamesAndTypesReader(buf_, format_settings_), buf(&buf_) { } void CSVFormatReader::skipFieldDelimiter() { - skipWhitespacesAndTabs(*in); - assertChar(format_settings.csv.delimiter, *in); + skipWhitespacesAndTabs(*buf); + assertChar(format_settings.csv.delimiter, *buf); } template String CSVFormatReader::readCSVFieldIntoString() { - skipWhitespacesAndTabs(*in); + skipWhitespacesAndTabs(*buf); String field; if constexpr (read_string) - readCSVString(field, *in, format_settings.csv); + readCSVString(field, *buf, format_settings.csv); else - readCSVField(field, *in, format_settings.csv); + readCSVField(field, *buf, format_settings.csv); return field; } void CSVFormatReader::skipField() { - skipWhitespacesAndTabs(*in); + skipWhitespacesAndTabs(*buf); NullOutput out; - readCSVStringInto(out, *in, format_settings.csv); + readCSVStringInto(out, *buf, format_settings.csv); } void CSVFormatReader::skipRowEndDelimiter() { - skipWhitespacesAndTabs(*in); + skipWhitespacesAndTabs(*buf); - if (in->eof()) + if (buf->eof()) return; /// we support the extra delimiter at the end of the line - if (*in->position() == format_settings.csv.delimiter) - ++in->position(); + if (*buf->position() == format_settings.csv.delimiter) + ++buf->position(); - skipWhitespacesAndTabs(*in); - if (in->eof()) + skipWhitespacesAndTabs(*buf); + if (buf->eof()) return; - skipEndOfLine(*in); + skipEndOfLine(*buf); } void CSVFormatReader::skipHeaderRow() @@ -148,8 +186,8 @@ void CSVFormatReader::skipHeaderRow() do { skipField(); - skipWhitespacesAndTabs(*in); - } while (checkChar(format_settings.csv.delimiter, *in)); + skipWhitespacesAndTabs(*buf); + } while (checkChar(format_settings.csv.delimiter, *buf)); skipRowEndDelimiter(); } @@ -157,12 +195,13 @@ void CSVFormatReader::skipHeaderRow() template std::vector CSVFormatReader::readRowImpl() { + std::vector fields; do { fields.push_back(readCSVFieldIntoString()); - skipWhitespacesAndTabs(*in); - } while (checkChar(format_settings.csv.delimiter, *in)); + skipWhitespacesAndTabs(*buf); + } while (checkChar(format_settings.csv.delimiter, *buf)); skipRowEndDelimiter(); return fields; @@ -174,12 +213,12 @@ bool CSVFormatReader::parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out) try { - skipWhitespacesAndTabs(*in); - assertChar(delimiter, *in); + skipWhitespacesAndTabs(*buf); + assertChar(delimiter, *buf); } catch (const DB::Exception &) { - if (*in->position() == '\n' || *in->position() == '\r') + if (*buf->position() == '\n' || *buf->position() == '\r') { out << "ERROR: Line feed found where delimiter (" << delimiter << ") is expected." @@ -189,7 +228,7 @@ bool CSVFormatReader::parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out) else { out << "ERROR: There is no delimiter (" << delimiter << "). 
"; - verbosePrintString(in->position(), in->position() + 1, out); + verbosePrintString(buf->position(), buf->position() + 1, out); out << " found instead.\n"; } return false; @@ -200,24 +239,24 @@ bool CSVFormatReader::parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out) bool CSVFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer & out) { - skipWhitespacesAndTabs(*in); + skipWhitespacesAndTabs(*buf); - if (in->eof()) + if (buf->eof()) return true; /// we support the extra delimiter at the end of the line - if (*in->position() == format_settings.csv.delimiter) + if (*buf->position() == format_settings.csv.delimiter) { - ++in->position(); - skipWhitespacesAndTabs(*in); - if (in->eof()) + ++buf->position(); + skipWhitespacesAndTabs(*buf); + if (buf->eof()) return true; } - if (!in->eof() && *in->position() != '\n' && *in->position() != '\r') + if (!buf->eof() && *buf->position() != '\n' && *buf->position() != '\r') { out << "ERROR: There is no line feed. "; - verbosePrintString(in->position(), in->position() + 1, out); + verbosePrintString(buf->position(), buf->position() + 1, out); out << " found instead.\n" " It's like your file has more columns than expected.\n" "And if your file has the right number of columns, maybe it has an unquoted string value with a comma.\n"; @@ -225,7 +264,7 @@ bool CSVFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer & out) return false; } - skipEndOfLine(*in); + skipEndOfLine(*buf); return true; } @@ -236,10 +275,10 @@ bool CSVFormatReader::readField( bool is_last_file_column, const String & /*column_name*/) { - skipWhitespacesAndTabs(*in); + skipWhitespacesAndTabs(*buf); - const bool at_delimiter = !in->eof() && *in->position() == format_settings.csv.delimiter; - const bool at_last_column_line_end = is_last_file_column && (in->eof() || *in->position() == '\n' || *in->position() == '\r'); + const bool at_delimiter = !buf->eof() && *buf->position() == format_settings.csv.delimiter; + const bool at_last_column_line_end = is_last_file_column && (buf->eof() || *buf->position() == '\n' || *buf->position() == '\r'); /// Note: Tuples are serialized in CSV as separate columns, but with empty_as_default or null_as_default /// only one empty or NULL column will be expected @@ -254,17 +293,16 @@ bool CSVFormatReader::readField( column.insertDefault(); return false; } - else if (format_settings.null_as_default && !type->isNullable() && !type->isLowCardinalityNullable()) + + if (format_settings.null_as_default && !type->isNullable() && !type->isLowCardinalityNullable()) { /// If value is null but type is not nullable then use default value instead. - return SerializationNullable::deserializeTextCSVImpl(column, *in, format_settings, serialization); - } - else - { - /// Read the column normally. - serialization->deserializeTextCSV(column, *in, format_settings); - return true; + return SerializationNullable::deserializeTextCSVImpl(column, *buf, format_settings, serialization); } + + /// Read the column normally. 
+ serialization->deserializeTextCSV(column, *buf, format_settings); + return true; } void CSVFormatReader::skipPrefixBeforeHeader() @@ -273,27 +311,39 @@ void CSVFormatReader::skipPrefixBeforeHeader() readRow(); } +void CSVFormatReader::setReadBuffer(ReadBuffer & in_) +{ + buf = assert_cast(&in_); + FormatWithNamesAndTypesReader::setReadBuffer(*buf); +} CSVSchemaReader::CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, const FormatSettings & format_settings_) : FormatWithNamesAndTypesSchemaReader( - in_, + buf, format_settings_, with_names_, with_types_, &reader, - getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule::CSV)) - , reader(in_, format_settings_) + getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule::CSV), + format_settings_.csv.try_detect_header) + , buf(in_) + , reader(buf, format_settings_) { } - -DataTypes CSVSchemaReader::readRowAndGetDataTypes() +std::pair, DataTypes> CSVSchemaReader::readRowAndGetFieldsAndDataTypes() { - if (in.eof()) + if (buf.eof()) return {}; auto fields = reader.readRow(); - return tryInferDataTypesByEscapingRule(fields, reader.getFormatSettings(), FormatSettings::EscapingRule::CSV); + auto data_types = tryInferDataTypesByEscapingRule(fields, format_settings, FormatSettings::EscapingRule::CSV); + return {fields, data_types}; +} + +DataTypes CSVSchemaReader::readRowAndGetDataTypesImpl() +{ + return std::move(readRowAndGetFieldsAndDataTypes().second); } @@ -330,7 +380,7 @@ std::pair fileSegmentationEngineCSVImpl(ReadBuffer & in, DB::Memor { pos = find_first_symbols<'"'>(pos, in.buffer().end()); if (pos > in.buffer().end()) - throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Position in buffer is out of bounds. There must be a bug."); else if (pos == in.buffer().end()) continue; else if (*pos == '"') @@ -346,7 +396,7 @@ std::pair fileSegmentationEngineCSVImpl(ReadBuffer & in, DB::Memor { pos = find_first_symbols<'"', '\r', '\n'>(pos, in.buffer().end()); if (pos > in.buffer().end()) - throw Exception("Position in buffer is out of bounds. There must be a bug.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Position in buffer is out of bounds. There must be a bug."); else if (pos == in.buffer().end()) continue; @@ -383,10 +433,10 @@ std::pair fileSegmentationEngineCSVImpl(ReadBuffer & in, DB::Memor void registerFileSegmentationEngineCSV(FormatFactory & factory) { - auto register_func = [&](const String & format_name, bool with_names, bool with_types) + auto register_func = [&](const String & format_name, bool, bool) { - size_t min_rows = 1 + int(with_names) + int(with_types); - factory.registerFileSegmentationEngine(format_name, [min_rows](ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows) + static constexpr size_t min_rows = 3; /// Make it 3 for header auto detection (first 3 rows must be always in the same segment). 
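// Why "first 3 rows must be always in the same segment": with header auto-detection the
// leading rows cannot be classified in isolation. A hypothetical CSVWithNamesAndTypes
// payload (made-up names and values, for illustration only):
//
//     id,name          <- row 1: possibly column names
//     UInt32,String    <- row 2: possibly column types
//     1,alice          <- row 3: first data row; comparing it with rows 1 and 2 is what
//                         lets the reader decide whether they really form a header
//
// If segmentation could split these three rows across chunks, a parallel parser might see
// the types row without the names row (or without any data row) and misclassify it.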
+ factory.registerFileSegmentationEngine(format_name, [](ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows) { return fileSegmentationEngineCSVImpl(in, memory, min_bytes, min_rows, max_rows); }); @@ -410,7 +460,7 @@ void registerCSVSchemaReader(FormatFactory & factory) { String result = getAdditionalFormatInfoByEscapingRule(settings, FormatSettings::EscapingRule::CSV); if (!with_names) - result += fmt::format(", column_names_for_schema_inference={}", settings.column_names_for_schema_inference); + result += fmt::format(", column_names_for_schema_inference={}, try_detect_header={}", settings.column_names_for_schema_inference, settings.csv.try_detect_header); return result; }); } diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.h b/src/Processors/Formats/Impl/CSVRowInputFormat.h index 1d79265c22b..86f7fe3466c 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.h +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.h @@ -26,19 +26,27 @@ public: String getName() const override { return "CSVRowInputFormat"; } + void setReadBuffer(ReadBuffer & in_) override; + protected: - explicit CSVRowInputFormat(const Block & header_, ReadBuffer & in_, const Params & params_, - bool with_names_, bool with_types_, const FormatSettings & format_settings_, std::unique_ptr format_reader_); + CSVRowInputFormat(const Block & header_, std::shared_ptr in_, const Params & params_, + bool with_names_, bool with_types_, const FormatSettings & format_settings_, std::unique_ptr format_reader_); + + CSVRowInputFormat(const Block & header_, std::shared_ptr in_buf_, const Params & params_, + bool with_names_, bool with_types_, const FormatSettings & format_settings_); private: bool allowSyncAfterError() const override { return true; } void syncAfterError() override; + +protected: + std::shared_ptr buf; }; class CSVFormatReader : public FormatWithNamesAndTypesReader { public: - CSVFormatReader(ReadBuffer & in_, const FormatSettings & format_settings_); + CSVFormatReader(PeekableReadBuffer & buf_, const FormatSettings & format_settings_); bool parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out) override; bool parseRowEndWithDiagnosticInfo(WriteBuffer & out) override; @@ -64,12 +72,21 @@ public: std::vector readTypes() override { return readHeaderRow(); } std::vector readHeaderRow() { return readRowImpl(); } std::vector readRow() { return readRowImpl(); } + std::vector readRowForHeaderDetection() override { return readHeaderRow(); } + template std::vector readRowImpl(); template String readCSVFieldIntoString(); + + void setReadBuffer(ReadBuffer & in_) override; + + FormatSettings::EscapingRule getEscapingRule() const override { return FormatSettings::EscapingRule::CSV; } + +protected: + PeekableReadBuffer * buf; }; class CSVSchemaReader : public FormatWithNamesAndTypesSchemaReader @@ -78,9 +95,12 @@ public: CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, const FormatSettings & format_settings_); private: - DataTypes readRowAndGetDataTypes() override; + DataTypes readRowAndGetDataTypesImpl() override; + std::pair, DataTypes> readRowAndGetFieldsAndDataTypes() override; + PeekableReadBuffer buf; CSVFormatReader reader; + DataTypes buffered_types; }; std::pair fileSegmentationEngineCSVImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t min_rows, size_t max_rows); diff --git a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp index 9e7a5a9aa8f..5d438d47de6 100644 --- 
a/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp +++ b/src/Processors/Formats/Impl/ConstantExpressionTemplate.cpp @@ -130,8 +130,8 @@ static void fillLiteralInfo(DataTypes & nested_types, LiteralInfo & info) field_type = Field::Types::Map; } else - throw Exception("Unexpected literal type inside Array: " + nested_type->getName() + ". It's a bug", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected literal type inside Array: {}. It's a bug", + nested_type->getName()); if (is_nullable) nested_type = std::make_shared(nested_type); @@ -159,9 +159,9 @@ public: else if (ast->as()) return; else if (ast->as()) - throw DB::Exception("Identifier in constant expression", ErrorCodes::SYNTAX_ERROR); + throw DB::Exception(ErrorCodes::SYNTAX_ERROR, "Identifier in constant expression"); else - throw DB::Exception("Syntax error in constant expression", ErrorCodes::SYNTAX_ERROR); + throw DB::Exception(ErrorCodes::SYNTAX_ERROR, "Syntax error in constant expression"); } private: @@ -315,7 +315,7 @@ ConstantExpressionTemplate::TemplateStructure::TemplateStructure(LiteralsInfo & { const LiteralInfo & info = replaced_literals[i]; if (info.literal->begin.value() < prev_end) - throw Exception("Cannot replace literals", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot replace literals"); while (prev_end < info.literal->begin.value()) { @@ -616,13 +616,12 @@ ColumnPtr ConstantExpressionTemplate::evaluateAll(BlockMissingValues & nulls, si structure->actions_on_literals->execute(evaluated); if (!evaluated || evaluated.rows() != rows_count) - throw Exception("Number of rows mismatch after evaluation of batch of constant expressions: got " + - std::to_string(evaluated.rows()) + " rows for " + std::to_string(rows_count) + " expressions", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Number of rows mismatch after evaluation of batch of constant expressions: " + "got {} rows for {} expressions", evaluated.rows(), rows_count); if (!evaluated.has(structure->result_column_name)) - throw Exception("Cannot evaluate template " + structure->result_column_name + ", block structure:\n" + evaluated.dumpStructure(), - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot evaluate template {}, block structure:\n{}", + structure->result_column_name, evaluated.dumpStructure()); rows_count = 0; auto res = evaluated.getByName(structure->result_column_name); @@ -633,7 +632,7 @@ ColumnPtr ConstantExpressionTemplate::evaluateAll(BlockMissingValues & nulls, si /// Extract column with evaluated expression and mask for NULLs const auto & tuple = assert_cast(*res.column); if (tuple.tupleSize() != 2) - throw Exception("Invalid tuple size, it'a a bug", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid tuple size, it'a a bug"); const auto & is_null = assert_cast(tuple.getColumn(1)); for (size_t i = 0; i < is_null.size(); ++i) diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp index a2f2e59ef16..6f73ede5d4d 100644 --- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp @@ -42,7 +42,8 @@ CustomSeparatedRowInputFormat::CustomSeparatedRowInputFormat( with_names_, with_types_, format_settings_, - std::make_unique(*buf_, ignore_spaces_, format_settings_)) + std::make_unique(*buf_, ignore_spaces_, format_settings_), + 
format_settings_.custom.try_detect_header) , buf(std::move(buf_)) { /// In case of CustomSeparatedWithNames(AndTypes) formats and enabled setting input_format_with_names_use_header we don't know @@ -53,8 +54,10 @@ CustomSeparatedRowInputFormat::CustomSeparatedRowInputFormat( && format_settings_.custom.row_between_delimiter.empty()) { throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Input format CustomSeparatedWithNames(AndTypes) cannot work properly with enabled setting input_format_with_names_use_header, " - "when format_custom_field_delimiter and format_custom_row_after_delimiter are the same and format_custom_row_between_delimiter is empty."); + "Input format CustomSeparatedWithNames(AndTypes) cannot work properly with enabled setting " + "input_format_with_names_use_header, when format_custom_field_delimiter and " + "format_custom_row_after_delimiter are the same " + "and format_custom_row_between_delimiter is empty."); } } @@ -160,14 +163,14 @@ bool CustomSeparatedFormatReader::checkEndOfRow() return checkForSuffixImpl(true); } -template +template String CustomSeparatedFormatReader::readFieldIntoString(bool is_first, bool is_last, bool is_unknown) { if (!is_first) skipFieldDelimiter(); skipSpaces(); updateFormatSettings(is_last); - if constexpr (is_header) + if constexpr (mode != ReadFieldMode::AS_FIELD) { /// If the number of columns is unknown and we use CSV escaping rule, /// we don't know what delimiter to expect after the value, @@ -176,7 +179,10 @@ String CustomSeparatedFormatReader::readFieldIntoString(bool is_first, bool is_l return readCSVStringWithTwoPossibleDelimiters( *buf, format_settings.csv, format_settings.custom.field_delimiter, format_settings.custom.row_after_delimiter); - return readStringByEscapingRule(*buf, format_settings.custom.escaping_rule, format_settings); + if constexpr (mode == ReadFieldMode::AS_STRING) + return readStringByEscapingRule(*buf, format_settings.custom.escaping_rule, format_settings); + else + return readStringOrFieldByEscapingRule(*buf, format_settings.custom.escaping_rule, format_settings); } else { @@ -188,7 +194,7 @@ String CustomSeparatedFormatReader::readFieldIntoString(bool is_first, bool is_l } } -template +template std::vector CustomSeparatedFormatReader::readRowImpl() { std::vector values; @@ -198,14 +204,14 @@ std::vector CustomSeparatedFormatReader::readRowImpl() { do { - values.push_back(readFieldIntoString(values.empty(), false, true)); + values.push_back(readFieldIntoString(values.empty(), false, true)); } while (!checkEndOfRow()); columns = values.size(); } else { for (size_t i = 0; i != columns; ++i) - values.push_back(readFieldIntoString(i == 0, i + 1 == columns, false)); + values.push_back(readFieldIntoString(i == 0, i + 1 == columns, false)); } skipRowEndDelimiter(); @@ -341,7 +347,7 @@ bool CustomSeparatedFormatReader::parseRowBetweenDelimiterWithDiagnosticInfo(Wri void CustomSeparatedFormatReader::setReadBuffer(ReadBuffer & in_) { buf = assert_cast(&in_); - FormatWithNamesAndTypesReader::setReadBuffer(in_); + FormatWithNamesAndTypesReader::setReadBuffer(*buf); } CustomSeparatedSchemaReader::CustomSeparatedSchemaReader( @@ -352,16 +358,20 @@ CustomSeparatedSchemaReader::CustomSeparatedSchemaReader( with_names_, with_types_, &reader, - getDefaultDataTypeForEscapingRule(format_setting_.custom.escaping_rule)) + getDefaultDataTypeForEscapingRule(format_setting_.custom.escaping_rule), + format_setting_.custom.try_detect_header) , buf(in_) , reader(buf, ignore_spaces_, format_setting_) { } -DataTypes 
CustomSeparatedSchemaReader::readRowAndGetDataTypes() +std::pair, DataTypes> CustomSeparatedSchemaReader::readRowAndGetFieldsAndDataTypes() { - if (reader.checkForSuffix()) + if (no_more_data || reader.checkForSuffix()) + { + no_more_data = true; return {}; + } if (!first_row || with_names || with_types) reader.skipRowBetweenDelimiter(); @@ -370,7 +380,13 @@ DataTypes CustomSeparatedSchemaReader::readRowAndGetDataTypes() first_row = false; auto fields = reader.readRow(); - return tryInferDataTypesByEscapingRule(fields, reader.getFormatSettings(), reader.getEscapingRule(), &json_inference_info); + auto data_types = tryInferDataTypesByEscapingRule(fields, reader.getFormatSettings(), reader.getEscapingRule(), &json_inference_info); + return {fields, data_types}; +} + +DataTypes CustomSeparatedSchemaReader::readRowAndGetDataTypesImpl() +{ + return readRowAndGetFieldsAndDataTypes().second; } void CustomSeparatedSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) @@ -414,7 +430,7 @@ void registerCustomSeparatedSchemaReader(FormatFactory & factory) { String result = getAdditionalFormatInfoByEscapingRule(settings, settings.custom.escaping_rule); if (!with_names) - result += fmt::format(", column_names_for_schema_inference={}", settings.column_names_for_schema_inference); + result += fmt::format(", column_names_for_schema_inference={}, try_detect_header={}", settings.column_names_for_schema_inference, settings.custom.try_detect_header); return result + fmt::format( ", result_before_delimiter={}, row_before_delimiter={}, field_delimiter={}," " row_after_delimiter={}, row_between_delimiter={}, result_after_delimiter={}", diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h index 8a3112eb9c1..26ee32be370 100644 --- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h +++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h @@ -68,22 +68,32 @@ public: std::vector readNames() override { return readHeaderRow(); } std::vector readTypes() override { return readHeaderRow(); } - std::vector readHeaderRow() {return readRowImpl(); } + std::vector readHeaderRow() {return readRowImpl(); } - std::vector readRow() { return readRowImpl(); } + std::vector readRow() { return readRowImpl(); } + + std::vector readRowForHeaderDetection() override { return readRowImpl(); } bool checkEndOfRow(); bool checkForSuffixImpl(bool check_eof); inline void skipSpaces() { if (ignore_spaces) skipWhitespaceIfAny(*buf); } - EscapingRule getEscapingRule() { return format_settings.custom.escaping_rule; } + EscapingRule getEscapingRule() const override { return format_settings.custom.escaping_rule; } void setReadBuffer(ReadBuffer & in_) override; + private: - template + enum class ReadFieldMode : uint8_t + { + AS_STRING, + AS_FIELD, + AS_POSSIBLE_STRING, + }; + + template std::vector readRowImpl(); - template + template String readFieldIntoString(bool is_first, bool is_last, bool is_unknown); void updateFormatSettings(bool is_last_column); @@ -99,7 +109,9 @@ public: CustomSeparatedSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, bool ignore_spaces_, const FormatSettings & format_setting_); private: - DataTypes readRowAndGetDataTypes() override; + DataTypes readRowAndGetDataTypesImpl() override; + + std::pair, DataTypes> readRowAndGetFieldsAndDataTypes() override; void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override; @@ -107,6 +119,7 @@ private: 
CustomSeparatedFormatReader reader; bool first_row = true; JSONInferenceInfo json_inference_info; + bool no_more_data = false; }; } diff --git a/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp b/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp index ec5612ae30b..1b73e0131f6 100644 --- a/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/HiveTextRowInputFormat.cpp @@ -30,19 +30,14 @@ HiveTextRowInputFormat::HiveTextRowInputFormat( } HiveTextRowInputFormat::HiveTextRowInputFormat( - const Block & header_, std::unique_ptr buf_, const Params & params_, const FormatSettings & format_settings_) + const Block & header_, std::shared_ptr buf_, const Params & params_, const FormatSettings & format_settings_) : CSVRowInputFormat( - header_, *buf_, params_, true, false, format_settings_, std::make_unique(*buf_, format_settings_)), buf(std::move(buf_)) + header_, buf_, params_, true, false, format_settings_, std::make_unique(*buf_, format_settings_)) { } -void HiveTextRowInputFormat::setReadBuffer(ReadBuffer & in_) -{ - buf->setSubBuffer(in_); -} - HiveTextFormatReader::HiveTextFormatReader(PeekableReadBuffer & buf_, const FormatSettings & format_settings_) - : CSVFormatReader(buf_, format_settings_), buf(&buf_), input_field_names(format_settings_.hive_text.input_field_names) + : CSVFormatReader(buf_, format_settings_), input_field_names(format_settings_.hive_text.input_field_names) { } @@ -59,12 +54,6 @@ std::vector HiveTextFormatReader::readTypes() throw Exception(ErrorCodes::NOT_IMPLEMENTED, "HiveTextRowInputFormat::readTypes is not implemented"); } -void HiveTextFormatReader::setReadBuffer(ReadBuffer & buf_) -{ - buf = assert_cast(&buf_); - CSVFormatReader::setReadBuffer(buf_); -} - void registerInputFormatHiveText(FormatFactory & factory) { factory.registerInputFormat( diff --git a/src/Processors/Formats/Impl/HiveTextRowInputFormat.h b/src/Processors/Formats/Impl/HiveTextRowInputFormat.h index 251486b247c..313aad0d40d 100644 --- a/src/Processors/Formats/Impl/HiveTextRowInputFormat.h +++ b/src/Processors/Formats/Impl/HiveTextRowInputFormat.h @@ -18,13 +18,9 @@ public: String getName() const override { return "HiveTextRowInputFormat"; } - void setReadBuffer(ReadBuffer & in_) override; - private: HiveTextRowInputFormat( - const Block & header_, std::unique_ptr buf_, const Params & params_, const FormatSettings & format_settings_); - - std::unique_ptr buf; + const Block & header_, std::shared_ptr buf_, const Params & params_, const FormatSettings & format_settings_); }; class HiveTextFormatReader final : public CSVFormatReader @@ -35,10 +31,7 @@ public: std::vector readNames() override; std::vector readTypes() override; - void setReadBuffer(ReadBuffer & buf_) override; - private: - PeekableReadBuffer * buf; std::vector input_field_names; }; diff --git a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp index 8bf0ecc5d7e..17bade02a58 100644 --- a/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONAsStringRowInputFormat.cpp @@ -120,7 +120,7 @@ void JSONAsStringRowInputFormat::readJSONObject(IColumn & column) bool quotes = false; if (*buf->position() != '{') - throw Exception("JSON object must begin with '{'.", ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "JSON object must begin with '{'."); ++buf->position(); ++balance; @@ -130,7 +130,7 @@ void JSONAsStringRowInputFormat::readJSONObject(IColumn & column) while 
(balance) { if (buf->eof()) - throw Exception("Unexpected end of file while parsing JSON object.", ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected end of file while parsing JSON object."); if (quotes) { diff --git a/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp b/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp index d23c16c1437..1e8f57aa9a6 100644 --- a/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONColumnsWithMetadataBlockOutputFormat.cpp @@ -44,7 +44,7 @@ void JSONColumnsWithMetadataBlockOutputFormat::consumeExtremes(Chunk chunk) { auto num_rows = chunk.getNumRows(); if (num_rows != 2) - throw Exception("Got " + toString(num_rows) + " in extremes chunk, expected 2", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got {} in extremes chunk, expected 2", num_rows); const auto & columns = chunk.getColumns(); JSONUtils::writeFieldDelimiter(*ostr, 2); @@ -66,7 +66,7 @@ void JSONColumnsWithMetadataBlockOutputFormat::consumeTotals(Chunk chunk) { auto num_rows = chunk.getNumRows(); if (num_rows != 1) - throw Exception("Got " + toString(num_rows) + " in totals chunk, expected 1", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got {} in totals chunk, expected 1", num_rows); const auto & columns = chunk.getColumns(); JSONUtils::writeFieldDelimiter(*ostr, 2); diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp index e9c28099c5f..b91345bebe3 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -186,7 +187,7 @@ JSONCompactEachRowRowSchemaReader::JSONCompactEachRowRowSchemaReader( { } -DataTypes JSONCompactEachRowRowSchemaReader::readRowAndGetDataTypes() +DataTypes JSONCompactEachRowRowSchemaReader::readRowAndGetDataTypesImpl() { if (first_row) first_row = false; diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h index 2151967517a..bb699f0ca2e 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h @@ -79,7 +79,7 @@ public: JSONCompactEachRowRowSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, bool yield_strings_, const FormatSettings & format_settings_); private: - DataTypes readRowAndGetDataTypes() override; + DataTypes readRowAndGetDataTypesImpl() override; void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override; void transformFinalTypeIfNeeded(DataTypePtr & type) override; diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index a8881c5f398..148c639f939 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -122,7 +122,7 @@ StringRef JSONEachRowRowInputFormat::readColumnName(ReadBuffer & buf) void JSONEachRowRowInputFormat::skipUnknownField(StringRef name_ref) { if (!format_settings.skip_unknown_fields) - throw Exception("Unknown field found while parsing JSONEachRow format: " + name_ref.toString(), ErrorCodes::INCORRECT_DATA); + throw 
Exception(ErrorCodes::INCORRECT_DATA, "Unknown field found while parsing JSONEachRow format: {}", name_ref.toString()); skipJSONField(*in, name_ref); } @@ -130,7 +130,7 @@ void JSONEachRowRowInputFormat::skipUnknownField(StringRef name_ref) void JSONEachRowRowInputFormat::readField(size_t index, MutableColumns & columns) { if (seen_columns[index]) - throw Exception("Duplicate field found while parsing JSONEachRow format: " + columnName(index), ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Duplicate field found while parsing JSONEachRow format: {}", columnName(index)); seen_columns[index] = true; const auto & type = getPort().getHeader().getByPosition(index).type; @@ -180,7 +180,7 @@ void JSONEachRowRowInputFormat::readJSONObject(MutableColumns & columns) else if (column_index == NESTED_FIELD) readNestedData(name_ref.toString(), columns); else - throw Exception("Logical error: illegal value of column_index", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: illegal value of column_index"); } else { diff --git a/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp b/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp index 677f8bb28ec..a86d68c17ff 100644 --- a/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/LineAsStringRowInputFormat.cpp @@ -19,7 +19,7 @@ LineAsStringRowInputFormat::LineAsStringRowInputFormat(const Block & header_, Re if (header_.columns() != 1 || !typeid_cast(header_.getByPosition(0).column.get())) { - throw Exception("This input format is only suitable for tables with a single column of type String.", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "This input format is only suitable for tables with a single column of type String."); } } diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp index 0e8421566ab..f337eedbb05 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp @@ -384,7 +384,7 @@ bool MsgPackVisitor::visit_ext(const char * value, uint32_t size) void MsgPackVisitor::parse_error(size_t, size_t) // NOLINT { - throw Exception("Error occurred while parsing msgpack data.", ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Error occurred while parsing msgpack data."); } bool MsgPackRowInputFormat::readObject() @@ -398,7 +398,7 @@ bool MsgPackRowInputFormat::readObject() { buf->position() = buf->buffer().end(); if (buf->eof()) - throw Exception("Unexpected end of file while parsing msgpack object.", ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected end of file while parsing msgpack object."); buf->position() = buf->buffer().end(); buf->makeContinuousMemoryFromCheckpointToPos(); buf->rollbackToCheckpoint(); @@ -421,7 +421,7 @@ bool MsgPackRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & if (!has_more_data) { if (column_index != 0) - throw Exception("Not enough values to complete the row.", ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Not enough values to complete the row."); return false; } return true; @@ -436,7 +436,9 @@ MsgPackSchemaReader::MsgPackSchemaReader(ReadBuffer & in_, const FormatSettings : IRowSchemaReader(buf, format_settings_), buf(in_), number_of_columns(format_settings_.msgpack.number_of_columns) { if (!number_of_columns) - throw 
Exception(ErrorCodes::BAD_ARGUMENTS, "You must specify setting input_format_msgpack_number_of_columns to extract table schema from MsgPack data"); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "You must specify setting input_format_msgpack_number_of_columns " + "to extract table schema from MsgPack data"); } @@ -461,7 +463,7 @@ msgpack::object_handle MsgPackSchemaReader::readObject() { buf.position() = buf.buffer().end(); if (buf.eof()) - throw Exception("Unexpected end of file while parsing msgpack object", ErrorCodes::UNEXPECTED_END_OF_FILE); + throw Exception(ErrorCodes::UNEXPECTED_END_OF_FILE, "Unexpected end of file while parsing msgpack object"); buf.position() = buf.buffer().end(); buf.makeContinuousMemoryFromCheckpointToPos(); buf.rollbackToCheckpoint(); diff --git a/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp index c9b41ee10bb..a1ed45ec40f 100644 --- a/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowOutputFormat.cpp @@ -208,7 +208,7 @@ void MsgPackRowOutputFormat::serializeField(const IColumn & column, DataTypePtr default: break; } - throw Exception("Type " + data_type->getName() + " is not supported for MsgPack output format", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Type {} is not supported for MsgPack output format", data_type->getName()); } void MsgPackRowOutputFormat::write(const Columns & columns, size_t row_num) diff --git a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp index c3f7b4e0ad7..bf55fe88469 100644 --- a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp @@ -337,7 +337,8 @@ static void readFirstCreateAndInsertQueries(ReadBuffer & in, String & table_name } if (!insert_query_present) - throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "There is no INSERT queries{} in MySQL dump file", table_name.empty() ? "" : " for table " + table_name); + throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "There is no INSERT queries{} in MySQL dump file", + table_name.empty() ? "" : " for table " + table_name); skipToDataInInsertQuery(in, column_names.empty() ? 
&column_names : nullptr); } diff --git a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp index 2605719d1ec..42c3e178436 100644 --- a/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockOutputFormat.cpp @@ -153,7 +153,7 @@ std::unique_ptr ORCBlockOutputFormat::getORCType(const DataTypePtr & } default: { - throw Exception("Type " + type->getName() + " is not supported for ORC output format", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Type {} is not supported for ORC output format", type->getName()); } } } @@ -462,7 +462,7 @@ void ORCBlockOutputFormat::writeColumn( break; } default: - throw Exception("Type " + type->getName() + " is not supported for ORC output format", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Type {} is not supported for ORC output format", type->getName()); } } diff --git a/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp b/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp index 19ec3772da0..293bf4f73f3 100644 --- a/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParallelParsingInputFormat.cpp @@ -161,6 +161,12 @@ Chunk ParallelParsingInputFormat::generate() /// Delayed launching of segmentator thread if (unlikely(!parsing_started.exchange(true))) { + /// Lock 'finish_and_wait_mutex' to avoid recreation of + /// 'segmentator_thread' after it was joined. + std::lock_guard finish_and_wait_lock(finish_and_wait_mutex); + if (finish_and_wait_called) + return {}; + segmentator_thread = ThreadFromGlobalPool( &ParallelParsingInputFormat::segmentatorThreadFunction, this, CurrentThread::getGroup()); } diff --git a/src/Processors/Formats/Impl/ParallelParsingInputFormat.h b/src/Processors/Formats/Impl/ParallelParsingInputFormat.h index a16471ecc42..252b6ec3f81 100644 --- a/src/Processors/Formats/Impl/ParallelParsingInputFormat.h +++ b/src/Processors/Formats/Impl/ParallelParsingInputFormat.h @@ -111,7 +111,7 @@ public: void resetParser() override final { - throw Exception("resetParser() is not allowed for " + getName(), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "resetParser() is not allowed for {}", getName()); } const BlockMissingValues & getMissingValues() const override final @@ -172,8 +172,8 @@ private: case IProcessor::Status::NeedData: break; case IProcessor::Status::Async: break; case IProcessor::Status::ExpandPipeline: - throw Exception("One of the parsers returned status " + IProcessor::statusToName(status) + - " during parallel parsing", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "One of the parsers returned status {} during parallel parsing", + IProcessor::statusToName(status)); } } } diff --git a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp index e99b308b87b..d889c156e18 100644 --- a/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockOutputFormat.cpp @@ -50,14 +50,14 @@ void ParquetBlockOutputFormat::consume(Chunk chunk) props, /*parquet::default_writer_properties(),*/ &file_writer); if (!status.ok()) - throw Exception{"Error while opening a table: " + status.ToString(), ErrorCodes::UNKNOWN_EXCEPTION}; + throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Error while opening a table: {}", status.ToString()); } // TODO: calculate row_group_size depending on a number of 
rows and table size auto status = file_writer->WriteTable(*arrow_table, format_settings.parquet.row_group_size); if (!status.ok()) - throw Exception{"Error while writing a table: " + status.ToString(), ErrorCodes::UNKNOWN_EXCEPTION}; + throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Error while writing a table: {}", status.ToString()); } void ParquetBlockOutputFormat::finalizeImpl() @@ -71,7 +71,7 @@ void ParquetBlockOutputFormat::finalizeImpl() auto status = file_writer->Close(); if (!status.ok()) - throw Exception{"Error while closing a table: " + status.ToString(), ErrorCodes::UNKNOWN_EXCEPTION}; + throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, "Error while closing a table: {}", status.ToString()); } void ParquetBlockOutputFormat::resetFormatterImpl() diff --git a/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp b/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp index 3faeefbaabd..129c9ca3156 100644 --- a/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ProtobufRowOutputFormat.cpp @@ -40,9 +40,9 @@ ProtobufRowOutputFormat::ProtobufRowOutputFormat( void ProtobufRowOutputFormat::write(const Columns & columns, size_t row_num) { if (!allow_multiple_rows && !first_row) - throw Exception( - "The ProtobufSingle format can't be used to write multiple rows because this format doesn't have any row delimiter.", - ErrorCodes::NO_ROW_DELIMITER); + throw Exception(ErrorCodes::NO_ROW_DELIMITER, + "The ProtobufSingle format can't be used " + "to write multiple rows because this format doesn't have any row delimiter."); if (row_num == 0) serializer->setColumns(columns.data(), columns.size()); diff --git a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp index ef577395d8f..74ce7d7f2ac 100644 --- a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp @@ -59,11 +59,12 @@ bool RegexpFieldExtractor::parseRow(PeekableReadBuffer & buf) static_cast(re2_arguments_ptrs.size())); if (!match && !skip_unmatched) - throw Exception("Line \"" + std::string(buf.position(), line_to_match) + "\" doesn't match the regexp.", ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Line \"{}\" doesn't match the regexp.", + std::string(buf.position(), line_to_match)); buf.position() += line_size; if (!buf.eof() && !checkChar('\n', buf)) - throw Exception("No \\n at the end of line.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "No \\n at the end of line."); return match; } @@ -109,7 +110,7 @@ bool RegexpRowInputFormat::readField(size_t index, MutableColumns & columns) void RegexpRowInputFormat::readFieldsFromMatch(MutableColumns & columns, RowReadExtension & ext) { if (field_extractor.getMatchedFieldsSize() != columns.size()) - throw Exception("The number of matched fields in line doesn't match the number of columns.", ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "The number of matched fields in line doesn't match the number of columns."); ext.read_columns.assign(columns.size(), false); for (size_t columns_index = 0; columns_index < columns.size(); ++columns_index) diff --git a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp index f5f05453f25..942d0511b1b 100644 --- a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp @@ -84,7 +84,7 @@ static bool readName(ReadBuffer & 
buf, StringRef & ref, String & tmp) tmp.append(buf.position(), next_pos - buf.position()); buf.position() += next_pos + 1 - buf.position(); if (buf.eof()) - throw Exception("Cannot parse escape sequence", ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE); + throw Exception(ErrorCodes::CANNOT_PARSE_ESCAPE_SEQUENCE, "Cannot parse escape sequence"); tmp.push_back(parseEscapeSequence(*buf.position())); ++buf.position(); @@ -130,7 +130,7 @@ bool TSKVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ex if (!it) { if (!format_settings.skip_unknown_fields) - throw Exception("Unknown field found while parsing TSKV format: " + name_ref.toString(), ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Unknown field found while parsing TSKV format: {}", name_ref.toString()); /// If the key is not found, skip the value. NullOutput sink; @@ -141,7 +141,7 @@ bool TSKVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ex index = it->getMapped(); if (seen_columns[index]) - throw Exception("Duplicate field found while parsing TSKV format: " + name_ref.toString(), ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Duplicate field found while parsing TSKV format: {}", name_ref.toString()); seen_columns[index] = read_columns[index] = true; const auto & type = getPort().getHeader().getByPosition(index).type; @@ -156,7 +156,7 @@ bool TSKVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ex { /// The only thing that can go without value is `tskv` fragment that is ignored. if (!(name_ref.size == 4 && 0 == memcmp(name_ref.data, "tskv", 4))) - throw Exception("Found field without value while parsing TSKV format: " + name_ref.toString(), ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Found field without value while parsing TSKV format: {}", name_ref.toString()); } if (in->eof()) @@ -182,7 +182,7 @@ bool TSKVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ex seen_columns[index] = read_columns[index] = false; } - throw Exception("Found garbage after field in TSKV format: " + name_ref.toString(), ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + throw Exception(ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED, "Found garbage after field in TSKV format: {}", name_ref.toString()); } } } @@ -255,7 +255,7 @@ NamesAndTypesList TSKVSchemaReader::readRowAndGetNamesAndDataTypes(bool & eof) { /// The only thing that can go without value is `tskv` fragment that is ignored. if (!(name_ref.size == 4 && 0 == memcmp(name_ref.data, "tskv", 4))) - throw Exception("Found field without value while parsing TSKV format: " + name_ref.toString(), ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Found field without value while parsing TSKV format: {}", name_ref.toString()); } } diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp index 174a41a8a59..868639e66c2 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp @@ -25,11 +25,10 @@ namespace ErrorCodes static void checkForCarriageReturn(ReadBuffer & in) { if (!in.eof() && (in.position()[0] == '\r' || (in.position() != in.buffer().begin() && in.position()[-1] == '\r'))) - throw Exception("\nYou have carriage return (\\r, 0x0D, ASCII 13) at end of first row." 
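Most of the hunks in this stretch of the diff switch DB::Exception calls from building the message with string concatenation (error code passed last) to the variadic form that takes the error code first and a fmt-style format string with {} placeholders. A minimal standalone sketch of the calling-convention change; SketchException and the numeric code are hypothetical stand-ins, not the real DB::Exception, which accepts the code plus the format string and arguments directly:

#include <fmt/core.h>
#include <stdexcept>
#include <string>

// Hypothetical stand-in for DB::Exception, only to show the shape of the call.
struct SketchException : std::runtime_error
{
    SketchException(int code_, std::string message) : std::runtime_error(std::move(message)), code(code_) {}
    int code;
};

static constexpr int INCORRECT_DATA = 117;   // arbitrary value chosen for the sketch

void rejectUnknownField(const std::string & field_name)
{
    // Before: throw Exception("Unknown field found while parsing JSONEachRow format: " + field_name, ErrorCodes::INCORRECT_DATA);
    // After:  throw Exception(ErrorCodes::INCORRECT_DATA, "Unknown field found while parsing JSONEachRow format: {}", field_name);
    throw SketchException(INCORRECT_DATA,
        fmt::format("Unknown field found while parsing JSONEachRow format: {}", field_name));
}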
+ throw Exception(ErrorCodes::INCORRECT_DATA, "\nYou have carriage return (\\r, 0x0D, ASCII 13) at end of first row." "\nIt's like your input data has DOS/Windows style line separators, that are illegal in TabSeparated format." " You must transform your file to Unix format." - "\nBut if you really need carriage return at end of string value of last column, you need to escape it as \\r.", - ErrorCodes::INCORRECT_DATA); + "\nBut if you really need carriage return at end of string value of last column, you need to escape it as \\r."); } TabSeparatedRowInputFormat::TabSeparatedRowInputFormat( @@ -40,49 +39,74 @@ TabSeparatedRowInputFormat::TabSeparatedRowInputFormat( bool with_types_, bool is_raw_, const FormatSettings & format_settings_) + : TabSeparatedRowInputFormat(header_, std::make_unique(in_), params_, with_names_, with_types_, is_raw_, format_settings_) +{ +} + +TabSeparatedRowInputFormat::TabSeparatedRowInputFormat( + const Block & header_, + std::unique_ptr in_, + const Params & params_, + bool with_names_, + bool with_types_, + bool is_raw, + const FormatSettings & format_settings_) : RowInputFormatWithNamesAndTypes( header_, - in_, + *in_, params_, false, with_names_, with_types_, format_settings_, - std::make_unique(in_, format_settings_, is_raw_)) + std::make_unique(*in_, format_settings_, is_raw), + format_settings_.tsv.try_detect_header) + , buf(std::move(in_)) { } -TabSeparatedFormatReader::TabSeparatedFormatReader(ReadBuffer & in_, const FormatSettings & format_settings_, bool is_raw_) - : FormatWithNamesAndTypesReader(in_, format_settings_), is_raw(is_raw_) +void TabSeparatedRowInputFormat::setReadBuffer(ReadBuffer & in_) +{ + buf->setSubBuffer(in_); +} + +TabSeparatedFormatReader::TabSeparatedFormatReader(PeekableReadBuffer & in_, const FormatSettings & format_settings_, bool is_raw_) + : FormatWithNamesAndTypesReader(in_, format_settings_), buf(&in_), is_raw(is_raw_) { } void TabSeparatedFormatReader::skipFieldDelimiter() { - assertChar('\t', *in); + assertChar('\t', *buf); } void TabSeparatedFormatReader::skipRowEndDelimiter() { - if (in->eof()) + if (buf->eof()) return; if (unlikely(first_row)) { - checkForCarriageReturn(*in); + checkForCarriageReturn(*buf); first_row = false; } - assertChar('\n', *in); + assertChar('\n', *buf); } +template String TabSeparatedFormatReader::readFieldIntoString() { String field; if (is_raw) - readString(field, *in); + readString(field, *buf); else - readEscapedString(field, *in); + { + if constexpr (read_string) + readEscapedString(field, *buf); + else + readTSVField(field, *buf); + } return field; } @@ -90,9 +114,9 @@ void TabSeparatedFormatReader::skipField() { NullOutput out; if (is_raw) - readStringInto(out, *in); + readStringInto(out, *buf); else - readEscapedStringInto(out, *in); + readEscapedStringInto(out, *buf); } void TabSeparatedFormatReader::skipHeaderRow() @@ -101,19 +125,20 @@ void TabSeparatedFormatReader::skipHeaderRow() { skipField(); } - while (checkChar('\t', *in)); + while (checkChar('\t', *buf)); skipRowEndDelimiter(); } -std::vector TabSeparatedFormatReader::readRow() +template +std::vector TabSeparatedFormatReader::readRowImpl() { std::vector fields; do { - fields.push_back(readFieldIntoString()); + fields.push_back(readFieldIntoString()); } - while (checkChar('\t', *in)); + while (checkChar('\t', *buf)); skipRowEndDelimiter(); return fields; @@ -122,8 +147,8 @@ std::vector TabSeparatedFormatReader::readRow() bool TabSeparatedFormatReader::readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & 
serialization, bool is_last_file_column, const String & /*column_name*/) { - const bool at_delimiter = !is_last_file_column && !in->eof() && *in->position() == '\t'; - const bool at_last_column_line_end = is_last_file_column && (in->eof() || *in->position() == '\n'); + const bool at_delimiter = !is_last_file_column && !buf->eof() && *buf->position() == '\t'; + const bool at_last_column_line_end = is_last_file_column && (buf->eof() || *buf->position() == '\n'); if (format_settings.tsv.empty_as_default && (at_delimiter || at_last_column_line_end)) { @@ -136,17 +161,17 @@ bool TabSeparatedFormatReader::readField(IColumn & column, const DataTypePtr & t if (is_raw) { if (as_nullable) - return SerializationNullable::deserializeTextRawImpl(column, *in, format_settings, serialization); + return SerializationNullable::deserializeTextRawImpl(column, *buf, format_settings, serialization); - serialization->deserializeTextRaw(column, *in, format_settings); + serialization->deserializeTextRaw(column, *buf, format_settings); return true; } if (as_nullable) - return SerializationNullable::deserializeTextEscapedImpl(column, *in, format_settings, serialization); + return SerializationNullable::deserializeTextEscapedImpl(column, *buf, format_settings, serialization); - serialization->deserializeTextEscaped(column, *in, format_settings); + serialization->deserializeTextEscaped(column, *buf, format_settings); return true; } @@ -154,25 +179,25 @@ bool TabSeparatedFormatReader::parseFieldDelimiterWithDiagnosticInfo(WriteBuffer { try { - assertChar('\t', *in); + assertChar('\t', *buf); } catch (const DB::Exception &) { - if (*in->position() == '\n') + if (*buf->position() == '\n') { out << "ERROR: Line feed found where tab is expected." " It's like your file has less columns than expected.\n" "And if your file has the right number of columns, " "maybe it has an unescaped backslash in value before tab, which causes the tab to be escaped.\n"; } - else if (*in->position() == '\r') + else if (*buf->position() == '\r') { out << "ERROR: Carriage return found where tab is expected.\n"; } else { out << "ERROR: There is no tab. "; - verbosePrintString(in->position(), in->position() + 1, out); + verbosePrintString(buf->position(), buf->position() + 1, out); out << " found instead.\n"; } return false; @@ -183,22 +208,22 @@ bool TabSeparatedFormatReader::parseFieldDelimiterWithDiagnosticInfo(WriteBuffer bool TabSeparatedFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer & out) { - if (in->eof()) + if (buf->eof()) return true; try { - assertChar('\n', *in); + assertChar('\n', *buf); } catch (const DB::Exception &) { - if (*in->position() == '\t') + if (*buf->position() == '\t') { out << "ERROR: Tab found where line feed is expected." " It's like your file has more columns than expected.\n" "And if your file has the right number of columns, maybe it has an unescaped tab in a value.\n"; } - else if (*in->position() == '\r') + else if (*buf->position() == '\r') { out << "ERROR: Carriage return found where line feed is expected." " It's like your file has DOS/Windows style line separators, that is illegal in TabSeparated format.\n"; @@ -206,7 +231,7 @@ bool TabSeparatedFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer & out) else { out << "ERROR: There is no line feed. 
"; - verbosePrintString(in->position(), in->position() + 1, out); + verbosePrintString(buf->position(), buf->position() + 1, out); out << " found instead.\n"; } return false; @@ -220,19 +245,19 @@ void TabSeparatedFormatReader::checkNullValueForNonNullable(DataTypePtr type) bool can_be_parsed_as_null = type->isNullable() || type->isLowCardinalityNullable() || format_settings.null_as_default; // check null value for type is not nullable. don't cross buffer bound for simplicity, so maybe missing some case - if (!can_be_parsed_as_null && !in->eof()) + if (!can_be_parsed_as_null && !buf->eof()) { - if (*in->position() == '\\' && in->available() >= 2) + if (*buf->position() == '\\' && buf->available() >= 2) { - ++in->position(); - if (*in->position() == 'N') + ++buf->position(); + if (*buf->position() == 'N') { - ++in->position(); + ++buf->position(); throw Exception(ErrorCodes::INCORRECT_DATA, "Unexpected NULL value of not Nullable type {}", type->getName()); } else { - --in->position(); + --buf->position(); } } } @@ -246,29 +271,43 @@ void TabSeparatedFormatReader::skipPrefixBeforeHeader() void TabSeparatedRowInputFormat::syncAfterError() { - skipToUnescapedNextLineOrEOF(*in); + skipToUnescapedNextLineOrEOF(*buf); +} + +void TabSeparatedFormatReader::setReadBuffer(ReadBuffer & in_) +{ + buf = assert_cast(&in_); + FormatWithNamesAndTypesReader::setReadBuffer(*buf); } TabSeparatedSchemaReader::TabSeparatedSchemaReader( ReadBuffer & in_, bool with_names_, bool with_types_, bool is_raw_, const FormatSettings & format_settings_) : FormatWithNamesAndTypesSchemaReader( - in_, + buf, format_settings_, with_names_, with_types_, &reader, - getDefaultDataTypeForEscapingRule(is_raw_ ? FormatSettings::EscapingRule::Raw : FormatSettings::EscapingRule::Escaped)) - , reader(in_, format_settings_, is_raw_) + getDefaultDataTypeForEscapingRule(is_raw_ ? FormatSettings::EscapingRule::Raw : FormatSettings::EscapingRule::Escaped), + format_settings_.tsv.try_detect_header) + , buf(in_) + , reader(buf, format_settings_, is_raw_) { } -DataTypes TabSeparatedSchemaReader::readRowAndGetDataTypes() +std::pair, DataTypes> TabSeparatedSchemaReader::readRowAndGetFieldsAndDataTypes() { - if (in.eof()) + if (buf.eof()) return {}; auto fields = reader.readRow(); - return tryInferDataTypesByEscapingRule(fields, reader.getFormatSettings(), reader.getEscapingRule()); + auto data_types = tryInferDataTypesByEscapingRule(fields, reader.getFormatSettings(), reader.getEscapingRule()); + return {fields, data_types}; +} + +DataTypes TabSeparatedSchemaReader::readRowAndGetDataTypesImpl() +{ + return readRowAndGetFieldsAndDataTypes().second; } void registerInputFormatTabSeparated(FormatFactory & factory) @@ -309,7 +348,10 @@ void registerTSVSchemaReader(FormatFactory & factory) String result = getAdditionalFormatInfoByEscapingRule( settings, is_raw ? FormatSettings::EscapingRule::Raw : FormatSettings::EscapingRule::Escaped); if (!with_names) - result += fmt::format(", column_names_for_schema_inference={}", settings.column_names_for_schema_inference); + result += fmt::format( + ", column_names_for_schema_inference={}, try_detect_header={}", + settings.column_names_for_schema_inference, + settings.tsv.try_detect_header); return result; }); } @@ -337,7 +379,7 @@ static std::pair fileSegmentationEngineTabSeparatedImpl(ReadBuffer pos = find_first_symbols<'\\', '\r', '\n'>(pos, in.buffer().end()); if (pos > in.buffer().end()) - throw Exception("Position in buffer is out of bounds. 
There must be a bug.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Position in buffer is out of bounds. There must be a bug."); else if (pos == in.buffer().end()) continue; @@ -377,10 +419,10 @@ void registerFileSegmentationEngineTabSeparated(FormatFactory & factory) { for (bool is_raw : {false, true}) { - auto register_func = [&](const String & format_name, bool with_names, bool with_types) + auto register_func = [&](const String & format_name, bool, bool) { - size_t min_rows = 1 + static_cast(with_names) + static_cast(with_types); - factory.registerFileSegmentationEngine(format_name, [is_raw, min_rows](ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows) + static constexpr size_t min_rows = 3; /// Make it 3 for header auto detection (first 3 rows must be always in the same segment). + factory.registerFileSegmentationEngine(format_name, [is_raw](ReadBuffer & in, DB::Memory<> & memory, size_t min_bytes, size_t max_rows) { return fileSegmentationEngineTabSeparatedImpl(in, memory, is_raw, min_bytes, min_rows, max_rows); }); diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h index 3476b974c3b..9edcf86b5de 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h @@ -22,16 +22,24 @@ public: String getName() const override { return "TabSeparatedRowInputFormat"; } + void setReadBuffer(ReadBuffer & in_) override; + private: + TabSeparatedRowInputFormat(const Block & header_, std::unique_ptr in_, const Params & params_, + bool with_names_, bool with_types_, bool is_raw, const FormatSettings & format_settings_); + bool allowSyncAfterError() const override { return true; } void syncAfterError() override; bool isGarbageAfterField(size_t, ReadBuffer::Position pos) override { return *pos != '\n' && *pos != '\t'; } + + + std::unique_ptr buf; }; class TabSeparatedFormatReader final : public FormatWithNamesAndTypesReader { public: - TabSeparatedFormatReader(ReadBuffer & in_, const FormatSettings & format_settings, bool is_raw_); + TabSeparatedFormatReader(PeekableReadBuffer & in_, const FormatSettings & format_settings, bool is_raw_); bool readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool is_last_file_column, const String & column_name) override; @@ -45,21 +53,32 @@ public: void skipRowEndDelimiter() override; void skipPrefixBeforeHeader() override; - std::vector readRow(); - std::vector readNames() override { return readRow(); } - std::vector readTypes() override { return readRow(); } + std::vector readRow() { return readRowImpl(); } + std::vector readNames() override { return readHeaderRow(); } + std::vector readTypes() override { return readHeaderRow(); } + std::vector readHeaderRow() { return readRowImpl(); } + + template String readFieldIntoString(); + std::vector readRowForHeaderDetection() override { return readHeaderRow(); } + void checkNullValueForNonNullable(DataTypePtr type) override; bool parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out) override; bool parseRowEndWithDiagnosticInfo(WriteBuffer & out) override; - FormatSettings::EscapingRule getEscapingRule() const + FormatSettings::EscapingRule getEscapingRule() const override { return is_raw ? 
FormatSettings::EscapingRule::Raw : FormatSettings::EscapingRule::Escaped; } + void setReadBuffer(ReadBuffer & in_) override; + private: + template + std::vector readRowImpl(); + + PeekableReadBuffer * buf; bool is_raw; bool first_row = true; }; @@ -70,8 +89,10 @@ public: TabSeparatedSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, bool is_raw_, const FormatSettings & format_settings); private: - DataTypes readRowAndGetDataTypes() override; + DataTypes readRowAndGetDataTypesImpl() override; + std::pair, DataTypes> readRowAndGetFieldsAndDataTypes() override; + PeekableReadBuffer buf; TabSeparatedFormatReader reader; }; diff --git a/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp b/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp index 1b2af4e631c..6d8fe1e5a2c 100644 --- a/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/TemplateBlockOutputFormat.cpp @@ -88,7 +88,7 @@ TemplateBlockOutputFormat::ResultsetPart TemplateBlockOutputFormat::stringToResu else if (part == "bytes_read") return ResultsetPart::BytesRead; else - throw Exception("Unknown output part " + part, ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Unknown output part {}", part); } void TemplateBlockOutputFormat::writeRow(const Chunk & chunk, size_t row_num) diff --git a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp index 05fa17c7a17..9cffe316606 100644 --- a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp @@ -544,8 +544,7 @@ static ParsedTemplateFormatString fillResultSetFormat(const FormatSettings & set { if (partName == "data") return 0; - throw Exception("Unknown input part " + partName, - ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Unknown input part {}", partName); }); } return resultset_format; diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp index 7b9cb23ddf0..9511b37ff15 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp @@ -318,8 +318,8 @@ namespace size_t dst_tuple_size = type_tuple.getElements().size(); if (src_tuple_size != dst_tuple_size) - throw Exception(fmt::format("Bad size of tuple. Expected size: {}, actual size: {}.", - std::to_string(src_tuple_size), std::to_string(dst_tuple_size)), ErrorCodes::TYPE_MISMATCH); + throw Exception(ErrorCodes::TYPE_MISMATCH, "Bad size of tuple. 
Expected size: {}, actual size: {}.", + src_tuple_size, dst_tuple_size); for (size_t i = 0; i < src_tuple_size; ++i) { @@ -454,8 +454,8 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx if (shouldDeduceNewTemplate(column_idx)) { if (templates[column_idx]) - throw DB::Exception("Template for column " + std::to_string(column_idx) + " already exists and it was not evaluated yet", - ErrorCodes::LOGICAL_ERROR); + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Template for column {} already exists and it was not evaluated yet", + std::to_string(column_idx)); std::exception_ptr exception; try { @@ -497,7 +497,7 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx { buf->rollbackToCheckpoint(); size_t len = const_cast((*token_iterator)->begin) - buf->position(); - throw Exception("Cannot deduce template of expression: " + std::string(buf->position(), len), ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Cannot deduce template of expression: {}", std::string(buf->position(), len)); } } /// Continue parsing without template @@ -505,7 +505,7 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx } if (!format_settings.values.interpret_expressions) - throw Exception("Interpreting expressions is disabled", ErrorCodes::SUPPORT_IS_DISABLED); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Interpreting expressions is disabled"); /// Try to evaluate single expression if other parsers don't work buf->position() = const_cast((*token_iterator)->begin); @@ -528,10 +528,9 @@ bool ValuesBlockInputFormat::parseExpression(IColumn & column, size_t column_idx return false; } buf->rollbackToCheckpoint(); - throw Exception{"Cannot insert NULL value into a column of type '" + type.getName() + "'" - + " at: " + - String(buf->position(), std::min(SHOW_CHARS_ON_SYNTAX_ERROR, buf->buffer().end() - buf->position())), - ErrorCodes::TYPE_MISMATCH}; + throw Exception(ErrorCodes::TYPE_MISMATCH, "Cannot insert NULL value into a column of type '{}' at: {}", + type.getName(), String(buf->position(), + std::min(SHOW_CHARS_ON_SYNTAX_ERROR, buf->buffer().end() - buf->position()))); } column.insert(value); @@ -593,12 +592,12 @@ void ValuesBlockInputFormat::readSuffix() ++buf->position(); skipWhitespaceIfAny(*buf); if (buf->hasUnreadData()) - throw Exception("Cannot read data after semicolon", ErrorCodes::CANNOT_READ_ALL_DATA); + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Cannot read data after semicolon"); return; } if (buf->hasUnreadData()) - throw Exception("Unread data in PeekableReadBuffer will be lost. Most likely it's a bug.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unread data in PeekableReadBuffer will be lost. 
Most likely it's a bug."); } void ValuesBlockInputFormat::resetParser() diff --git a/src/Processors/Formats/InputFormatErrorsLogger.cpp b/src/Processors/Formats/InputFormatErrorsLogger.cpp index 88d27abf610..71d51f0e04a 100644 --- a/src/Processors/Formats/InputFormatErrorsLogger.cpp +++ b/src/Processors/Formats/InputFormatErrorsLogger.cpp @@ -37,7 +37,9 @@ InputFormatErrorsLogger::InputFormatErrorsLogger(const ContextPtr & context) auto user_files_path = context->getUserFilesPath(); errors_file_path = fs::path(user_files_path) / path_in_setting; if (!fileOrSymlinkPathStartsWith(errors_file_path, user_files_path)) - throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "Cannot log errors in path `{}`, because it is not inside `{}`", errors_file_path, user_files_path); + throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, + "Cannot log errors in path `{}`, because it is not inside `{}`", + errors_file_path, user_files_path); } else { diff --git a/src/Processors/Formats/PullingOutputFormat.cpp b/src/Processors/Formats/PullingOutputFormat.cpp index af237037a72..c2036ce37c9 100644 --- a/src/Processors/Formats/PullingOutputFormat.cpp +++ b/src/Processors/Formats/PullingOutputFormat.cpp @@ -14,8 +14,7 @@ WriteBuffer PullingOutputFormat::out(nullptr, 0); void PullingOutputFormat::consume(Chunk chunk) { if (data) - throw Exception("PullingOutputFormat cannot consume chunk because it already has data", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "PullingOutputFormat cannot consume chunk because it already has data"); if (chunk) info.update(chunk.getNumRows(), chunk.allocatedBytes()); diff --git a/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.cpp b/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.cpp index 35a86bc476d..6358a99d6b4 100644 --- a/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.cpp +++ b/src/Processors/Formats/RowInputFormatWithDiagnosticInfo.cpp @@ -138,7 +138,7 @@ bool RowInputFormatWithDiagnosticInfo::deserializeFieldAndPrintDiagnosticInfo(co auto * curr_position = in->position(); if (curr_position < prev_position) - throw Exception("Logical error: parsing is non-deterministic.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: parsing is non-deterministic."); if (isNativeNumber(type) || isDate(type) || isDateTime(type) || isDateTime64(type)) { diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp index e0e8ea47a7b..eaedbbb4a1e 100644 --- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp +++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp @@ -1,9 +1,13 @@ #include #include #include +#include #include +#include #include #include +#include +#include namespace DB @@ -12,6 +16,30 @@ namespace DB namespace ErrorCodes { extern const int INCORRECT_DATA; + extern const int LOGICAL_ERROR; +} + +namespace +{ + bool checkIfAllValuesAreTypeNames(const std::vector & names) + { + for (const auto & name : names) + { + if (!DataTypeFactory::instance().tryGet(name)) + return false; + } + return true; + } + + bool isSubsetOf(const std::unordered_set & subset, const std::unordered_set & set) + { + for (const auto & element : subset) + { + if (!set.contains(element)) + return false; + } + return true; + } } RowInputFormatWithNamesAndTypes::RowInputFormatWithNamesAndTypes( @@ -22,7 +50,8 @@ RowInputFormatWithNamesAndTypes::RowInputFormatWithNamesAndTypes( bool with_names_, bool with_types_, const FormatSettings 
& format_settings_, - std::unique_ptr format_reader_) + std::unique_ptr format_reader_, + bool try_detect_header_) : RowInputFormatWithDiagnosticInfo(header_, in_, params_) , format_settings(format_settings_) , data_types(header_.getDataTypes()) @@ -30,6 +59,7 @@ RowInputFormatWithNamesAndTypes::RowInputFormatWithNamesAndTypes( , with_names(with_names_) , with_types(with_types_) , format_reader(std::move(format_reader_)) + , try_detect_header(try_detect_header_) { column_indexes_by_names = getPort().getHeader().getNamesToIndexesMap(); } @@ -52,53 +82,117 @@ void RowInputFormatWithNamesAndTypes::readPrefix() /// Skip prefix before names and types. format_reader->skipPrefixBeforeHeader(); + std::vector column_names; + std::vector type_names; if (with_names) - { - if (format_settings.with_names_use_header) - { - auto column_names = format_reader->readNames(); - column_mapping->addColumns(column_names, column_indexes_by_names, format_settings); - } - else - { - column_mapping->setupByHeader(getPort().getHeader()); - format_reader->skipNames(); - } - } - else if (!column_mapping->is_set) - column_mapping->setupByHeader(getPort().getHeader()); + column_names = format_reader->readNames(); if (with_types) { /// Skip delimiter between names and types. format_reader->skipRowBetweenDelimiter(); - if (format_settings.with_types_use_header) + type_names = format_reader->readTypes(); + } + + if (!with_names && !with_types && try_detect_header) + tryDetectHeader(column_names, type_names); + + if (!column_names.empty()) + { + if (format_settings.with_names_use_header) + column_mapping->addColumns(column_names, column_indexes_by_names, format_settings); + else + column_mapping->setupByHeader(getPort().getHeader()); + } + else if (!column_mapping->is_set) + column_mapping->setupByHeader(getPort().getHeader()); + + if (!type_names.empty() && format_settings.with_types_use_header) + { + if (type_names.size() != column_mapping->column_indexes_for_input_fields.size()) + throw Exception( + ErrorCodes::INCORRECT_DATA, + "The number of data types differs from the number of column names in input data"); + + /// Check that types from input matches types from header. + for (size_t i = 0; i < type_names.size(); ++i) { - auto types = format_reader->readTypes(); - if (types.size() != column_mapping->column_indexes_for_input_fields.size()) + if (column_mapping->column_indexes_for_input_fields[i] && + data_types[*column_mapping->column_indexes_for_input_fields[i]]->getName() != type_names[i]) + { throw Exception( ErrorCodes::INCORRECT_DATA, - "The number of data types differs from the number of column names in input data"); - - /// Check that types from input matches types from header. 
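For context on the tryDetectHeader() call added to readPrefix() above: the name-detection rule (implemented just below with the isSubsetOf() helper) treats the first row as a header of column names only when its values and the known column names are related by set inclusion in either direction. A standalone restatement of that check, with a hypothetical function name:

#include <string>
#include <unordered_set>
#include <vector>

// Sketch of the header-name heuristic: the first row is accepted as column names only if
// its values are a subset of the table's column names, or the column names are a subset of
// the row's values (so files with extra or missing columns are still recognized).
static bool firstRowLooksLikeColumnNames(
    const std::vector<std::string> & first_row_values,
    const std::vector<std::string> & header_column_names)
{
    std::unordered_set<std::string> row(first_row_values.begin(), first_row_values.end());
    std::unordered_set<std::string> header(header_column_names.begin(), header_column_names.end());

    auto is_subset_of = [](const auto & subset, const auto & set)
    {
        for (const auto & element : subset)
            if (!set.contains(element))
                return false;
        return true;
    };

    return is_subset_of(row, header) || is_subset_of(header, row);
}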
- for (size_t i = 0; i < types.size(); ++i) - { - if (column_mapping->column_indexes_for_input_fields[i] && - data_types[*column_mapping->column_indexes_for_input_fields[i]]->getName() != types[i]) - { - throw Exception( - ErrorCodes::INCORRECT_DATA, - "Type of '{}' must be {}, not {}", - getPort().getHeader().getByPosition(*column_mapping->column_indexes_for_input_fields[i]).name, - data_types[*column_mapping->column_indexes_for_input_fields[i]]->getName(), types[i]); - } + "Type of '{}' must be {}, not {}", + getPort().getHeader().getByPosition(*column_mapping->column_indexes_for_input_fields[i]).name, + data_types[*column_mapping->column_indexes_for_input_fields[i]]->getName(), type_names[i]); } } - else - format_reader->skipTypes(); } } +void RowInputFormatWithNamesAndTypes::tryDetectHeader(std::vector & column_names_out, std::vector & type_names_out) +{ + auto & read_buf = getReadBuffer(); + PeekableReadBuffer * peekable_buf = dynamic_cast(&read_buf); + if (!peekable_buf) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Header detection is supported only for formats that use PeekableReadBuffer"); + + /// Empty data. + if (unlikely(format_reader->checkForSuffix())) + { + end_of_stream = true; + return; + } + + /// Make a checkpoint before reading the first row. + peekable_buf->setCheckpoint(); + auto first_row_values = format_reader->readRowForHeaderDetection(); + + /// To understand if the first row is a header with column names, we check + /// that all values from this row is a subset of column names from provided header + /// or column names from provided header is a subset of values from this row + auto column_names = getPort().getHeader().getNames(); + std::unordered_set column_names_set(column_names.begin(), column_names.end()); + std::unordered_set first_row_values_set(first_row_values.begin(), first_row_values.end()); + if (!isSubsetOf(first_row_values_set, column_names_set) && !isSubsetOf(column_names_set, first_row_values_set)) + { + /// Rollback to the beginning of the first row to parse it as data later. + peekable_buf->rollbackToCheckpoint(true); + return; + } + + /// First row is a header with column names. + column_names_out = std::move(first_row_values); + peekable_buf->dropCheckpoint(); + is_header_detected = true; + + /// Data contains only 1 row and it's just names. + if (unlikely(format_reader->checkForSuffix())) + { + end_of_stream = true; + return; + } + + /// Make a checkpoint before reading the second row. + peekable_buf->setCheckpoint(); + + /// Skip delimiter between the first and the second rows. + format_reader->skipRowBetweenDelimiter(); + auto second_row_values = format_reader->readRowForHeaderDetection(); + + /// The second row can be a header with type names if it contains only valid type names. + if (!checkIfAllValuesAreTypeNames(second_row_values)) + { + /// Rollback to the beginning of the second row to parse it as data later. + peekable_buf->rollbackToCheckpoint(true); + return; + } + + /// The second row is a header with type names. 
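The second-row check that follows relies on the checkIfAllValuesAreTypeNames() helper added at the top of this file: a row is promoted to a types header only if every cell resolves to a known data type. A condensed restatement, assuming it is built inside the ClickHouse tree; the function name here is illustrative:

#include <DataTypes/DataTypeFactory.h>
#include <string>
#include <vector>

// A candidate second header row is only treated as type names when every cell parses as a
// ClickHouse data type, e.g. "UInt64" or "Nullable(String)"; otherwise it is ordinary data.
static bool rowIsTypeNames(const std::vector<std::string> & cells)
{
    for (const auto & cell : cells)
        if (!DB::DataTypeFactory::instance().tryGet(cell))
            return false;
    return true;
}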
+ type_names_out = std::move(second_row_values); + peekable_buf->dropCheckpoint(); +} + bool RowInputFormatWithNamesAndTypes::readRow(MutableColumns & columns, RowReadExtension & ext) { if (unlikely(end_of_stream)) @@ -112,7 +206,7 @@ bool RowInputFormatWithNamesAndTypes::readRow(MutableColumns & columns, RowReadE updateDiagnosticInfo(); - if (likely(row_num != 1 || (getCurrentUnitNumber() == 0 && (with_names || with_types)))) + if (likely(row_num != 1 || (getCurrentUnitNumber() == 0 && (with_names || with_types || is_header_detected)))) format_reader->skipRowBetweenDelimiter(); format_reader->skipRowStartDelimiter(); @@ -234,8 +328,9 @@ FormatWithNamesAndTypesSchemaReader::FormatWithNamesAndTypesSchemaReader( bool with_names_, bool with_types_, FormatWithNamesAndTypesReader * format_reader_, - DataTypePtr default_type_) - : IRowSchemaReader(in_, format_settings_, default_type_), with_names(with_names_), with_types(with_types_), format_reader(format_reader_) + DataTypePtr default_type_, + bool try_detect_header_) + : IRowSchemaReader(in_, format_settings_, default_type_), with_names(with_names_), with_types(with_types_), format_reader(format_reader_), try_detect_header(try_detect_header_) { } @@ -246,31 +341,173 @@ NamesAndTypesList FormatWithNamesAndTypesSchemaReader::readSchema() format_reader->skipPrefixBeforeHeader(); - Names names; + std::vector column_names; if (with_names) - names = format_reader->readNames(); + column_names = format_reader->readNames(); + std::vector data_type_names; if (with_types) { format_reader->skipRowBetweenDelimiter(); - std::vector data_type_names = format_reader->readTypes(); - if (data_type_names.size() != names.size()) + data_type_names = format_reader->readTypes(); + } + + if (!with_names && !with_types && try_detect_header) + tryDetectHeader(column_names, data_type_names); + + if (!data_type_names.empty()) + { + if (data_type_names.size() != column_names.size()) throw Exception( ErrorCodes::INCORRECT_DATA, - "The number of column names {} differs with the number of types {}", names.size(), data_type_names.size()); + "The number of column names {} differs with the number of types {}", column_names.size(), data_type_names.size()); NamesAndTypesList result; for (size_t i = 0; i != data_type_names.size(); ++i) - result.emplace_back(names[i], DataTypeFactory::instance().get(data_type_names[i])); + result.emplace_back(column_names[i], DataTypeFactory::instance().get(data_type_names[i])); return result; } - if (!names.empty()) - setColumnNames(names); + if (!column_names.empty()) + setColumnNames(column_names); /// We should determine types by reading rows with data. Use the implementation from IRowSchemaReader. return IRowSchemaReader::readSchema(); } +namespace +{ + bool checkIfAllTypesAreString(const DataTypes & types) + { + for (const auto & type : types) + if (!type || !isString(removeNullable(removeLowCardinality(type)))) + return false; + return true; + } + + bool haveNotStringAndNotNullType(const DataTypes & types) + { + for (const auto & type : types) + if (type && !isString(removeNullable(removeLowCardinality(type))) && !type->onlyNull()) + return true; + return false; + } +} + +void FormatWithNamesAndTypesSchemaReader::tryDetectHeader(std::vector & column_names, std::vector & type_names) +{ + auto [first_row_values, first_row_types] = readRowAndGetFieldsAndDataTypes(); + + /// No data. + if (first_row_values.empty()) + return; + + /// The first row contains non String elements, it cannot be a header. 
+ if (!checkIfAllTypesAreString(first_row_types)) + { + buffered_types = first_row_types; + return; + } + + auto [second_row_values, second_row_types] = readRowAndGetFieldsAndDataTypes(); + + /// Data contains only 1 row, don't treat it as a header. + if (second_row_values.empty()) + { + buffered_types = first_row_types; + return; + } + + DataTypes data_types; + bool second_row_can_be_type_names = checkIfAllTypesAreString(second_row_types) && checkIfAllValuesAreTypeNames(readNamesFromFields(second_row_values)); + size_t row = 2; + if (!second_row_can_be_type_names) + { + data_types = second_row_types; + } + else + { + data_types = readRowAndGetDataTypes(); + /// Data contains only 2 rows. + if (data_types.empty()) + { + second_row_can_be_type_names = false; + data_types = second_row_types; + } + else + { + ++row; + } + } + + /// Create default names c1,c2,... for better exception messages. + std::vector default_colum_names; + default_colum_names.reserve(first_row_types.size()); + for (size_t i = 0; i != first_row_types.size(); ++i) + default_colum_names.push_back("c" + std::to_string(i + 1)); + + while (true) + { + /// Check if we have element that is not String and not Null. It means that the first two rows + /// with all String elements are most likely a header. + if (haveNotStringAndNotNullType(data_types)) + { + buffered_types = data_types; + column_names = readNamesFromFields(first_row_values); + if (second_row_can_be_type_names) + type_names = readNamesFromFields(second_row_values); + return; + } + + /// Check if we have all elements with type String. It means that the first two rows + /// with all String elements can be real data and we cannot use them as a header. + if (checkIfAllTypesAreString(data_types)) + { + buffered_types = std::move(data_types); + return; + } + + auto next_row_types = readRowAndGetDataTypes(); + /// Check if there are no more rows in data. It means that all rows contains only String values and Nulls, + /// so, the first two rows with all String elements can be real data and we cannot use them as a header. + if (next_row_types.empty()) + { + /// Buffer first data types from the first row, because it doesn't contain Nulls. + buffered_types = first_row_types; + return; + } + + ++row; + /// Combine types from current row and from previous rows. + chooseResultColumnTypes(*this, data_types, next_row_types, getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule::CSV), default_colum_names, row); + } +} + +DataTypes FormatWithNamesAndTypesSchemaReader::readRowAndGetDataTypes() +{ + /// Check if we tried to detect a header and have buffered types from read rows. 
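The loop above boils down to two type predicates that are easy to lose in the diff noise: a row whose inferred types are all String may be a header, and that guess is only confirmed once a later row produces a type that is neither String nor Null. A condensed restatement of those predicates, assuming the ClickHouse tree; include paths are assumptions:

#include <DataTypes/DataTypeLowCardinality.h>   // removeLowCardinality
#include <DataTypes/DataTypeNullable.h>         // removeNullable
#include <DataTypes/IDataType.h>                // isString, DataTypes

using namespace DB;

/// A row whose inferred types are all String (possibly wrapped in Nullable/LowCardinality)
/// is a header *candidate*: it could also be ordinary string data.
static bool allTypesAreString(const DataTypes & types)
{
    for (const auto & type : types)
        if (!type || !isString(removeNullable(removeLowCardinality(type))))
            return false;
    return true;
}

/// The candidate is confirmed as a header once some later row yields a type that is neither
/// String nor Null; if the whole file is strings/NULLs, the first rows are kept as data.
static bool hasNonStringNonNullType(const DataTypes & types)
{
    for (const auto & type : types)
        if (type && !isString(removeNullable(removeLowCardinality(type))) && !type->onlyNull())
            return true;
    return false;
}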
+ if (!buffered_types.empty()) + { + DataTypes res; + std::swap(res, buffered_types); + return res; + } + + return readRowAndGetDataTypesImpl(); +} + +std::vector FormatWithNamesAndTypesSchemaReader::readNamesFromFields(const std::vector & fields) +{ + std::vector names; + names.reserve(fields.size()); + auto escaping_rule = format_reader->getEscapingRule(); + for (const auto & field : fields) + { + ReadBufferFromString field_buf(field); + names.emplace_back(readStringByEscapingRule(field_buf, escaping_rule, format_settings)); + } + return names; +} + } diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h index b3066f0bdbb..94ad9fac445 100644 --- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h +++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h @@ -8,6 +8,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + class FormatWithNamesAndTypesReader; /// Base class for input formats with -WithNames and -WithNamesAndTypes suffixes. @@ -36,7 +41,8 @@ protected: bool with_names_, bool with_types_, const FormatSettings & format_settings_, - std::unique_ptr format_reader_); + std::unique_ptr format_reader_, + bool try_detect_header_ = false); void resetParser() override; bool isGarbageAfterField(size_t index, ReadBuffer::Position pos) override; @@ -53,10 +59,14 @@ private: bool parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) override; void tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) override; + void tryDetectHeader(std::vector & column_names, std::vector & type_names); + bool is_binary; bool with_names; bool with_types; std::unique_ptr format_reader; + bool try_detect_header; + bool is_header_detected = false; protected: Block::NameMap column_indexes_by_names; @@ -86,6 +96,12 @@ public: /// Read row with types and return the list of them. virtual std::vector readTypes() = 0; + /// Read row with raw values. + virtual std::vector readRowForHeaderDetection() + { + throw Exception("Method readRowAndGetFieldsAndDataTypes is not implemented for format reader", ErrorCodes::NOT_IMPLEMENTED); + } + /// Skip single field, it's used to skip unknown columns. virtual void skipField(size_t file_column) = 0; /// Skip the whole row with names. @@ -109,6 +125,11 @@ public: virtual ~FormatWithNamesAndTypesReader() = default; + virtual FormatSettings::EscapingRule getEscapingRule() const + { + throw Exception("Format reader doesn't have an escaping rule", ErrorCodes::NOT_IMPLEMENTED); + } + protected: ReadBuffer * in; FormatSettings format_settings; @@ -129,18 +150,35 @@ public: bool with_names_, bool with_types_, FormatWithNamesAndTypesReader * format_reader_, - DataTypePtr default_type_ = nullptr); + DataTypePtr default_type_ = nullptr, + bool try_detect_header_ = false); NamesAndTypesList readSchema() override; protected: - virtual DataTypes readRowAndGetDataTypes() override = 0; + virtual DataTypes readRowAndGetDataTypes() override; + + virtual DataTypes readRowAndGetDataTypesImpl() + { + throw Exception{ErrorCodes::NOT_IMPLEMENTED, "Method readRowAndGetDataTypesImpl is not implemented"}; + } + + /// Return column fields with inferred types. In case of no more rows, return empty vectors. 
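readNamesFromFields() above is what turns the raw first-row cells into usable column names: each cell is re-read through the format's escaping rule, so quoted or escaped names are stored unescaped. A condensed restatement, assuming the ClickHouse tree; the include paths and helper name are assumptions, while readStringByEscapingRule() is the call used above:

#include <Formats/EscapingRuleUtils.h>   // readStringByEscapingRule (assumed location)
#include <Formats/FormatSettings.h>
#include <IO/ReadBufferFromString.h>
#include <vector>

using namespace DB;

/// Unescape detected header cells according to the format's escaping rule
/// (Escaped for TSV, the CSV rule for CSV, and so on).
static std::vector<String> namesFromRawCells(
    const std::vector<String> & cells,
    FormatSettings::EscapingRule rule,
    const FormatSettings & settings)
{
    std::vector<String> names;
    names.reserve(cells.size());
    for (const auto & cell : cells)
    {
        ReadBufferFromString cell_buf(cell);
        names.emplace_back(readStringByEscapingRule(cell_buf, rule, settings));
    }
    return names;
}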
+ virtual std::pair, DataTypes> readRowAndGetFieldsAndDataTypes() + { + throw Exception{ErrorCodes::NOT_IMPLEMENTED, "Method readRowAndGetFieldsAndDataTypes is not implemented"}; + } bool with_names; bool with_types; private: + void tryDetectHeader(std::vector & column_names_out, std::vector & type_names_out); + std::vector readNamesFromFields(const std::vector & fields); + FormatWithNamesAndTypesReader * format_reader; + bool try_detect_header; + DataTypes buffered_types; }; } diff --git a/src/Processors/IAccumulatingTransform.cpp b/src/Processors/IAccumulatingTransform.cpp index 35b45543151..00d5b2ee089 100644 --- a/src/Processors/IAccumulatingTransform.cpp +++ b/src/Processors/IAccumulatingTransform.cpp @@ -17,7 +17,7 @@ IAccumulatingTransform::IAccumulatingTransform(Block input_header, Block output_ InputPort * IAccumulatingTransform::addTotalsPort() { if (inputs.size() > 1) - throw Exception("Totals port was already added to IAccumulatingTransform", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Totals port was already added to IAccumulatingTransform"); return &inputs.emplace_back(getInputPort().getHeader(), this); } @@ -108,8 +108,9 @@ void IAccumulatingTransform::work() void IAccumulatingTransform::setReadyChunk(Chunk chunk) { if (current_output_chunk) - throw Exception("IAccumulatingTransform already has input. Cannot set another chunk. " - "Probably, setReadyChunk method was called twice per consume().", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "IAccumulatingTransform already has input. " + "Cannot set another chunk. Probably, setReadyChunk method was called twice per consume()."); current_output_chunk = std::move(chunk); } diff --git a/src/Processors/IInflatingTransform.cpp b/src/Processors/IInflatingTransform.cpp index f54d6458c10..ffa5b55dc76 100644 --- a/src/Processors/IInflatingTransform.cpp +++ b/src/Processors/IInflatingTransform.cpp @@ -67,7 +67,7 @@ void IInflatingTransform::work() if (can_generate) { if (generated) - throw Exception("IInflatingTransform cannot consume chunk because it already was generated", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "IInflatingTransform cannot consume chunk because it already was generated"); current_chunk = generate(); generated = true; @@ -76,7 +76,7 @@ void IInflatingTransform::work() else { if (!has_input) - throw Exception("IInflatingTransform cannot consume chunk because it wasn't read", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "IInflatingTransform cannot consume chunk because it wasn't read"); consume(std::move(current_chunk)); has_input = false; diff --git a/src/Processors/LimitTransform.cpp b/src/Processors/LimitTransform.cpp index ffff8c30904..2feee7e65b1 100644 --- a/src/Processors/LimitTransform.cpp +++ b/src/Processors/LimitTransform.cpp @@ -19,7 +19,7 @@ LimitTransform::LimitTransform( , with_ties(with_ties_), description(std::move(description_)) { if (num_streams != 1 && with_ties) - throw Exception("Cannot use LimitTransform with multiple ports and ties", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot use LimitTransform with multiple ports and ties"); ports_data.resize(num_streams); @@ -125,8 +125,7 @@ IProcessor::Status LimitTransform::prepare( LimitTransform::Status LimitTransform::prepare() { if (ports_data.size() != 1) - throw Exception("prepare without arguments is not supported for multi-port LimitTransform", - ErrorCodes::LOGICAL_ERROR); + throw 
Exception(ErrorCodes::LOGICAL_ERROR, "prepare without arguments is not supported for multi-port LimitTransform"); return prepare({0}, {0}); } diff --git a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp index db08f3ffbd3..560be60987b 100644 --- a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp @@ -221,7 +221,7 @@ void AggregatingSortedAlgorithm::AggregatingMergedData::finishGroup() void AggregatingSortedAlgorithm::AggregatingMergedData::addRow(SortCursor & cursor) { if (!is_group_started) - throw Exception("Can't add a row to the group because it was not started.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't add a row to the group because it was not started."); for (auto & desc : def.columns_to_aggregate) desc.column->insertMergeFrom(*cursor->all_columns[desc.column_number], cursor->getRow()); @@ -236,7 +236,7 @@ void AggregatingSortedAlgorithm::AggregatingMergedData::addRow(SortCursor & curs Chunk AggregatingSortedAlgorithm::AggregatingMergedData::pull() { if (is_group_started) - throw Exception("Can't pull chunk because group was not finished.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't pull chunk because group was not finished."); auto chunk = MergedData::pull(); postprocessChunk(chunk, def); diff --git a/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp index 1cca2510197..0c23dd51f3c 100644 --- a/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/CollapsingSortedAlgorithm.cpp @@ -194,8 +194,7 @@ IMergingAlgorithm::Status CollapsingSortedAlgorithm::merge() last_is_positive = false; } else - throw Exception("Incorrect data: Sign = " + toString(sign) + " (must be 1 or -1).", - ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Incorrect data: Sign = {} (must be 1 or -1).", toString(sign)); ++current_pos; diff --git a/src/Processors/Merges/Algorithms/FixedSizeDequeWithGaps.h b/src/Processors/Merges/Algorithms/FixedSizeDequeWithGaps.h index ff8f113d9a6..3f04d087861 100644 --- a/src/Processors/Merges/Algorithms/FixedSizeDequeWithGaps.h +++ b/src/Processors/Merges/Algorithms/FixedSizeDequeWithGaps.h @@ -144,20 +144,20 @@ private: void checkEnoughSpaceToInsert() const { if (size() + 1 == container.size()) - throw Exception("Not enough space to insert into FixedSizeDequeWithGaps with capacity " - + std::to_string(container.size() - 1), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Not enough space to insert into FixedSizeDequeWithGaps with capacity {}", + container.size() - 1); } void checkHasValuesToRemove() const { if (empty()) - throw Exception("Cannot remove from empty FixedSizeDequeWithGaps", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot remove from empty FixedSizeDequeWithGaps"); } void checkHasValuesToGet() const { if (empty()) - throw Exception("Cannot get value from empty FixedSizeDequeWithGaps", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot get value from empty FixedSizeDequeWithGaps"); } }; diff --git a/src/Processors/Merges/Algorithms/Graphite.cpp b/src/Processors/Merges/Algorithms/Graphite.cpp index 0616c4bd6e6..6b4e3217bbe 100644 --- a/src/Processors/Merges/Algorithms/Graphite.cpp +++ 
b/src/Processors/Merges/Algorithms/Graphite.cpp @@ -43,7 +43,7 @@ const String & ruleTypeStr(RuleType rule_type) } catch (...) { - throw Exception("invalid rule type: " + std::to_string(rule_type), DB::ErrorCodes::BAD_ARGUMENTS); + throw Exception(DB::ErrorCodes::BAD_ARGUMENTS, "invalid rule type: {}", std::to_string(rule_type)); } } @@ -58,7 +58,7 @@ RuleType ruleType(const String & s) else if (s == "tag_list") return RuleTypeTagList; else - throw Exception("invalid rule type: " + s, DB::ErrorCodes::BAD_ARGUMENTS); + throw Exception(DB::ErrorCodes::BAD_ARGUMENTS, "invalid rule type: {}", s); } static const Graphite::Pattern undef_pattern = @@ -374,7 +374,7 @@ static const Pattern & appendGraphitePattern( .precision = config.getUInt(config_element + "." + key + ".precision")}); } else - throw Exception("Unknown element in config: " + key, DB::ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + throw Exception(DB::ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG, "Unknown element in config: {}", key); } if (!pattern.regexp_str.empty()) @@ -416,7 +416,8 @@ static const Pattern & appendGraphitePattern( if (pattern.type & pattern.TypeAggregation) /// TypeAggregation or TypeAll if (pattern.function->allocatesMemoryInArena()) throw Exception( - "Aggregate function " + pattern.function->getName() + " isn't supported in GraphiteMergeTree", DB::ErrorCodes::NOT_IMPLEMENTED); + "Aggregate function " + pattern.function->getName() + " isn't supported " + "in GraphiteMergeTree", DB::ErrorCodes::NOT_IMPLEMENTED); /// retention should be in descending order of age. if (pattern.type & pattern.TypeRetention) /// TypeRetention or TypeAll @@ -431,7 +432,7 @@ void setGraphitePatternsFromConfig(ContextPtr context, const String & config_ele const auto & config = context->getConfigRef(); if (!config.has(config_element)) - throw Exception("No '" + config_element + "' element in configuration file", ErrorCodes::NO_ELEMENTS_IN_CONFIG); + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "No '{}' element in configuration file", config_element); params.config_name = config_element; params.path_column_name = config.getString(config_element + ".path_column_name", "Path"); @@ -460,7 +461,7 @@ void setGraphitePatternsFromConfig(ContextPtr context, const String & config_ele /// See above. 
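// A minimal, self-contained sketch of the message-formatting idiom this patch migrates to
// (not ClickHouse's actual DB::Exception): the error code comes first and the message is an
// fmt-style format string, so arguments are passed via {} placeholders instead of being
// concatenated with '+'. The class below, the BAD_ARGUMENTS value and ruleTypeExample()
// are assumptions for illustration only.
#include <fmt/core.h>
#include <stdexcept>
#include <string>
#include <utility>

namespace sketch
{
class Exception : public std::runtime_error
{
public:
    /// Code first, then a compile-time checked format string and its arguments.
    template <typename... Args>
    Exception(int code_, fmt::format_string<Args...> format, Args &&... args)
        : std::runtime_error(fmt::format(format, std::forward<Args>(args)...)), code(code_)
    {
    }

    int code;
};

constexpr int BAD_ARGUMENTS = 36; /// illustrative value only

void ruleTypeExample(const std::string & s)
{
    /// Before: throw Exception("invalid rule type: " + s, BAD_ARGUMENTS);
    /// After (the style used throughout this diff):
    throw Exception(BAD_ARGUMENTS, "invalid rule type: {}", s);
}
}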
} else - throw Exception("Unknown element in config: " + key, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG, "Unknown element in config: {}", key); } if (config.has(config_element + ".default")) @@ -486,7 +487,7 @@ void setGraphitePatternsFromConfig(ContextPtr context, const String & config_ele } else { - throw Exception("Unhandled rule_type in config: " + ruleTypeStr(pattern.rule_type), ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG, "Unhandled rule_type in config: {}", ruleTypeStr(pattern.rule_type)); } } } diff --git a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp index c5937fe0bc5..123748f9b43 100644 --- a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp @@ -33,7 +33,7 @@ static GraphiteRollupSortedAlgorithm::ColumnsDefinition defineColumns( def.unmodified_column_numbers.push_back(i); if (!WhichDataType(header.getByPosition(def.value_column_num).type).isFloat64()) - throw Exception("Only `Float64` data type is allowed for the value column of GraphiteMergeTree", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Only `Float64` data type is allowed for the value column of GraphiteMergeTree"); return def; } diff --git a/src/Processors/Merges/Algorithms/MergedData.h b/src/Processors/Merges/Algorithms/MergedData.h index 6029809f0f2..24b83013aee 100644 --- a/src/Processors/Merges/Algorithms/MergedData.h +++ b/src/Processors/Merges/Algorithms/MergedData.h @@ -57,8 +57,7 @@ public: void insertChunk(Chunk && chunk, size_t rows_size) { if (merged_rows) - throw Exception("Cannot insert to MergedData from Chunk because MergedData is not empty.", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot insert to MergedData from Chunk because MergedData is not empty."); UInt64 num_rows = chunk.getNumRows(); columns = chunk.mutateColumns(); diff --git a/src/Processors/Merges/Algorithms/RowRef.h b/src/Processors/Merges/Algorithms/RowRef.h index a7646aa701f..cf741c1b53b 100644 --- a/src/Processors/Merges/Algorithms/RowRef.h +++ b/src/Processors/Merges/Algorithms/RowRef.h @@ -66,8 +66,8 @@ public: SharedChunkPtr alloc(Chunk & chunk) { if (free_chunks.empty()) - throw Exception("Not enough space in SharedChunkAllocator. " - "Chunks allocated: " + std::to_string(chunks.size()), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Not enough space in SharedChunkAllocator. Chunks allocated: {}", + chunks.size()); auto pos = free_chunks.back(); free_chunks.pop_back(); diff --git a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp index ee3177e132f..0f1775d4ac0 100644 --- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp @@ -486,9 +486,8 @@ static void setRow(Row & row, const ColumnRawPtrs & raw_columns, size_t row_num, if (i < column_names.size()) column_name = column_names[i]; - throw Exception("SummingSortedAlgorithm failed to read row " + toString(row_num) - + " of column " + toString(i) + (column_name.empty() ? 
"" : " (" + column_name + ")"), - ErrorCodes::CORRUPTED_DATA); + throw Exception(ErrorCodes::CORRUPTED_DATA, "SummingSortedAlgorithm failed to read row {} of column {})", + toString(row_num), toString(i) + (column_name.empty() ? "" : " (" + column_name)); } } } @@ -630,8 +629,7 @@ void SummingSortedAlgorithm::SummingMergedData::addRowImpl(ColumnRawPtrs & raw_c for (auto & desc : def.columns_to_aggregate) { if (!desc.created) - throw Exception("Logical error in SummingSortedAlgorithm, there are no description", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error in SummingSortedAlgorithm, there are no description"); if (desc.is_agg_func_type) { diff --git a/src/Processors/Merges/IMergingTransform.cpp b/src/Processors/Merges/IMergingTransform.cpp index 1b847069fea..4a6a1662f16 100644 --- a/src/Processors/Merges/IMergingTransform.cpp +++ b/src/Processors/Merges/IMergingTransform.cpp @@ -42,13 +42,13 @@ IMergingTransformBase::IMergingTransformBase( void IMergingTransformBase::onNewInput() { - throw Exception("onNewInput is not implemented for " + getName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "onNewInput is not implemented for {}", getName()); } void IMergingTransformBase::addInput() { if (have_all_inputs) - throw Exception("IMergingTransform already have all inputs.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "IMergingTransform already have all inputs."); inputs.emplace_back(outputs.front().getHeader(), this); onNewInput(); @@ -57,7 +57,7 @@ void IMergingTransformBase::addInput() void IMergingTransformBase::setHaveAllInputs() { if (have_all_inputs) - throw Exception("IMergingTransform already have all inputs.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "IMergingTransform already have all inputs."); have_all_inputs = true; } diff --git a/src/Processors/OffsetTransform.cpp b/src/Processors/OffsetTransform.cpp index 40a0833ffb4..dbb8bebfce6 100644 --- a/src/Processors/OffsetTransform.cpp +++ b/src/Processors/OffsetTransform.cpp @@ -61,7 +61,8 @@ IProcessor::Status OffsetTransform::prepare(const PortNumbers & updated_input_po return; default: throw Exception( - ErrorCodes::LOGICAL_ERROR, "Unexpected status for OffsetTransform::preparePair : {}", IProcessor::statusToName(status)); + ErrorCodes::LOGICAL_ERROR, "Unexpected status for OffsetTransform::preparePair : {}", + IProcessor::statusToName(status)); } }; @@ -84,8 +85,7 @@ IProcessor::Status OffsetTransform::prepare(const PortNumbers & updated_input_po OffsetTransform::Status OffsetTransform::prepare() { if (ports_data.size() != 1) - throw Exception("prepare without arguments is not supported for multi-port OffsetTransform", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "prepare without arguments is not supported for multi-port OffsetTransform"); return prepare({0}, {0}); } diff --git a/src/Processors/Port.h b/src/Processors/Port.h index 9163402f600..67af2f041aa 100644 --- a/src/Processors/Port.h +++ b/src/Processors/Port.h @@ -89,7 +89,7 @@ protected: DataPtr() : data(new Data()) { if (unlikely((getUInt(data) & FLAGS_MASK) != 0)) - throw Exception("Not alignment memory for Port", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Not alignment memory for Port"); } /// Pointer can store flags in case of exception in swap. 
~DataPtr() { delete getPtr(getUInt(data) & PTR_MASK); } @@ -133,7 +133,7 @@ protected: State() : data(new Data()) { if (unlikely((getUInt(data) & FLAGS_MASK) != 0)) - throw Exception("Not alignment memory for Port", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Not alignment memory for Port"); } ~State() @@ -153,14 +153,14 @@ protected: /// It's possible to push data into finished port. Will just ignore it. /// if (flags & IS_FINISHED) - /// throw Exception("Cannot push block to finished port.", ErrorCodes::LOGICAL_ERROR); + /// throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot push block to finished port."); /// It's possible to push data into port which is not needed now. /// if ((flags & IS_NEEDED) == 0) - /// throw Exception("Cannot push block to port which is not needed.", ErrorCodes::LOGICAL_ERROR); + /// throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot push block to port which is not needed."); if (unlikely(flags & HAS_DATA)) - throw Exception("Cannot push block to port which already has data", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot push block to port which already has data"); } void ALWAYS_INLINE pull(DataPtr & data_, std::uintptr_t & flags, bool set_not_needed = false) @@ -174,10 +174,10 @@ protected: /// It's ok to check because this flag can be changed only by pulling thread. if (unlikely((flags & IS_NEEDED) == 0) && !set_not_needed) - throw Exception("Cannot pull block from port which is not needed", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot pull block from port which is not needed"); if (unlikely((flags & HAS_DATA) == 0)) - throw Exception("Cannot pull block from port which has no data", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot pull block from port which has no data"); } std::uintptr_t ALWAYS_INLINE setFlags(std::uintptr_t flags, std::uintptr_t mask) @@ -225,7 +225,7 @@ public: void ALWAYS_INLINE assumeConnected() const { if (unlikely(!isConnected())) - throw Exception("Port is not connected", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Port is not connected"); } bool ALWAYS_INLINE hasData() const @@ -237,14 +237,14 @@ public: IProcessor & getProcessor() { if (!processor) - throw Exception("Port does not belong to Processor", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Port does not belong to Processor"); return *processor; } const IProcessor & getProcessor() const { if (!processor) - throw Exception("Port does not belong to Processor", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Port does not belong to Processor"); return *processor; } diff --git a/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp b/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp index e42642ceff8..86b64d1519c 100644 --- a/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp +++ b/src/Processors/QueryPlan/CreateSetAndFilterOnTheFlyStep.cpp @@ -198,7 +198,7 @@ void CreateSetAndFilterOnTheFlyStep::updateOutputStream() own_set->setHeader(getColumnSubset(input_streams[0].header, column_names)); - output_stream = input_streams[0]; + output_stream = createOutputStream(input_streams.front(), input_streams.front().header, getDataStreamTraits()); } diff --git a/src/Processors/QueryPlan/CreatingSetsStep.cpp b/src/Processors/QueryPlan/CreatingSetsStep.cpp index b52d86aa725..23e0a17a31b 100644 --- a/src/Processors/QueryPlan/CreatingSetsStep.cpp +++ 
b/src/Processors/QueryPlan/CreatingSetsStep.cpp @@ -77,7 +77,7 @@ void CreatingSetStep::describeActions(JSONBuilder::JSONMap & map) const CreatingSetsStep::CreatingSetsStep(DataStreams input_streams_) { if (input_streams_.empty()) - throw Exception("CreatingSetsStep cannot be created with no inputs", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "CreatingSetsStep cannot be created with no inputs"); input_streams = std::move(input_streams_); output_stream = input_streams.front(); @@ -91,7 +91,7 @@ CreatingSetsStep::CreatingSetsStep(DataStreams input_streams_) QueryPipelineBuilderPtr CreatingSetsStep::updatePipeline(QueryPipelineBuilders pipelines, const BuildQueryPipelineSettings &) { if (pipelines.empty()) - throw Exception("CreatingSetsStep cannot be created with no inputs", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "CreatingSetsStep cannot be created with no inputs"); auto main_pipeline = std::move(pipelines.front()); if (pipelines.size() == 1) diff --git a/src/Processors/QueryPlan/FillingStep.cpp b/src/Processors/QueryPlan/FillingStep.cpp index 8a370786820..dde3bdbf850 100644 --- a/src/Processors/QueryPlan/FillingStep.cpp +++ b/src/Processors/QueryPlan/FillingStep.cpp @@ -33,7 +33,7 @@ FillingStep::FillingStep(const DataStream & input_stream_, SortDescription sort_ , sort_description(std::move(sort_description_)), interpolate_description(interpolate_description_) { if (!input_stream_.has_single_port) - throw Exception("FillingStep expects single input", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "FillingStep expects single input"); } void FillingStep::transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) @@ -60,7 +60,7 @@ void FillingStep::describeActions(JSONBuilder::JSONMap & map) const void FillingStep::updateOutputStream() { if (!input_streams.front().has_single_port) - throw Exception("FillingStep expects single input", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "FillingStep expects single input"); output_stream = createOutputStream( input_streams.front(), FillingTransform::transformHeader(input_streams.front().header, sort_description), getDataStreamTraits()); diff --git a/src/Processors/QueryPlan/IQueryPlanStep.cpp b/src/Processors/QueryPlan/IQueryPlanStep.cpp index a0035089c29..10352b330af 100644 --- a/src/Processors/QueryPlan/IQueryPlanStep.cpp +++ b/src/Processors/QueryPlan/IQueryPlanStep.cpp @@ -13,7 +13,7 @@ namespace ErrorCodes const DataStream & IQueryPlanStep::getOutputStream() const { if (!hasOutputStream()) - throw Exception("QueryPlanStep " + getName() + " does not have output stream.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "QueryPlanStep {} does not have output stream.", getName()); return *output_stream; } diff --git a/src/Processors/QueryPlan/Optimizations/Optimizations.h b/src/Processors/QueryPlan/Optimizations/Optimizations.h index 7f435463d64..4b587ada2c0 100644 --- a/src/Processors/QueryPlan/Optimizations/Optimizations.h +++ b/src/Processors/QueryPlan/Optimizations/Optimizations.h @@ -58,6 +58,9 @@ size_t tryReuseStorageOrderingForWindowFunctions(QueryPlan::Node * parent_node, /// Reading in order from MergeTree table if DISTINCT columns match or form a prefix of MergeTree sorting key size_t tryDistinctReadInOrder(QueryPlan::Node * node); +/// Remove redundant sorting +void tryRemoveRedundantSorting(QueryPlan::Node * root); + /// Put some steps under union, so that plan 
optimisation could be applied to union parts separately. /// For example, the plan can be rewritten like: /// - Something - - Expression - Something - diff --git a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp index 00abd803d2a..a2a69ae1f69 100644 --- a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp +++ b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.cpp @@ -14,6 +14,7 @@ QueryPlanOptimizationSettings QueryPlanOptimizationSettings::fromSettings(const settings.distinct_in_order = from.optimize_distinct_in_order; settings.read_in_order = from.optimize_read_in_order && from.query_plan_read_in_order; settings.aggregation_in_order = from.optimize_aggregation_in_order && from.query_plan_aggregation_in_order; + settings.remove_redundant_sorting = from.query_plan_remove_redundant_sorting; return settings; } diff --git a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h index d4989b86b68..b894e5caf1d 100644 --- a/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h +++ b/src/Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h @@ -30,6 +30,9 @@ struct QueryPlanOptimizationSettings /// If aggregation-in-order optimisation is enabled bool aggregation_in_order = false; + /// If removing redundant sorting is enabled, for example, ORDER BY clauses in subqueries + bool remove_redundant_sorting = true; + static QueryPlanOptimizationSettings fromSettings(const Settings & from); static QueryPlanOptimizationSettings fromContext(ContextPtr from); }; diff --git a/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp b/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp new file mode 100644 index 00000000000..a1622322e95 --- /dev/null +++ b/src/Processors/QueryPlan/Optimizations/removeRedundantSorting.cpp @@ -0,0 +1,350 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB::QueryPlanOptimizations +{ +template +class QueryPlanVisitor +{ +protected: + struct FrameWithParent + { + QueryPlan::Node * node = nullptr; + QueryPlan::Node * parent_node = nullptr; + size_t next_child = 0; + }; + + using StackWithParent = std::vector; + + QueryPlan::Node * root = nullptr; + StackWithParent stack; + +public: + explicit QueryPlanVisitor(QueryPlan::Node * root_) : root(root_) { } + + void visit() + { + stack.push_back({.node = root}); + + while (!stack.empty()) + { + auto & frame = stack.back(); + + QueryPlan::Node * current_node = frame.node; + QueryPlan::Node * parent_node = frame.parent_node; + + logStep("back", current_node); + + /// top-down visit + if (0 == frame.next_child) + { + logStep("top-down", current_node); + if (!visitTopDown(current_node, parent_node)) + continue; + } + /// Traverse all children + if (frame.next_child < frame.node->children.size()) + { + auto next_frame = FrameWithParent{.node = current_node->children[frame.next_child], .parent_node = current_node}; + ++frame.next_child; + logStep("push", next_frame.node); + stack.push_back(next_frame); + continue; + } + + /// bottom-up visit + logStep("bottom-up", current_node); + visitBottomUp(current_node, parent_node); + + logStep("pop", current_node); + stack.pop_back(); + } + } + + bool visitTopDown(QueryPlan::Node * 
current_node, QueryPlan::Node * parent_node) + { + return getDerived().visitTopDownImpl(current_node, parent_node); + } + void visitBottomUp(QueryPlan::Node * current_node, QueryPlan::Node * parent_node) + { + getDerived().visitBottomUpImpl(current_node, parent_node); + } + +private: + Derived & getDerived() { return *static_cast(this); } + + const Derived & getDerived() const { return *static_cast(this); } + + std::unordered_map address2name; + std::unordered_map name_gen; + + std::string getStepId(const IQueryPlanStep* step) + { + const auto step_name = step->getName(); + auto it = address2name.find(step); + if (it != address2name.end()) + return it->second; + + const auto seq_num = name_gen[step_name]++; + return address2name.insert({step, fmt::format("{}{}", step_name, seq_num)}).first->second; + } + +protected: + void logStep(const char * prefix, const QueryPlan::Node * node) + { + if constexpr (debug_logging) + { + const IQueryPlanStep * current_step = node->step.get(); + LOG_DEBUG( + &Poco::Logger::get("QueryPlanVisitor"), + "{}: {}: {}", + prefix, + getStepId(current_step), + reinterpret_cast(current_step)); + } + } +}; + +constexpr bool debug_logging_enabled = false; + +class RemoveRedundantSorting : public QueryPlanVisitor +{ + /// stack with nodes which affect order + /// nodes added when traversing top-down + /// as soon as all children for the node on top of stack are traversed, the node is removed from stack + std::vector nodes_affect_order; + +public: + explicit RemoveRedundantSorting(QueryPlan::Node * root_) : QueryPlanVisitor(root_) { } + + bool visitTopDownImpl(QueryPlan::Node * current_node, QueryPlan::Node * parent_node) + { + IQueryPlanStep * current_step = current_node->step.get(); + + /// if there is parent node which can affect order and current step is sorting + /// then check if we can remove the sorting step (and corresponding expression step) + if (!nodes_affect_order.empty() && typeid_cast(current_step)) + { + if (tryRemoveSorting(current_node, parent_node)) + { + logStep("step affect sorting", nodes_affect_order.back()); + logStep("removed from plan", current_node); + + auto & frame = stack.back(); + /// mark removed node as visited + frame.next_child = frame.node->children.size(); + + /// current sorting step has been removed from plan, its parent has new children, need to visit them + auto next_frame = FrameWithParent{.node = parent_node->children[0], .parent_node = parent_node}; + stack.push_back(next_frame); + logStep("push", next_frame.node); + return false; + } + } + + if (typeid_cast(current_step) + || typeid_cast(current_step) /// (1) if there are LIMITs on top of ORDER BY, the ORDER BY is non-removable + || typeid_cast(current_step) /// (2) if ORDER BY is with FILL WITH, it is non-removable + || typeid_cast(current_step) /// (3) ORDER BY will change order of previous sorting + || typeid_cast(current_step)) /// (4) aggregation change order + { + logStep("nodes_affect_order/push", current_node); + nodes_affect_order.push_back(current_node); + } + + return true; + } + + void visitBottomUpImpl(QueryPlan::Node * current_node, QueryPlan::Node *) + { + /// we come here when all children of current_node are visited, + /// so, if it's a node which affect order, remove it from the corresponding stack + if (!nodes_affect_order.empty() && nodes_affect_order.back() == current_node) + { + logStep("nodes_affect_order/pop", current_node); + nodes_affect_order.pop_back(); + } + } + +private: + bool tryRemoveSorting(QueryPlan::Node * sorting_node, QueryPlan::Node * 
parent_node) + { + if (!canRemoveCurrentSorting()) + return false; + + /// remove sorting + parent_node->children.front() = sorting_node->children.front(); + + /// sorting removed, so need to update sorting traits for upstream steps + const DataStream * input_stream = &parent_node->children.front()->step->getOutputStream(); + chassert(parent_node == (stack.rbegin() + 1)->node); /// skip element on top of stack since it's sorting which was just removed + for (StackWithParent::const_reverse_iterator it = stack.rbegin() + 1; it != stack.rend(); ++it) + { + const QueryPlan::Node * node = it->node; + /// skip removed sorting steps + auto * step = node->step.get(); + if (typeid_cast(step) && node != nodes_affect_order.back()) + continue; + + logStep("update sorting traits", node); + + auto * trans = dynamic_cast(step); + if (!trans) + { + logStep("stop update sorting traits: node is not transforming step", node); + break; + } + + trans->updateInputStream(*input_stream); + input_stream = &trans->getOutputStream(); + + /// update sorting properties though stack until reach node which affects order (inclusive) + if (node == nodes_affect_order.back()) + { + logStep("stop update sorting traits: reached node which affect order", node); + break; + } + } + + return true; + } + + bool canRemoveCurrentSorting() + { + chassert(!stack.empty()); + chassert(typeid_cast(stack.back().node->step.get())); + + return checkNodeAffectingOrder(nodes_affect_order.back()) && checkPathFromCurrentSortingNode(nodes_affect_order.back()); + } + + static bool checkNodeAffectingOrder(QueryPlan::Node * node_affect_order) + { + IQueryPlanStep * step_affect_order = node_affect_order->step.get(); + + /// if there are LIMITs on top of ORDER BY, the ORDER BY is non-removable + /// if ORDER BY is with FILL WITH, it is non-removable + if (typeid_cast(step_affect_order) || typeid_cast(step_affect_order) + || typeid_cast(step_affect_order)) + return false; + + /// (1) aggregation + if (const AggregatingStep * parent_aggr = typeid_cast(step_affect_order); parent_aggr) + { + if (parent_aggr->inOrder()) + return false; + + auto const & aggregates = parent_aggr->getParams().aggregates; + for (const auto & aggregate : aggregates) + { + auto aggregate_function_properties = AggregateFunctionFactory::instance().tryGetProperties(aggregate.function->getName()); + if (aggregate_function_properties && aggregate_function_properties->is_order_dependent) + return false; + } + return true; + } + /// (2) sorting + else if (const auto * next_sorting = typeid_cast(step_affect_order); next_sorting) + { + if (next_sorting->getType() == SortingStep::Type::Full) + return true; + } + + return false; + } + + bool checkPathFromCurrentSortingNode(const QueryPlan::Node * node_affect_order) + { + chassert(!stack.empty()); + chassert(typeid_cast(stack.back().node->step.get())); + + /// (1) if there is expression with stateful function between current step + /// and step which affects order, then we need to keep sorting since + /// stateful function output can depend on order + + /// skip element on top of stack since it's sorting + for (StackWithParent::const_reverse_iterator it = stack.rbegin() + 1; it != stack.rend(); ++it) + { + const QueryPlan::Node * node = it->node; + /// walking though stack until reach node which affects order + if (node == node_affect_order) + break; + + const auto * step = node->step.get(); + /// skip removed sorting steps + if (typeid_cast(step)) + continue; + + logStep("checking for stateful function", node); + if (const auto * expr = 
typeid_cast(step); expr) + { + if (expr->getExpression()->hasStatefulFunctions()) + return false; + } + else if (const auto * filter = typeid_cast(step); filter) + { + if (filter->getExpression()->hasStatefulFunctions()) + return false; + } + else + { + const auto * trans = dynamic_cast(step); + if (!trans) + break; + + if (!trans->getDataStreamTraits().preserves_sorting) + break; + } + } + + /// check steps on stack if there are some which can prevent from removing SortingStep + for (StackWithParent::const_reverse_iterator it = stack.rbegin() + 1; it != stack.rend(); ++it) + { + const QueryPlan::Node * node = it->node; + /// walking though stack until reach node which affects order + if (node == node_affect_order) + break; + + const auto * step = node->step.get(); + /// skip removed sorting steps + if (typeid_cast(step)) + continue; + + logStep("checking path from current sorting", node); + + /// (2) for window function we do ORDER BY in 2 Sorting steps, + /// so do not delete Sorting if window function step is on top + if (typeid_cast(step)) + return false; + + if (const auto * join_step = typeid_cast(step); join_step) + { + if (typeid_cast(join_step->getJoin().get())) + return false; + } + } + + return true; + } +}; + +void tryRemoveRedundantSorting(QueryPlan::Node * root) +{ + RemoveRedundantSorting(root).visit(); +} + +} diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp index e1662d43015..e817a9ef8a9 100644 --- a/src/Processors/QueryPlan/QueryPlan.cpp +++ b/src/Processors/QueryPlan/QueryPlan.cpp @@ -34,13 +34,13 @@ QueryPlan & QueryPlan::operator=(QueryPlan &&) noexcept = default; void QueryPlan::checkInitialized() const { if (!isInitialized()) - throw Exception("QueryPlan was not initialized", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "QueryPlan was not initialized"); } void QueryPlan::checkNotCompleted() const { if (isCompleted()) - throw Exception("QueryPlan was already completed", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "QueryPlan was already completed"); } bool QueryPlan::isCompleted() const @@ -58,8 +58,7 @@ const DataStream & QueryPlan::getCurrentDataStream() const void QueryPlan::unitePlans(QueryPlanStepPtr step, std::vector> plans) { if (isInitialized()) - throw Exception("Cannot unite plans because current QueryPlan is already initialized", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot unite plans because current QueryPlan is already initialized"); const auto & inputs = step->getInputStreams(); size_t num_inputs = step->getInputStreams().size(); @@ -447,6 +446,12 @@ void QueryPlan::explainPipeline(WriteBuffer & buffer, const ExplainPipelineOptio void QueryPlan::optimize(const QueryPlanOptimizationSettings & optimization_settings) { + /// optimization need to be applied before "mergeExpressions" optimization + /// it removes redundant sorting steps, but keep underlying expressions, + /// so "mergeExpressions" optimization handles them afterwards + if (optimization_settings.remove_redundant_sorting) + QueryPlanOptimizations::tryRemoveRedundantSorting(root); + QueryPlanOptimizations::optimizeTreeFirstPass(optimization_settings, *root, nodes); QueryPlanOptimizations::optimizeTreeSecondPass(optimization_settings, *root, nodes); } diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index f729e9e1383..22245b82966 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ 
b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -581,8 +581,7 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder( while (need_marks > 0) { if (part.ranges.empty()) - throw Exception("Unexpected end of ranges while spreading marks among streams", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected end of ranges while spreading marks among streams"); MarkRange & range = part.ranges.front(); diff --git a/src/Processors/QueryPlan/SortingStep.cpp b/src/Processors/QueryPlan/SortingStep.cpp index 8766c0ba335..66cf94bfb55 100644 --- a/src/Processors/QueryPlan/SortingStep.cpp +++ b/src/Processors/QueryPlan/SortingStep.cpp @@ -70,7 +70,7 @@ SortingStep::SortingStep( , optimize_sorting_by_input_stream_properties(optimize_sorting_by_input_stream_properties_) { if (sort_settings.max_bytes_before_external_sort && sort_settings.tmp_data == nullptr) - throw Exception("Temporary data storage for external sorting is not provided", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Temporary data storage for external sorting is not provided"); /// TODO: check input_stream is partially sorted by the same description. output_stream->sort_description = result_description; @@ -282,7 +282,16 @@ void SortingStep::transformPipeline(QueryPipelineBuilder & pipeline, const Build { /// skip sorting if stream is already sorted if (input_sort_mode == DataStream::SortScope::Global && input_sort_desc.hasPrefix(result_description)) + { + if (pipeline.getNumStreams() != 1) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "If input stream is globally sorted then there should be only 1 input stream at this stage. Number of input streams: " + "{}", + pipeline.getNumStreams()); + return; + } /// merge sorted if (input_sort_mode == DataStream::SortScope::Stream && input_sort_desc.hasPrefix(result_description)) diff --git a/src/Processors/ResizeProcessor.cpp b/src/Processors/ResizeProcessor.cpp index 6b37c10b3e7..8167fae9baf 100644 --- a/src/Processors/ResizeProcessor.cpp +++ b/src/Processors/ResizeProcessor.cpp @@ -343,12 +343,12 @@ IProcessor::Status StrictResizeProcessor::prepare(const PortNumbers & updated_in inputs_with_data.pop(); if (input_with_data.waiting_output == -1) - throw Exception("No associated output for input with data", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "No associated output for input with data"); auto & waiting_output = output_ports[input_with_data.waiting_output]; if (waiting_output.status == OutputStatus::NotActive) - throw Exception("Invalid status NotActive for associated output", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid status NotActive for associated output"); if (waiting_output.status != OutputStatus::Finished) { diff --git a/src/Processors/Sources/MySQLSource.cpp b/src/Processors/Sources/MySQLSource.cpp index ecc80bef40b..434d413a238 100644 --- a/src/Processors/Sources/MySQLSource.cpp +++ b/src/Processors/Sources/MySQLSource.cpp @@ -188,7 +188,7 @@ namespace if (hhmmss.size() == 3) v = static_cast((std::stoi(hhmmss[0]) * 3600 + std::stoi(hhmmss[1]) * 60 + std::stold(hhmmss[2])) * 1000000); else - throw Exception("Unsupported value format", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported value format"); if (negative) v = -v; assert_cast(column).insertValue(v); @@ -260,7 +260,7 @@ namespace read_bytes_size += column.sizeOfValueIfFixed(); break; default: - throw Exception("Unsupported value type", 
ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported value type"); } } diff --git a/src/Processors/Sources/SourceFromChunks.cpp b/src/Processors/Sources/SourceFromChunks.cpp new file mode 100644 index 00000000000..7b73b877d2e --- /dev/null +++ b/src/Processors/Sources/SourceFromChunks.cpp @@ -0,0 +1,47 @@ +#include + +namespace DB +{ + +SourceFromChunks::SourceFromChunks(Block header, Chunks chunks_) + : SourceFromChunks(header, std::make_shared(std::move(chunks_)), true) +{} + +SourceFromChunks::SourceFromChunks(Block header, std::shared_ptr chunks_) + : SourceFromChunks(header, chunks_, false) +{} + +SourceFromChunks::SourceFromChunks(Block header, std::shared_ptr chunks_, bool move_from_chunks_) + : ISource(std::move(header)) + , chunks(chunks_) + , it(chunks->begin()) + , move_from_chunks(move_from_chunks_) +{ +} + +String SourceFromChunks::getName() const +{ + return "SourceFromChunks"; +} + +Chunk SourceFromChunks::generate() +{ + if (it != chunks->end()) + if (move_from_chunks) + { + Chunk && chunk = std::move(*it); + it++; + return chunk; + } + else + { + Chunk chunk = it->clone(); + it++; + return chunk; + } + else + return {}; +} + +} + diff --git a/src/Processors/Sources/SourceFromChunks.h b/src/Processors/Sources/SourceFromChunks.h new file mode 100644 index 00000000000..d41999208a0 --- /dev/null +++ b/src/Processors/Sources/SourceFromChunks.h @@ -0,0 +1,30 @@ +#pragma once + +#include +#include + + +namespace DB +{ + +class SourceFromChunks : public ISource +{ +public: + SourceFromChunks(Block header, Chunks chunks_); + SourceFromChunks(Block header, std::shared_ptr chunks_); + + String getName() const override; + +protected: + Chunk generate() override; + +private: + SourceFromChunks(Block header, std::shared_ptr chunks_, bool move_from_chunks_); + + const std::shared_ptr chunks; + Chunks::iterator it; + /// Optimization: if the chunks are exclusively owned by SourceFromChunks, then generate() can move from them + const bool move_from_chunks; +}; + +} diff --git a/src/Processors/Sources/SourceFromSingleChunk.cpp b/src/Processors/Sources/SourceFromSingleChunk.cpp index 3e2b128acd2..00f40a34361 100644 --- a/src/Processors/Sources/SourceFromSingleChunk.cpp +++ b/src/Processors/Sources/SourceFromSingleChunk.cpp @@ -6,6 +6,7 @@ namespace DB { SourceFromSingleChunk::SourceFromSingleChunk(Block header, Chunk chunk_) : ISource(std::move(header)), chunk(std::move(chunk_)) {} + SourceFromSingleChunk::SourceFromSingleChunk(Block data) : ISource(data.cloneEmpty()), chunk(data.getColumns(), data.rows()) { const auto & sample = getPort().getHeader(); @@ -23,4 +24,14 @@ SourceFromSingleChunk::SourceFromSingleChunk(Block data) : ISource(data.cloneEmp } } +String SourceFromSingleChunk::getName() const +{ + return "SourceFromSingleChunk"; +} + +Chunk SourceFromSingleChunk::generate() +{ + return std::move(chunk); +} + } diff --git a/src/Processors/Sources/SourceFromSingleChunk.h b/src/Processors/Sources/SourceFromSingleChunk.h index fa85b94c231..fa6fa3856b5 100644 --- a/src/Processors/Sources/SourceFromSingleChunk.h +++ b/src/Processors/Sources/SourceFromSingleChunk.h @@ -1,4 +1,5 @@ #pragma once + #include @@ -7,13 +8,14 @@ namespace DB class SourceFromSingleChunk : public ISource { +/// If the source consists of multiple chunks you can instead use SourceFromChunks. 
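// A minimal sketch of the move-vs-clone idea behind the new SourceFromChunks above: when the
// source exclusively owns the chunk list it can move chunks out in generate(); when the list
// is shared it must clone each chunk. Chunk and the class below are simplified stand-ins for
// illustration, not the real DB types.
#include <memory>
#include <vector>

namespace sketch
{
struct Chunk
{
    std::vector<int> columns;
    Chunk clone() const { return Chunk{columns}; }
};

using Chunks = std::vector<Chunk>;

class SourceFromChunks
{
public:
    SourceFromChunks(std::shared_ptr<Chunks> chunks_, bool move_from_chunks_)
        : chunks(std::move(chunks_)), it(chunks->begin()), move_from_chunks(move_from_chunks_)
    {
    }

    Chunk generate()
    {
        if (it == chunks->end())
            return {};                                              /// exhausted: return an empty chunk
        Chunk chunk = move_from_chunks ? std::move(*it) : it->clone();
        ++it;
        return chunk;
    }

private:
    std::shared_ptr<Chunks> chunks;
    Chunks::iterator it;
    const bool move_from_chunks;  /// true only if the chunks are exclusively owned by the source
};
}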
public: - explicit SourceFromSingleChunk(Block header, Chunk chunk_); + SourceFromSingleChunk(Block header, Chunk chunk_); explicit SourceFromSingleChunk(Block data); - String getName() const override { return "SourceFromSingleChunk"; } + String getName() const override; protected: - Chunk generate() override { return std::move(chunk); } + Chunk generate() override; private: Chunk chunk; diff --git a/src/Processors/TTL/ITTLAlgorithm.cpp b/src/Processors/TTL/ITTLAlgorithm.cpp index c71ad740719..79140137df8 100644 --- a/src/Processors/TTL/ITTLAlgorithm.cpp +++ b/src/Processors/TTL/ITTLAlgorithm.cpp @@ -59,7 +59,7 @@ UInt32 ITTLAlgorithm::getTimestampByIndex(const IColumn * column, size_t index) return column_const->getValue(); } - throw Exception("Unexpected type of result TTL column", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected type of result TTL column"); } } diff --git a/src/Processors/Transforms/AddingDefaultsTransform.cpp b/src/Processors/Transforms/AddingDefaultsTransform.cpp index 64bdf663d0f..e6c2bcec2c8 100644 --- a/src/Processors/Transforms/AddingDefaultsTransform.cpp +++ b/src/Processors/Transforms/AddingDefaultsTransform.cpp @@ -42,13 +42,13 @@ static void checkCalculated(const ColumnWithTypeAndName & col_read, size_t column_size = col_read.column->size(); if (column_size != col_defaults.column->size()) - throw Exception("Mismatch column sizes while adding defaults", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); + throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Mismatch column sizes while adding defaults"); if (column_size < defaults_needed) - throw Exception("Unexpected defaults count", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); + throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Unexpected defaults count"); if (!col_read.type->equals(*col_defaults.type)) - throw Exception("Mismatch column types while adding defaults", ErrorCodes::TYPE_MISMATCH); + throw Exception(ErrorCodes::TYPE_MISMATCH, "Mismatch column types while adding defaults"); } static void mixNumberColumns( @@ -98,7 +98,7 @@ static void mixNumberColumns( }; if (!callOnIndexAndDataType(type_idx, call)) - throw Exception("Unexpected type on mixNumberColumns", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected type on mixNumberColumns"); } static MutableColumnPtr mixColumns(const ColumnWithTypeAndName & col_read, diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp index 653f1b20eb3..836458ef792 100644 --- a/src/Processors/Transforms/AggregatingTransform.cpp +++ b/src/Processors/Transforms/AggregatingTransform.cpp @@ -40,11 +40,11 @@ namespace { const auto & info = chunk.getChunkInfo(); if (!info) - throw Exception("Chunk info was not set for chunk.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk."); const auto * agg_info = typeid_cast(info.get()); if (!agg_info) - throw Exception("Chunk should have AggregatedChunkInfo.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo."); return agg_info; } @@ -356,7 +356,7 @@ private: APPLY_FOR_VARIANTS_SINGLE_LEVEL(M) #undef M else - throw Exception("Unknown aggregated data variant.", ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT); + throw Exception(ErrorCodes::UNKNOWN_AGGREGATED_DATA_VARIANT, "Unknown aggregated data variant."); auto blocks = params->aggregator.prepareBlockAndFillSingleLevel(*first, 
params->final); for (auto & block : blocks) @@ -497,7 +497,7 @@ void AggregatingTransform::work() Processors AggregatingTransform::expandPipeline() { if (processors.empty()) - throw Exception("Can not expandPipeline in AggregatingTransform. This is a bug.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can not expandPipeline in AggregatingTransform. This is a bug."); auto & out = processors.back()->getOutputs().front(); inputs.emplace_back(out.getHeader(), this); connect(out, inputs.back()); diff --git a/src/Processors/Transforms/ArrayJoinTransform.cpp b/src/Processors/Transforms/ArrayJoinTransform.cpp index eea1469c7a6..d9c940b8b05 100644 --- a/src/Processors/Transforms/ArrayJoinTransform.cpp +++ b/src/Processors/Transforms/ArrayJoinTransform.cpp @@ -19,7 +19,7 @@ ArrayJoinTransform::ArrayJoinTransform( { /// TODO // if (on_totals_) -// throw Exception("ARRAY JOIN is not supported for totals", ErrorCodes::LOGICAL_ERROR); +// throw Exception(ErrorCodes::LOGICAL_ERROR, "ARRAY JOIN is not supported for totals"); } void ArrayJoinTransform::transform(Chunk & chunk) diff --git a/src/Processors/Transforms/ColumnGathererTransform.cpp b/src/Processors/Transforms/ColumnGathererTransform.cpp index e7fdb6a3ce8..2628bf7d6db 100644 --- a/src/Processors/Transforms/ColumnGathererTransform.cpp +++ b/src/Processors/Transforms/ColumnGathererTransform.cpp @@ -22,7 +22,7 @@ ColumnGathererStream::ColumnGathererStream( , block_preferred_size(block_preferred_size_) { if (num_inputs == 0) - throw Exception("There are no streams to gather", ErrorCodes::EMPTY_DATA_PASSED); + throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "There are no streams to gather"); } void ColumnGathererStream::initialize(Inputs inputs) @@ -77,7 +77,7 @@ IMergingAlgorithm::Status ColumnGathererStream::merge() } if (next_required_source != -1 && sources[next_required_source].size == 0) - throw Exception("Cannot fetch required block. Source " + toString(next_required_source), ErrorCodes::RECEIVED_EMPTY_DATA); + throw Exception(ErrorCodes::RECEIVED_EMPTY_DATA, "Cannot fetch required block. Source {}", toString(next_required_source)); /// Surprisingly this call may directly change some internal state of ColumnGathererStream. /// output_column. See ColumnGathererStream::gather. @@ -116,8 +116,7 @@ void ColumnGathererStream::consume(Input & input, size_t source_num) if (0 == source.size) { - throw Exception("Fetched block is empty. Source " + toString(source_num), - ErrorCodes::RECEIVED_EMPTY_DATA); + throw Exception(ErrorCodes::RECEIVED_EMPTY_DATA, "Fetched block is empty. 
Source {}", source_num); } } @@ -132,9 +131,8 @@ ColumnGathererTransform::ColumnGathererTransform( , log(&Poco::Logger::get("ColumnGathererStream")) { if (header.columns() != 1) - throw Exception( - "Header should have 1 column, but contains " + toString(header.columns()), - ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS); + throw Exception(ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS, "Header should have 1 column, but contains {}", + toString(header.columns())); } void ColumnGathererTransform::work() diff --git a/src/Processors/Transforms/CopyTransform.cpp b/src/Processors/Transforms/CopyTransform.cpp index c9047c942d6..0af6c90dac9 100644 --- a/src/Processors/Transforms/CopyTransform.cpp +++ b/src/Processors/Transforms/CopyTransform.cpp @@ -12,7 +12,7 @@ CopyTransform::CopyTransform(const Block & header, size_t num_outputs) : IProcessor(InputPorts(1, header), OutputPorts(num_outputs, header)) { if (num_outputs <= 1) - throw Exception("CopyTransform expects more than 1 outputs, got " + std::to_string(num_outputs), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "CopyTransform expects more than 1 outputs, got {}", num_outputs); } IProcessor::Status CopyTransform::prepare() diff --git a/src/Processors/Transforms/CreatingSetsTransform.cpp b/src/Processors/Transforms/CreatingSetsTransform.cpp index fb3c8d6a87b..6a8d08cb661 100644 --- a/src/Processors/Transforms/CreatingSetsTransform.cpp +++ b/src/Processors/Transforms/CreatingSetsTransform.cpp @@ -56,7 +56,7 @@ void CreatingSetsTransform::startSubquery() done_with_table = !subquery.table; if (done_with_set /*&& done_with_join*/ && done_with_table) - throw Exception("Logical error: nothing to do with subquery", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: nothing to do with subquery"); if (table_out.initialized()) { diff --git a/src/Processors/Transforms/CubeTransform.cpp b/src/Processors/Transforms/CubeTransform.cpp index 669aaddd1df..afbb996f56e 100644 --- a/src/Processors/Transforms/CubeTransform.cpp +++ b/src/Processors/Transforms/CubeTransform.cpp @@ -15,7 +15,7 @@ CubeTransform::CubeTransform(Block header, AggregatingTransformParamsPtr params_ , aggregates_mask(getAggregatesMask(params->getHeader(), params->params.aggregates)) { if (keys.size() >= 8 * sizeof(mask)) - throw Exception("Too many keys are used for CubeTransform.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Too many keys are used for CubeTransform."); } Chunk CubeTransform::generate() diff --git a/src/Processors/Transforms/DistinctSortedTransform.cpp b/src/Processors/Transforms/DistinctSortedTransform.cpp index 4c6ca03950c..26ee4d0ad08 100644 --- a/src/Processors/Transforms/DistinctSortedTransform.cpp +++ b/src/Processors/Transforms/DistinctSortedTransform.cpp @@ -96,7 +96,8 @@ DistinctSortedTransform::DistinctSortedTransform( ColumnNumbers const_column_positions; calcColumnPositionsInHeader(header, column_names, column_positions, const_column_positions); if (column_positions.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "DistinctSortedTransform: all columns can't be const. DistinctTransform should be used instead"); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "DistinctSortedTransform: all columns can't be const. 
DistinctTransform should be used instead"); /// pre-calculate DISTINCT column positions which form sort prefix of sort description calcSortPrefixPositionsInHeader(header, sort_description, column_positions, const_column_positions, sort_prefix_positions); @@ -158,7 +159,8 @@ void DistinctSortedTransform::transform(Chunk & chunk) return; } - if (!set_size_limits.check(data.getTotalRowCount(), data.getTotalByteCount(), "DISTINCT", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED)) + size_t data_total_row_count = data.getTotalRowCount(); + if (!set_size_limits.check(data_total_row_count, data.getTotalByteCount(), "DISTINCT", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED)) { stopReading(); chunk.clear(); @@ -166,7 +168,7 @@ void DistinctSortedTransform::transform(Chunk & chunk) } /// Stop reading if we already reached the limit. - if (limit_hint && data.getTotalRowCount() >= limit_hint) + if (limit_hint && data_total_row_count >= limit_hint) stopReading(); prev_chunk.chunk = std::move(chunk); diff --git a/src/Processors/Transforms/DistinctTransform.cpp b/src/Processors/Transforms/DistinctTransform.cpp index 4d78adb6e22..3619fa51bf6 100644 --- a/src/Processors/Transforms/DistinctTransform.cpp +++ b/src/Processors/Transforms/DistinctTransform.cpp @@ -94,19 +94,20 @@ void DistinctTransform::transform(Chunk & chunk) } /// Just go to the next chunk if there isn't any new record in the current one. - if (data.getTotalRowCount() == old_set_size) + size_t new_set_size = data.getTotalRowCount(); + if (new_set_size == old_set_size) return; - if (!set_size_limits.check(data.getTotalRowCount(), data.getTotalByteCount(), "DISTINCT", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED)) + if (!set_size_limits.check(new_set_size, data.getTotalByteCount(), "DISTINCT", ErrorCodes::SET_SIZE_LIMIT_EXCEEDED)) return; for (auto & column : columns) column = column->filter(filter, -1); - chunk.setColumns(std::move(columns), data.getTotalRowCount() - old_set_size); + chunk.setColumns(std::move(columns), new_set_size - old_set_size); /// Stop reading if we already reach the limit - if (limit_hint && data.getTotalRowCount() >= limit_hint) + if (limit_hint && new_set_size >= limit_hint) { stopReading(); return; diff --git a/src/Processors/Transforms/FillingTransform.cpp b/src/Processors/Transforms/FillingTransform.cpp index 78ae6b8771f..2c5c550ffe2 100644 --- a/src/Processors/Transforms/FillingTransform.cpp +++ b/src/Processors/Transforms/FillingTransform.cpp @@ -187,22 +187,22 @@ FillingTransform::FillingTransform( const auto & type = header_.getByPosition(block_position).type; if (!tryConvertFields(descr, type)) - throw Exception("Incompatible types of WITH FILL expression values with column type " - + type->getName(), ErrorCodes::INVALID_WITH_FILL_EXPRESSION); + throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, + "Incompatible types of WITH FILL expression values with column type {}", type->getName()); if (type->isValueRepresentedByUnsignedInteger() && ((!descr.fill_from.isNull() && less(descr.fill_from, Field{0}, 1)) || (!descr.fill_to.isNull() && less(descr.fill_to, Field{0}, 1)))) { - throw Exception("WITH FILL bound values cannot be negative for unsigned type " - + type->getName(), ErrorCodes::INVALID_WITH_FILL_EXPRESSION); + throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, + "WITH FILL bound values cannot be negative for unsigned type {}", type->getName()); } } std::set unique_positions; for (auto pos : fill_column_positions) if (!unique_positions.insert(pos).second) - throw Exception("Multiple WITH FILL for identical expressions is 
not supported in ORDER BY", ErrorCodes::INVALID_WITH_FILL_EXPRESSION); + throw Exception(ErrorCodes::INVALID_WITH_FILL_EXPRESSION, "Multiple WITH FILL for identical expressions is not supported in ORDER BY"); size_t idx = 0; for (const ColumnWithTypeAndName & column : header_.getColumnsWithTypeAndName()) diff --git a/src/Processors/Transforms/FinishSortingTransform.cpp b/src/Processors/Transforms/FinishSortingTransform.cpp index 86b5c4c9a00..d8412eff588 100644 --- a/src/Processors/Transforms/FinishSortingTransform.cpp +++ b/src/Processors/Transforms/FinishSortingTransform.cpp @@ -31,8 +31,9 @@ FinishSortingTransform::FinishSortingTransform( { /// Check for sanity non-modified descriptions if (!isPrefix(description_sorted_, description_to_sort_)) - throw Exception("Can't finish sorting. SortDescription of already sorted stream is not prefix of " - "SortDescription needed to sort", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Can't finish sorting. SortDescription " + "of already sorted stream is not prefix of SortDescription needed to sort"); /// The target description is modified in SortingTransform constructor. /// To avoid doing the same actions with description_sorted just copy it from prefix of target description. diff --git a/src/Processors/Transforms/MemoryBoundMerging.h b/src/Processors/Transforms/MemoryBoundMerging.h index d4e2cd41e9d..3193a07a0bd 100644 --- a/src/Processors/Transforms/MemoryBoundMerging.h +++ b/src/Processors/Transforms/MemoryBoundMerging.h @@ -112,8 +112,7 @@ public: return Status::NeedData; if (!all_finished) - throw Exception( - "SortingAggregatedForMemoryBoundMergingTransform has read bucket, but couldn't push it.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "SortingAggregatedForMemoryBoundMergingTransform has read bucket, but couldn't push it."); if (overflow_chunk) { @@ -154,8 +153,7 @@ private: const auto & info = chunk.getChunkInfo(); if (!info) - throw Exception( - "Chunk info was not set for chunk in SortingAggregatedForMemoryBoundMergingTransform.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in SortingAggregatedForMemoryBoundMergingTransform."); const auto * agg_info = typeid_cast(info.get()); if (!agg_info) diff --git a/src/Processors/Transforms/MergeJoinTransform.cpp b/src/Processors/Transforms/MergeJoinTransform.cpp index 315fc4810ba..cf5b4be4239 100644 --- a/src/Processors/Transforms/MergeJoinTransform.cpp +++ b/src/Processors/Transforms/MergeJoinTransform.cpp @@ -276,7 +276,7 @@ MergeJoinAlgorithm::MergeJoinAlgorithm( , log(&Poco::Logger::get("MergeJoinAlgorithm")) { if (input_headers.size() != 2) - throw Exception("MergeJoinAlgorithm requires exactly two inputs", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeJoinAlgorithm requires exactly two inputs"); auto strictness = table_join->getTableJoin().strictness(); if (strictness != JoinStrictness::Any && strictness != JoinStrictness::All) @@ -329,10 +329,10 @@ void MergeJoinAlgorithm::initialize(Inputs inputs) void MergeJoinAlgorithm::consume(Input & input, size_t source_num) { if (input.skip_last_row) - throw Exception("skip_last_row is not supported", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "skip_last_row is not supported"); if (input.permutation) - throw DB::Exception("permutation is not supported", ErrorCodes::NOT_IMPLEMENTED); + throw DB::Exception(ErrorCodes::NOT_IMPLEMENTED, "permutation is not 
supported"); if (input.chunk) { diff --git a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp index 4e90159aa11..9771c24f256 100644 --- a/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp +++ b/src/Processors/Transforms/MergingAggregatedMemoryEfficientTransform.cpp @@ -225,14 +225,14 @@ IProcessor::Status GroupingAggregatedTransform::prepare() /// Sanity check. If new bucket was read, we should be able to push it. /// This is always false, but we still keep this condition in case the code will be changed. if (!all_inputs_finished) // -V547 - throw Exception("GroupingAggregatedTransform has read new two-level bucket, but couldn't push it.", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "GroupingAggregatedTransform has read new two-level bucket, but couldn't push it."); } else { if (!all_inputs_finished) // -V547 - throw Exception("GroupingAggregatedTransform should have read all chunks for single level aggregation, " - "but not all of the inputs are finished.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "GroupingAggregatedTransform should have read all chunks for single level aggregation, " + "but not all of the inputs are finished."); if (tryPushSingleLevelData()) return Status::PortFull; @@ -253,7 +253,7 @@ void GroupingAggregatedTransform::addChunk(Chunk chunk, size_t input) const auto & info = chunk.getChunkInfo(); if (!info) - throw Exception("Chunk info was not set for chunk in GroupingAggregatedTransform.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in GroupingAggregatedTransform."); if (const auto * agg_info = typeid_cast(info.get())) { @@ -319,8 +319,7 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk) const auto * chunks_to_merge = typeid_cast(info.get()); if (!chunks_to_merge) - throw Exception("MergingAggregatedSimpleTransform chunk must have ChunkInfo with type ChunksToMerge.", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "MergingAggregatedSimpleTransform chunk must have ChunkInfo with type ChunksToMerge."); auto header = params->aggregator.getHeader(false); @@ -329,8 +328,7 @@ void MergingAggregatedBucketTransform::transform(Chunk & chunk) { const auto & cur_info = cur_chunk.getChunkInfo(); if (!cur_info) - throw Exception("Chunk info was not set for chunk in MergingAggregatedBucketTransform.", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in MergingAggregatedBucketTransform."); if (const auto * agg_info = typeid_cast(cur_info.get())) { @@ -405,7 +403,7 @@ void SortingAggregatedTransform::addChunk(Chunk chunk, size_t from_input) { const auto & info = chunk.getChunkInfo(); if (!info) - throw Exception("Chunk info was not set for chunk in SortingAggregatedTransform.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in SortingAggregatedTransform."); const auto * agg_info = typeid_cast(info.get()); if (!agg_info) @@ -512,8 +510,7 @@ IProcessor::Status SortingAggregatedTransform::prepare() return Status::NeedData; if (!all_finished) - throw Exception("SortingAggregatedTransform has read bucket, but couldn't push it.", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "SortingAggregatedTransform has read bucket, but couldn't push it."); if 
(overflow_chunk) { diff --git a/src/Processors/Transforms/MergingAggregatedTransform.cpp b/src/Processors/Transforms/MergingAggregatedTransform.cpp index 11d32278caf..9d0be86ff83 100644 --- a/src/Processors/Transforms/MergingAggregatedTransform.cpp +++ b/src/Processors/Transforms/MergingAggregatedTransform.cpp @@ -33,7 +33,7 @@ void MergingAggregatedTransform::consume(Chunk chunk) const auto & info = chunk.getChunkInfo(); if (!info) - throw Exception("Chunk info was not set for chunk in MergingAggregatedTransform.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in MergingAggregatedTransform."); if (const auto * agg_info = typeid_cast(info.get())) { @@ -58,7 +58,7 @@ void MergingAggregatedTransform::consume(Chunk chunk) bucket_to_blocks[block.info.bucket_num].emplace_back(std::move(block)); } else - throw Exception("Chunk should have AggregatedChunkInfo in MergingAggregatedTransform.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo in MergingAggregatedTransform."); } Chunk MergingAggregatedTransform::generate() diff --git a/src/Processors/Transforms/MongoDBSource.cpp b/src/Processors/Transforms/MongoDBSource.cpp index 88eddde0b3d..a8bfefdf8a6 100644 --- a/src/Processors/Transforms/MongoDBSource.cpp +++ b/src/Processors/Transforms/MongoDBSource.cpp @@ -98,9 +98,8 @@ namespace parse(static_cast &>(value).value())); break; default: - throw Exception( - "Type mismatch, expected a number, got type id = " + toString(value.type()) + " for column " + name, - ErrorCodes::TYPE_MISMATCH); + throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch, expected a number, got type id = {} for column {}", + toString(value.type()), name); } } @@ -156,15 +155,15 @@ namespace break; } - throw Exception{"Type mismatch, expected String, got type id = " + toString(value.type()) + " for column " + name, - ErrorCodes::TYPE_MISMATCH}; + throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch, expected String, got type id = {} for column {}", + toString(value.type()), name); } case ValueType::vtDate: { if (value.type() != Poco::MongoDB::ElementTraits::TypeId) - throw Exception{"Type mismatch, expected Timestamp, got type id = " + toString(value.type()) + " for column " + name, - ErrorCodes::TYPE_MISMATCH}; + throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch, expected Timestamp, got type id = {} for column {}", + toString(value.type()), name); assert_cast(column).getData().push_back(static_cast(DateLUT::instance().toDayNum( static_cast &>(value).value().epochTime()))); @@ -174,8 +173,8 @@ namespace case ValueType::vtDateTime: { if (value.type() != Poco::MongoDB::ElementTraits::TypeId) - throw Exception{"Type mismatch, expected Timestamp, got type id = " + toString(value.type()) + " for column " + name, - ErrorCodes::TYPE_MISMATCH}; + throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch, expected Timestamp, got type id = {} for column {}", + toString(value.type()), name); assert_cast(column).getData().push_back( static_cast(static_cast &>(value).value().epochTime())); @@ -189,13 +188,12 @@ namespace assert_cast(column).getData().push_back(parse(string)); } else - throw Exception{"Type mismatch, expected String (UUID), got type id = " + toString(value.type()) + " for column " - + name, - ErrorCodes::TYPE_MISMATCH}; + throw Exception(ErrorCodes::TYPE_MISMATCH, "Type mismatch, expected String (UUID), got type id = {} for column {}", + toString(value.type()), name); break; } default: 
- throw Exception("Value of unsupported type:" + column.getName(), ErrorCodes::UNKNOWN_TYPE); + throw Exception(ErrorCodes::UNKNOWN_TYPE, "Value of unsupported type:{}", column.getName()); } } diff --git a/src/Processors/Transforms/SortingTransform.cpp b/src/Processors/Transforms/SortingTransform.cpp index 603ee06b203..3d6bad6ed06 100644 --- a/src/Processors/Transforms/SortingTransform.cpp +++ b/src/Processors/Transforms/SortingTransform.cpp @@ -359,8 +359,8 @@ void SortingTransform::removeConstColumns(Chunk & chunk) size_t num_rows = chunk.getNumRows(); if (num_columns != const_columns_to_remove.size()) - throw Exception("Block has different number of columns with header: " + toString(num_columns) - + " vs " + toString(const_columns_to_remove.size()), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Block has different number of columns with header: {} vs {}", + num_columns, const_columns_to_remove.size()); auto columns = chunk.detachColumns(); Columns column_without_constants; @@ -394,8 +394,7 @@ void SortingTransform::enrichChunkWithConstants(Chunk & chunk) else { if (next_non_const_column >= columns.size()) - throw Exception("Can't enrich chunk with constants because run out of non-constant columns.", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't enrich chunk with constants because run out of non-constant columns."); column_with_constants.emplace_back(std::move(columns[next_non_const_column])); ++next_non_const_column; @@ -407,7 +406,7 @@ void SortingTransform::enrichChunkWithConstants(Chunk & chunk) void SortingTransform::serialize() { - throw Exception("Method 'serialize' is not implemented for " + getName() + " processor", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'serialize' is not implemented for {} processor", getName()); } } diff --git a/src/Processors/Transforms/StreamInQueryResultCacheTransform.cpp b/src/Processors/Transforms/StreamInQueryResultCacheTransform.cpp new file mode 100644 index 00000000000..841fcfdf8b5 --- /dev/null +++ b/src/Processors/Transforms/StreamInQueryResultCacheTransform.cpp @@ -0,0 +1,24 @@ +#include + +namespace DB +{ + +StreamInQueryResultCacheTransform::StreamInQueryResultCacheTransform( + const Block & header_, QueryResultCachePtr cache, const QueryResultCache::Key & cache_key, std::chrono::milliseconds min_query_duration) + : ISimpleTransform(header_, header_, false) + , cache_writer(cache->createWriter(cache_key, min_query_duration)) +{ +} + +void StreamInQueryResultCacheTransform::transform(Chunk & chunk) +{ + cache_writer.buffer(chunk.clone()); +} + +void StreamInQueryResultCacheTransform::finalizeWriteInQueryResultCache() +{ + if (!isCancelled()) + cache_writer.finalizeWrite(); +} + +}; diff --git a/src/Processors/Transforms/StreamInQueryResultCacheTransform.h b/src/Processors/Transforms/StreamInQueryResultCacheTransform.h new file mode 100644 index 00000000000..a90d33a0681 --- /dev/null +++ b/src/Processors/Transforms/StreamInQueryResultCacheTransform.h @@ -0,0 +1,26 @@ +#pragma once + +#include +#include + +namespace DB +{ + +class StreamInQueryResultCacheTransform : public ISimpleTransform +{ +public: + StreamInQueryResultCacheTransform( + const Block & header_, QueryResultCachePtr cache, const QueryResultCache::Key & cache_key, std::chrono::milliseconds min_query_duration); + +protected: + void transform(Chunk & chunk) override; + +public: + void finalizeWriteInQueryResultCache(); + String getName() const override { return 
"StreamInQueryResultCacheTransform"; } + +private: + QueryResultCache::Writer cache_writer; +}; + +} diff --git a/src/Processors/Transforms/TotalsHavingTransform.cpp b/src/Processors/Transforms/TotalsHavingTransform.cpp index 29be0ceed23..578d8cb8374 100644 --- a/src/Processors/Transforms/TotalsHavingTransform.cpp +++ b/src/Processors/Transforms/TotalsHavingTransform.cpp @@ -152,11 +152,11 @@ void TotalsHavingTransform::transform(Chunk & chunk) { const auto & info = chunk.getChunkInfo(); if (!info) - throw Exception("Chunk info was not set for chunk in TotalsHavingTransform.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info was not set for chunk in TotalsHavingTransform."); const auto * agg_info = typeid_cast(info.get()); if (!agg_info) - throw Exception("Chunk should have AggregatedChunkInfo in TotalsHavingTransform.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should have AggregatedChunkInfo in TotalsHavingTransform."); if (agg_info->is_overflows) { @@ -189,7 +189,7 @@ void TotalsHavingTransform::transform(Chunk & chunk) for (const auto & action : expression->getActions()) { if (action.node->type == ActionsDAG::ActionType::ARRAY_JOIN) - throw Exception("Having clause cannot contain arrayJoin", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Having clause cannot contain arrayJoin"); } expression->execute(finalized_block, num_rows); @@ -260,7 +260,7 @@ void TotalsHavingTransform::addToTotals(const Chunk & chunk, const IColumn::Filt size_t size = vec.size(); if (filter && filter->size() != size) - throw Exception("Filter has size which differs from column size", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Filter has size which differs from column size"); if (filter) { diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 9bfaf1f375f..cb9ab95fba4 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -1522,7 +1522,8 @@ namespace recurrent_detail template void setValueToOutputColumn(const WindowTransform * /*transform*/, size_t /*function_index*/, T /*value*/) { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "recurrent_detail::setValueToOutputColumn() is not implemented for {} type", typeid(T).name()); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "recurrent_detail::setValueToOutputColumn() is not implemented for {} type", typeid(T).name()); } template<> void setValueToOutputColumn(const WindowTransform * transform, size_t function_index, Float64 value) diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 1be05135fe4..ade056629db 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -310,8 +310,8 @@ Chain buildPushingToViewsChain( if (lock == nullptr) { - // In case the materialized view is dropped at this point, we register a warning and ignore it - assert(materialized_view->is_dropped); + // In case the materialized view is dropped/detached at this point, we register a warning and ignore it + assert(materialized_view->is_dropped || materialized_view->is_detached); LOG_WARNING( &Poco::Logger::get("PushingToViews"), "Trying to access table {} but it doesn't exist", view_id.getFullTableName()); continue; diff --git a/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp 
b/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp
index 4ee3f2d4b82..ab7cfca3de2 100644
--- a/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp
+++ b/src/Processors/Transforms/getSourceFromASTInsertQuery.cpp
@@ -37,17 +37,17 @@ InputFormatPtr getInputFormatFromASTInsertQuery(
     const auto * ast_insert_query = ast->as();

     if (!ast_insert_query)
-        throw Exception("Logical error: query requires data to insert, but it is not INSERT query", ErrorCodes::LOGICAL_ERROR);
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: query requires data to insert, but it is not INSERT query");

     if (ast_insert_query->infile && context->getApplicationType() == Context::ApplicationType::SERVER)
-        throw Exception("Query has infile and was send directly to server", ErrorCodes::UNKNOWN_TYPE_OF_QUERY);
+        throw Exception(ErrorCodes::UNKNOWN_TYPE_OF_QUERY, "Query has infile and was sent directly to server");

     if (ast_insert_query->format.empty())
     {
         if (input_function)
-            throw Exception("FORMAT must be specified for function input()", ErrorCodes::INVALID_USAGE_OF_INPUT);
+            throw Exception(ErrorCodes::INVALID_USAGE_OF_INPUT, "FORMAT must be specified for function input()");
         else
-            throw Exception("Logical error: INSERT query requires format to be set", ErrorCodes::LOGICAL_ERROR);
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: INSERT query requires format to be set");
     }

     /// Data could be in parsed (ast_insert_query.data) and in not parsed yet (input_buffer_tail_part) part of query.
diff --git a/src/QueryPipeline/BlockIO.cpp b/src/QueryPipeline/BlockIO.cpp
index 9e42e06c722..9af7cd2b772 100644
--- a/src/QueryPipeline/BlockIO.cpp
+++ b/src/QueryPipeline/BlockIO.cpp
@@ -47,6 +47,22 @@ BlockIO::~BlockIO()
     reset();
 }

+void BlockIO::onFinish()
+{
+    if (finish_callback)
+        finish_callback(pipeline);
+
+    pipeline.reset();
+}
+
+void BlockIO::onException()
+{
+    if (exception_callback)
+        exception_callback();
+
+    pipeline.reset();
+}
+
 void BlockIO::setAllDataSent() const
 {
     /// The following queries does not have process_list_entry:
diff --git a/src/QueryPipeline/BlockIO.h b/src/QueryPipeline/BlockIO.h
index b69f86ac684..4c8d29d0ba8 100644
--- a/src/QueryPipeline/BlockIO.h
+++ b/src/QueryPipeline/BlockIO.h
@@ -31,21 +31,8 @@ struct BlockIO
     /// When it is true, don't bother sending any non-empty blocks to the out stream
     bool null_format = false;

-    void onFinish()
-    {
-        if (finish_callback)
-            finish_callback(pipeline);
-
-        pipeline.reset();
-    }
-
-    void onException()
-    {
-        if (exception_callback)
-            exception_callback();
-
-        pipeline.reset();
-    }
+    void onFinish();
+    void onException();

     /// Set is_all_data_sent in system.processes for this query.
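// Illustration of the pattern applied throughout these hunks: the error code moves to the
// first argument and the message becomes an fmt-style format string with {} placeholders
// instead of string concatenation. This is a minimal sketch assuming a recent {fmt} (8+);
// FormattedException and errorCode() are illustrative names, not the actual DB::Exception API.
#include <fmt/core.h>
#include <stdexcept>
#include <string>
#include <utility>

class FormattedException : public std::runtime_error
{
public:
    template <typename... Args>
    FormattedException(int code_, fmt::format_string<Args...> fmt_str, Args &&... args)
        : std::runtime_error(fmt::format(fmt_str, std::forward<Args>(args)...))  // format the message once, eagerly here for simplicity
        , code(code_)
    {
    }

    int errorCode() const { return code; }

private:
    int code;
};

// Old style: message built by concatenation, code last:
//     throw Exception("Invalid session timeout: '" + session_timeout_str + "'", ErrorCodes::INVALID_SESSION_TIMEOUT);
// New style: code first, placeholders filled from the trailing arguments:
//     throw Exception(ErrorCodes::INVALID_SESSION_TIMEOUT, "Invalid session timeout: '{}'", session_timeout_str);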
void setAllDataSent() const; diff --git a/src/QueryPipeline/Chain.cpp b/src/QueryPipeline/Chain.cpp index e5f2556a44f..6122517432a 100644 --- a/src/QueryPipeline/Chain.cpp +++ b/src/QueryPipeline/Chain.cpp @@ -19,7 +19,7 @@ static void checkSingleInput(const IProcessor & transform) transform.getInputs().size()); if (transform.getInputs().front().isConnected()) - throw Exception("Transform for chain has connected input", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Transform for chain has connected input"); } static void checkSingleOutput(const IProcessor & transform) @@ -32,7 +32,7 @@ static void checkSingleOutput(const IProcessor & transform) transform.getOutputs().size()); if (transform.getOutputs().front().isConnected()) - throw Exception("Transform for chain has connected output", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Transform for chain has connected output"); } static void checkTransform(const IProcessor & transform) diff --git a/src/QueryPipeline/ConnectionCollector.cpp b/src/QueryPipeline/ConnectionCollector.cpp index 9c52a2caf9c..7c484dcd6e8 100644 --- a/src/QueryPipeline/ConnectionCollector.cpp +++ b/src/QueryPipeline/ConnectionCollector.cpp @@ -34,7 +34,7 @@ ConnectionCollector & ConnectionCollector::init(ContextMutablePtr global_context { if (connection_collector) { - throw Exception("Connection collector is initialized twice. This is a bug", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Connection collector is initialized twice. This is a bug"); } connection_collector.reset(new ConnectionCollector(global_context_, max_threads)); diff --git a/src/QueryPipeline/ExecutionSpeedLimits.cpp b/src/QueryPipeline/ExecutionSpeedLimits.cpp index 9f0bd17e585..111ba7c9a95 100644 --- a/src/QueryPipeline/ExecutionSpeedLimits.cpp +++ b/src/QueryPipeline/ExecutionSpeedLimits.cpp @@ -112,7 +112,7 @@ static bool handleOverflowMode(OverflowMode mode, int code, FormatStringHelpergetInputs().size()); if (!output) - throw Exception("Cannot create Pipe from source because specified output port is nullptr", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot create Pipe from source because specified output port is nullptr"); if (output == totals || output == extremes || (totals && totals == extremes)) - throw Exception("Cannot create Pipe from source because some of specified ports are the same", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot create Pipe from source because some of specified ports are the same"); header = output->getHeader(); @@ -191,7 +189,9 @@ Pipe::Pipe(std::shared_ptr processors_) : processors(std::move(proce { if (!port.isConnected()) throw Exception( - ErrorCodes::LOGICAL_ERROR, "Cannot create Pipe because processor {} has disconnected input port", processor->getName()); + ErrorCodes::LOGICAL_ERROR, + "Cannot create Pipe because processor {} has disconnected input port", + processor->getName()); const auto * connected_processor = &port.getOutputPort().getProcessor(); if (!set.contains(connected_processor)) @@ -221,8 +221,7 @@ Pipe::Pipe(std::shared_ptr processors_) : processors(std::move(proce } if (output_ports.empty()) - throw Exception("Cannot create Pipe because processors don't have any disconnected output ports", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot create Pipe because processors don't have any disconnected output ports"); header = output_ports.front()->getHeader(); for 
(size_t i = 1; i < output_ports.size(); ++i) @@ -365,10 +364,10 @@ void Pipe::addSource(ProcessorPtr source) void Pipe::addTotalsSource(ProcessorPtr source) { if (output_ports.empty()) - throw Exception("Cannot add totals source to empty Pipe", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add totals source to empty Pipe"); if (totals_port) - throw Exception("Totals source was already added to Pipe", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Totals source was already added to Pipe"); checkSource(*source); const auto & source_header = output_ports.front()->getHeader(); @@ -385,10 +384,10 @@ void Pipe::addTotalsSource(ProcessorPtr source) void Pipe::addExtremesSource(ProcessorPtr source) { if (output_ports.empty()) - throw Exception("Cannot add extremes source to empty Pipe", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add extremes source to empty Pipe"); if (extremes_port) - throw Exception("Extremes source was already added to Pipe", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Extremes source was already added to Pipe"); checkSource(*source); const auto & source_header = output_ports.front()->getHeader(); @@ -435,7 +434,7 @@ void Pipe::addTransform(ProcessorPtr transform) void Pipe::addTransform(ProcessorPtr transform, OutputPort * totals, OutputPort * extremes) { if (output_ports.empty()) - throw Exception("Cannot add transform to empty Pipe", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add transform to empty Pipe"); auto & inputs = transform->getInputs(); if (inputs.size() != output_ports.size()) @@ -447,12 +446,10 @@ void Pipe::addTransform(ProcessorPtr transform, OutputPort * totals, OutputPort output_ports.size()); if (totals && totals_port) - throw Exception("Cannot add transform with totals to Pipe because it already has totals", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add transform with totals to Pipe because it already has totals"); if (extremes && extremes_port) - throw Exception("Cannot add transform with extremes to Pipe because it already has extremes", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add transform with extremes to Pipe because it already has extremes"); if (totals) totals_port = totals; @@ -485,16 +482,18 @@ void Pipe::addTransform(ProcessorPtr transform, OutputPort * totals, OutputPort } if (totals && !found_totals) - throw Exception("Cannot add transform " + transform->getName() + " to Pipes because " - "specified totals port does not belong to it", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Cannot add transform {} to Pipes because specified totals port does not belong to it", + transform->getName()); if (extremes && !found_extremes) - throw Exception("Cannot add transform " + transform->getName() + " to Pipes because " - "specified extremes port does not belong to it", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Cannot add transform {} to Pipes because specified extremes port does not belong to it", + transform->getName()); if (output_ports.empty()) - throw Exception("Cannot add transform " + transform->getName() + " to Pipes because it has no outputs", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add transform {} to Pipes because it has no outputs", + transform->getName()); header = 
output_ports.front()->getHeader(); for (size_t i = 1; i < output_ports.size(); ++i) @@ -518,7 +517,7 @@ void Pipe::addTransform(ProcessorPtr transform, OutputPort * totals, OutputPort void Pipe::addTransform(ProcessorPtr transform, InputPort * totals, InputPort * extremes) { if (output_ports.empty()) - throw Exception("Cannot add transform to empty Pipe", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add transform to empty Pipe"); auto & inputs = transform->getInputs(); size_t expected_inputs = output_ports.size() + (totals ? 1 : 0) + (extremes ? 1 : 0); @@ -531,12 +530,10 @@ void Pipe::addTransform(ProcessorPtr transform, InputPort * totals, InputPort * expected_inputs); if (totals && !totals_port) - throw Exception("Cannot add transform consuming totals to Pipe because Pipe does not have totals", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add transform consuming totals to Pipe because Pipe does not have totals"); if (extremes && !extremes_port) - throw Exception("Cannot add transform consuming extremes to Pipe because it already has extremes", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add transform consuming extremes to Pipe because it already has extremes"); if (totals) { @@ -609,7 +606,7 @@ void Pipe::addTransform(ProcessorPtr transform, InputPort * totals, InputPort * void Pipe::addSimpleTransform(const ProcessorGetterWithStreamKind & getter) { if (output_ports.empty()) - throw Exception("Cannot add simple transform to empty Pipe.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot add simple transform to empty Pipe."); Block new_header; @@ -715,7 +712,7 @@ void Pipe::addChains(std::vector chains) void Pipe::resize(size_t num_streams, bool force, bool strict) { if (output_ports.empty()) - throw Exception("Cannot resize an empty Pipe", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot resize an empty Pipe"); if (!force && num_streams == numOutputPorts()) return; @@ -733,7 +730,7 @@ void Pipe::resize(size_t num_streams, bool force, bool strict) void Pipe::setSinks(const Pipe::ProcessorGetterWithStreamKind & getter) { if (output_ports.empty()) - throw Exception("Cannot set sink to empty Pipe", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot set sink to empty Pipe"); auto add_transform = [&](OutputPort *& stream, Pipe::StreamType stream_type) { @@ -779,7 +776,7 @@ void Pipe::setSinks(const Pipe::ProcessorGetterWithStreamKind & getter) void Pipe::transform(const Transformer & transformer, bool check_ports) { if (output_ports.empty()) - throw Exception("Cannot transform empty Pipe", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot transform empty Pipe"); auto new_processors = transformer(output_ports); @@ -837,16 +834,17 @@ void Pipe::transform(const Transformer & transformer, bool check_ports) const auto * connected_processor = &port.getInputPort().getProcessor(); if (check_ports && !set.contains(connected_processor)) throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Transformation of Pipe is not valid because processor {} has output port which is connected with unknown processor {}", - processor->getName(), - connected_processor->getName()); + ErrorCodes::LOGICAL_ERROR, + "Transformation of Pipe is not valid because processor {} has output port which " + "is connected with unknown processor {}", + processor->getName(), + 
connected_processor->getName()); } } if (output_ports.empty()) - throw Exception( - "Transformation of Pipe is not valid because processors don't have any disconnected output ports", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Transformation of Pipe is not valid because processors don't have any disconnected output ports"); header = output_ports.front()->getHeader(); for (size_t i = 1; i < output_ports.size(); ++i) diff --git a/src/QueryPipeline/QueryPipeline.cpp b/src/QueryPipeline/QueryPipeline.cpp index e0da4c4f0eb..aa01801b1ec 100644 --- a/src/QueryPipeline/QueryPipeline.cpp +++ b/src/QueryPipeline/QueryPipeline.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -514,7 +515,6 @@ void QueryPipeline::setLimitsAndQuota(const StreamLocalLimits & limits, std::sha processors->emplace_back(std::move(transform)); } - bool QueryPipeline::tryGetResultRowsAndBytes(UInt64 & result_rows, UInt64 & result_bytes) const { if (!output_format) @@ -525,6 +525,27 @@ bool QueryPipeline::tryGetResultRowsAndBytes(UInt64 & result_rows, UInt64 & resu return true; } +void QueryPipeline::streamIntoQueryResultCache(std::shared_ptr transform) +{ + assert(pulling()); + + connect(*output, transform->getInputPort()); + output = &transform->getOutputPort(); + processors->emplace_back(transform); +} + +void QueryPipeline::finalizeWriteInQueryResultCache() +{ + auto it = std::find_if( + processors->begin(), processors->end(), + [](ProcessorPtr processor){ return dynamic_cast(&*processor); }); + + /// the pipeline should theoretically contain just one StreamInQueryResultCacheTransform + + if (it != processors->end()) + dynamic_cast(**it).finalizeWriteInQueryResultCache(); +} + void QueryPipeline::addStorageHolder(StoragePtr storage) { resources.storage_holders.emplace_back(std::move(storage)); diff --git a/src/QueryPipeline/QueryPipeline.h b/src/QueryPipeline/QueryPipeline.h index 153bcc55b39..da43aa035f3 100644 --- a/src/QueryPipeline/QueryPipeline.h +++ b/src/QueryPipeline/QueryPipeline.h @@ -4,7 +4,6 @@ #include #include - namespace DB { @@ -32,6 +31,7 @@ class SinkToStorage; class ISource; class ISink; class ReadProgressCallback; +class StreamInQueryResultCacheTransform; struct ColumnWithTypeAndName; using ColumnsWithTypeAndName = std::vector; @@ -105,6 +105,9 @@ public: void setLimitsAndQuota(const StreamLocalLimits & limits, std::shared_ptr quota_); bool tryGetResultRowsAndBytes(UInt64 & result_rows, UInt64 & result_bytes) const; + void streamIntoQueryResultCache(std::shared_ptr transform); + void finalizeWriteInQueryResultCache(); + void setQuota(std::shared_ptr quota_); void addStorageHolder(StoragePtr storage); diff --git a/src/QueryPipeline/QueryPipelineBuilder.cpp b/src/QueryPipeline/QueryPipelineBuilder.cpp index a3b3438306e..483447d1e4d 100644 --- a/src/QueryPipeline/QueryPipelineBuilder.cpp +++ b/src/QueryPipeline/QueryPipelineBuilder.cpp @@ -41,7 +41,7 @@ namespace ErrorCodes void QueryPipelineBuilder::checkInitialized() { if (!initialized()) - throw Exception("QueryPipeline is uninitialized", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "QueryPipeline is uninitialized"); } void QueryPipelineBuilder::checkInitializedAndNotCompleted() @@ -49,7 +49,7 @@ void QueryPipelineBuilder::checkInitializedAndNotCompleted() checkInitialized(); if (pipe.isCompleted()) - throw Exception("QueryPipeline is already completed", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "QueryPipeline is already completed"); 
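// Sketch of the wiring used by QueryPipeline::streamIntoQueryResultCache() a few hunks above:
// the pipeline's current output port is connected to the new transform's input, and the
// transform's output becomes the new head of the pipeline. The types below are simplified
// stand-ins, not the real QueryPipeline / IProcessor classes.
#include <cassert>
#include <memory>
#include <vector>

struct InputPort;
struct OutputPort { InputPort * peer = nullptr; };
struct InputPort { OutputPort * peer = nullptr; };

void connect(OutputPort & out, InputPort & in)
{
    out.peer = &in;
    in.peer = &out;
}

struct SimpleTransform
{
    InputPort input;
    OutputPort output;
};

struct PullingPipeline
{
    OutputPort * output = nullptr;                               // current head of the pipeline
    std::vector<std::shared_ptr<SimpleTransform>> processors;

    void spliceTransform(std::shared_ptr<SimpleTransform> transform)
    {
        assert(output != nullptr);                               // mirrors the assert(pulling()) precondition
        connect(*output, transform->input);                      // old head now feeds the transform
        output = &transform->output;                             // transform's output becomes the new head
        processors.push_back(std::move(transform));
    }
};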
} static void checkSource(const ProcessorPtr & source, bool can_have_totals) @@ -83,10 +83,10 @@ static void checkSource(const ProcessorPtr & source, bool can_have_totals) void QueryPipelineBuilder::init(Pipe pipe_) { if (initialized()) - throw Exception("Pipeline has already been initialized", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Pipeline has already been initialized"); if (pipe_.empty()) - throw Exception("Can't initialize pipeline with empty pipe", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't initialize pipeline with empty pipe"); pipe = std::move(pipe_); } @@ -94,10 +94,10 @@ void QueryPipelineBuilder::init(Pipe pipe_) void QueryPipelineBuilder::init(QueryPipeline & pipeline) { if (initialized()) - throw Exception("Pipeline has already been initialized", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Pipeline has already been initialized"); if (pipeline.pushing()) - throw Exception("Can't initialize pushing pipeline", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't initialize pushing pipeline"); if (pipeline.output) { @@ -208,10 +208,10 @@ void QueryPipelineBuilder::addTotalsHavingTransform(ProcessorPtr transform) checkInitializedAndNotCompleted(); if (!typeid_cast(transform.get())) - throw Exception("TotalsHavingTransform is expected for QueryPipeline::addTotalsHavingTransform", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "TotalsHavingTransform is expected for QueryPipeline::addTotalsHavingTransform"); if (pipe.getTotalsPort()) - throw Exception("Totals having transform was already added to pipeline", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Totals having transform was already added to pipeline"); resize(1); @@ -224,7 +224,7 @@ void QueryPipelineBuilder::addDefaultTotals() checkInitializedAndNotCompleted(); if (pipe.getTotalsPort()) - throw Exception("Totals having transform was already added to pipeline", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Totals having transform was already added to pipeline"); const auto & current_header = getHeader(); Columns columns; @@ -351,7 +351,7 @@ std::unique_ptr QueryPipelineBuilder::joinPipelinesYShaped left->pipe.dropExtremes(); right->pipe.dropExtremes(); if (left->getNumStreams() != 1 || right->getNumStreams() != 1) - throw Exception("Join is supported only for pipelines with one output port", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Join is supported only for pipelines with one output port"); if (left->hasTotals() || right->hasTotals()) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Current join algorithm is supported only for pipelines without totals"); @@ -464,7 +464,9 @@ std::unique_ptr QueryPipelineBuilder::joinPipelinesRightLe { delayed_root = std::make_shared(num_streams, join); if (!delayed_root->getInputs().empty() || delayed_root->getOutputs().size() != num_streams) - throw Exception(ErrorCodes::LOGICAL_ERROR, "DelayedJoinedBlocksTransform should have no inputs and {} outputs, but has {} inputs and {} outputs", + throw Exception(ErrorCodes::LOGICAL_ERROR, + "DelayedJoinedBlocksTransform should have no inputs and {} outputs, " + "but has {} inputs and {} outputs", num_streams, delayed_root->getInputs().size(), delayed_root->getOutputs().size()); if (collected_processors) @@ -491,7 +493,7 @@ std::unique_ptr QueryPipelineBuilder::joinPipelinesRightLe // Process delayed joined blocks 
when all JoiningTransform are finished. auto delayed = std::make_shared(joined_header); if (delayed->getInputs().size() != 1 || delayed->getOutputs().size() != 1) - throw Exception("DelayedJoinedBlocksWorkerTransform should have one input and one output", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "DelayedJoinedBlocksWorkerTransform should have one input and one output"); connect(*delayed_root_output_ports[i], delayed->getInputs().front()); @@ -621,7 +623,7 @@ void QueryPipelineBuilder::setProgressCallback(ProgressCallback callback) PipelineExecutorPtr QueryPipelineBuilder::execute() { if (!isCompleted()) - throw Exception("Cannot execute pipeline because it is not completed", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot execute pipeline because it is not completed"); return std::make_shared(pipe.processors, process_list_element); } diff --git a/src/QueryPipeline/RemoteQueryExecutor.cpp b/src/QueryPipeline/RemoteQueryExecutor.cpp index 49bc6a6326d..961d8129d29 100644 --- a/src/QueryPipeline/RemoteQueryExecutor.cpp +++ b/src/QueryPipeline/RemoteQueryExecutor.cpp @@ -357,7 +357,7 @@ std::variant RemoteQueryExecutor::restartQueryWithoutDuplicatedUUIDs else return read(*read_context); } - throw Exception("Found duplicate uuids while processing query", ErrorCodes::DUPLICATED_PART_UUIDS); + throw Exception(ErrorCodes::DUPLICATED_PART_UUIDS, "Found duplicate uuids while processing query"); } std::optional RemoteQueryExecutor::processPacket(Packet packet) @@ -466,7 +466,7 @@ bool RemoteQueryExecutor::setPartUUIDs(const std::vector & uuids) void RemoteQueryExecutor::processReadTaskRequest() { if (!task_iterator) - throw Exception("Distributed task iterator is not initialized", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Distributed task iterator is not initialized"); auto response = (*task_iterator)(); connections->sendReadTaskResponse(response); } @@ -474,7 +474,7 @@ void RemoteQueryExecutor::processReadTaskRequest() void RemoteQueryExecutor::processMergeTreeReadTaskRequest(PartitionReadRequest request) { if (!parallel_reading_coordinator) - throw Exception("Coordinator for parallel reading from replicas is not initialized", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Coordinator for parallel reading from replicas is not initialized"); auto response = parallel_reading_coordinator->handleRequest(std::move(request)); connections->sendMergeTreeReadTaskResponse(response); diff --git a/src/QueryPipeline/RemoteQueryExecutorReadContext.cpp b/src/QueryPipeline/RemoteQueryExecutorReadContext.cpp index 4596bbb8961..f1a23bf7c79 100644 --- a/src/QueryPipeline/RemoteQueryExecutorReadContext.cpp +++ b/src/QueryPipeline/RemoteQueryExecutorReadContext.cpp @@ -148,7 +148,7 @@ bool RemoteQueryExecutorReadContext::checkTimeoutImpl(bool blocking) { /// Socket receive timeout. Drain it in case of error, or it may be hide by timeout exception. 
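// The DelayedJoinedBlocksWorkerTransform check just above and the checkSingleInput()/
// checkSingleOutput() helpers in Chain.cpp earlier in this diff follow the same defensive
// pattern: validate a processor's port counts and connection state before wiring it in,
// and report violations as logical errors. A compressed sketch of that pattern, with a
// simplified processor model rather than the real IProcessor interface:
#include <list>
#include <stdexcept>
#include <string>

struct Port
{
    bool connected = false;
    bool isConnected() const { return connected; }
};

struct Processor
{
    std::string name;
    std::list<Port> inputs;
    std::list<Port> outputs;
};

// A transform added to a chain must expose exactly one free input and one free output.
void checkSingleInputAndOutput(const Processor & transform)
{
    if (transform.inputs.size() != 1)
        throw std::logic_error("Transform " + transform.name + " must have exactly one input");
    if (transform.inputs.front().isConnected())
        throw std::logic_error("Transform " + transform.name + " has a connected input");

    if (transform.outputs.size() != 1)
        throw std::logic_error("Transform " + transform.name + " must have exactly one output");
    if (transform.outputs.front().isConnected())
        throw std::logic_error("Transform " + transform.name + " has a connected output");
}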
timer.drain(); - throw NetException("Timeout exceeded", ErrorCodes::SOCKET_TIMEOUT); + throw NetException(ErrorCodes::SOCKET_TIMEOUT, "Timeout exceeded"); } return true; diff --git a/src/Server/GRPCServer.cpp b/src/Server/GRPCServer.cpp index 632e109d7bd..106f488f83a 100644 --- a/src/Server/GRPCServer.cpp +++ b/src/Server/GRPCServer.cpp @@ -243,10 +243,9 @@ namespace { auto max_session_timeout = config.getUInt("max_session_timeout", 3600); if (session_timeout > max_session_timeout) - throw Exception( - "Session timeout '" + std::to_string(session_timeout) + "' is larger than max_session_timeout: " - + std::to_string(max_session_timeout) + ". Maximum session timeout could be modified in configuration file.", - ErrorCodes::INVALID_SESSION_TIMEOUT); + throw Exception(ErrorCodes::INVALID_SESSION_TIMEOUT, "Session timeout '{}' is larger than max_session_timeout: {}. " + "Maximum session timeout could be modified in configuration file.", + std::to_string(session_timeout), std::to_string(max_session_timeout)); } else session_timeout = config.getInt("default_session_timeout", 60); @@ -429,7 +428,7 @@ namespace void write(const GRPCResult &, const CompletionCallback &) override { - throw Exception("Responder::write() should not be called", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Responder::write() should not be called"); } void writeAndFinish(const GRPCResult & result, const grpc::Status & status, const CompletionCallback & callback) override @@ -461,7 +460,7 @@ namespace void write(const GRPCResult &, const CompletionCallback &) override { - throw Exception("Responder::write() should not be called", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Responder::write() should not be called"); } void writeAndFinish(const GRPCResult & result, const grpc::Status & status, const CompletionCallback & callback) override @@ -778,7 +777,7 @@ namespace readQueryInfo(); if (query_info.cancel()) - throw Exception("Initial query info cannot set the 'cancel' field", ErrorCodes::INVALID_GRPC_QUERY_INFO); + throw Exception(ErrorCodes::INVALID_GRPC_QUERY_INFO, "Initial query info cannot set the 'cancel' field"); LOG_DEBUG(log, "Received initial QueryInfo: {}", getQueryDescription(query_info)); } @@ -918,7 +917,7 @@ namespace query_context->setExternalTablesInitializer([this] (ContextPtr context) { if (context != query_context) - throw Exception("Unexpected context in external tables initializer", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected context in external tables initializer"); createExternalTables(); }); @@ -926,7 +925,7 @@ namespace query_context->setInputInitializer([this] (ContextPtr context, const StoragePtr & input_storage) { if (context != query_context) - throw Exception("Unexpected context in Input initializer", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected context in Input initializer"); input_function_is_used = true; initializePipeline(input_storage->getInMemoryMetadataPtr()->getSampleBlock()); }); @@ -934,7 +933,7 @@ namespace query_context->setInputBlocksReaderCallback([this](ContextPtr context) -> Block { if (context != query_context) - throw Exception("Unexpected context in InputBlocksReader", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected context in InputBlocksReader"); Block block; while (!block && pipeline_executor->pull(block)); @@ -962,12 +961,12 @@ namespace if (!has_data_to_insert) { if (!insert_query) - throw 
Exception("Query requires data to insert, but it is not an INSERT query", ErrorCodes::NO_DATA_TO_INSERT); + throw Exception(ErrorCodes::NO_DATA_TO_INSERT, "Query requires data to insert, but it is not an INSERT query"); else { const auto & settings = query_context->getSettingsRef(); if (settings.throw_if_no_data_to_insert) - throw Exception("No data to insert", ErrorCodes::NO_DATA_TO_INSERT); + throw Exception(ErrorCodes::NO_DATA_TO_INSERT, "No data to insert"); else return; } @@ -1026,7 +1025,7 @@ namespace break; if (!isInputStreaming(call_type)) - throw Exception("next_query_info is allowed to be set only for streaming input", ErrorCodes::INVALID_GRPC_QUERY_INFO); + throw Exception(ErrorCodes::INVALID_GRPC_QUERY_INFO, "next_query_info is allowed to be set only for streaming input"); readQueryInfo(); if (!query_info.query().empty() || !query_info.query_id().empty() || !query_info.settings().empty() @@ -1034,9 +1033,9 @@ namespace || query_info.external_tables_size() || !query_info.user_name().empty() || !query_info.password().empty() || !query_info.quota().empty() || !query_info.session_id().empty()) { - throw Exception("Extra query infos can be used only to add more input data. " - "Only the following fields can be set: input_data, next_query_info, cancel", - ErrorCodes::INVALID_GRPC_QUERY_INFO); + throw Exception(ErrorCodes::INVALID_GRPC_QUERY_INFO, + "Extra query infos can be used only to add more input data. " + "Only the following fields can be set: input_data, next_query_info, cancel"); } if (isQueryCancelled()) @@ -1148,7 +1147,7 @@ namespace break; if (!isInputStreaming(call_type)) - throw Exception("next_query_info is allowed to be set only for streaming input", ErrorCodes::INVALID_GRPC_QUERY_INFO); + throw Exception(ErrorCodes::INVALID_GRPC_QUERY_INFO, "next_query_info is allowed to be set only for streaming input"); readQueryInfo(); if (!query_info.query().empty() || !query_info.query_id().empty() || !query_info.settings().empty() @@ -1156,9 +1155,11 @@ namespace || !query_info.output_format().empty() || !query_info.user_name().empty() || !query_info.password().empty() || !query_info.quota().empty() || !query_info.session_id().empty()) { - throw Exception("Extra query infos can be used only to add more data to input or more external tables. " - "Only the following fields can be set: input_data, external_tables, next_query_info, cancel", - ErrorCodes::INVALID_GRPC_QUERY_INFO); + throw Exception(ErrorCodes::INVALID_GRPC_QUERY_INFO, + "Extra query infos can be used only " + "to add more data to input or more external tables. 
" + "Only the following fields can be set: " + "input_data, external_tables, next_query_info, cancel"); } if (isQueryCancelled()) break; @@ -1439,9 +1440,9 @@ namespace if (failed_to_read_query_info) { if (initial_query_info_read) - throw Exception("Failed to read extra QueryInfo", ErrorCodes::NETWORK_ERROR); + throw Exception(ErrorCodes::NETWORK_ERROR, "Failed to read extra QueryInfo"); else - throw Exception("Failed to read initial QueryInfo", ErrorCodes::NETWORK_ERROR); + throw Exception(ErrorCodes::NETWORK_ERROR, "Failed to read initial QueryInfo"); } } @@ -1684,7 +1685,7 @@ namespace void Call::throwIfFailedToSendResult() { if (failed_to_send_result) - throw Exception("Failed to send result to the client", ErrorCodes::NETWORK_ERROR); + throw Exception(ErrorCodes::NETWORK_ERROR, "Failed to send result to the client"); } void Call::sendException(const Exception & exception) @@ -1873,7 +1874,7 @@ void GRPCServer::start() grpc_server = builder.BuildAndStart(); if (nullptr == grpc_server) { - throw DB::Exception("Can't start grpc server, there is a port conflict", DB::ErrorCodes::NETWORK_ERROR); + throw DB::Exception(DB::ErrorCodes::NETWORK_ERROR, "Can't start grpc server, there is a port conflict"); } runner->start(); diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index d0b2b3fd493..29bfa8065ba 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -260,12 +260,12 @@ static std::chrono::steady_clock::duration parseSessionTimeout( ReadBufferFromString buf(session_timeout_str); if (!tryReadIntText(session_timeout, buf) || !buf.eof()) - throw Exception("Invalid session timeout: '" + session_timeout_str + "'", ErrorCodes::INVALID_SESSION_TIMEOUT); + throw Exception(ErrorCodes::INVALID_SESSION_TIMEOUT, "Invalid session timeout: '{}'", session_timeout_str); if (session_timeout > max_session_timeout) - throw Exception("Session timeout '" + session_timeout_str + "' is larger than max_session_timeout: " + toString(max_session_timeout) - + ". Maximum session timeout could be modified in configuration file.", - ErrorCodes::INVALID_SESSION_TIMEOUT); + throw Exception(ErrorCodes::INVALID_SESSION_TIMEOUT, "Session timeout '{}' is larger than max_session_timeout: {}. " + "Maximum session timeout could be modified in configuration file.", + session_timeout_str, max_session_timeout); } return std::chrono::seconds(session_timeout); @@ -279,12 +279,12 @@ void HTTPHandler::pushDelayedResults(Output & used_output) auto * cascade_buffer = typeid_cast(used_output.out_maybe_delayed_and_compressed.get()); if (!cascade_buffer) - throw Exception("Expected CascadeWriteBuffer", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected CascadeWriteBuffer"); cascade_buffer->getResultBuffers(write_buffers); if (write_buffers.empty()) - throw Exception("At least one buffer is expected to overwrite result into HTTP response", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "At least one buffer is expected to overwrite result into HTTP response"); for (auto & write_buf : write_buffers) { @@ -352,25 +352,31 @@ bool HTTPHandler::authenticateUser( { /// It is prohibited to mix different authorization schemes. 
         if (has_http_credentials)
-            throw Exception("Invalid authentication: it is not allowed to use SSL certificate authentication and Authorization HTTP header simultaneously", ErrorCodes::AUTHENTICATION_FAILED);
+            throw Exception(ErrorCodes::AUTHENTICATION_FAILED,
+                "Invalid authentication: it is not allowed "
+                "to use SSL certificate authentication and Authorization HTTP header simultaneously");
         if (has_credentials_in_query_params)
-            throw Exception("Invalid authentication: it is not allowed to use SSL certificate authentication and authentication via parameters simultaneously simultaneously", ErrorCodes::AUTHENTICATION_FAILED);
+            throw Exception(ErrorCodes::AUTHENTICATION_FAILED,
+                "Invalid authentication: it is not allowed "
+                "to use SSL certificate authentication and authentication via parameters simultaneously");

         if (has_ssl_certificate_auth)
         {
 #if USE_SSL
             if (!password.empty())
-                throw Exception("Invalid authentication: it is not allowed to use SSL certificate authentication and authentication via password simultaneously", ErrorCodes::AUTHENTICATION_FAILED);
+                throw Exception(ErrorCodes::AUTHENTICATION_FAILED,
+                    "Invalid authentication: it is not allowed "
+                    "to use SSL certificate authentication and authentication via password simultaneously");

             if (request.havePeerCertificate())
                 certificate_common_name = request.peerCertificate().commonName();

             if (certificate_common_name.empty())
-                throw Exception("Invalid authentication: SSL certificate authentication requires nonempty certificate's Common Name", ErrorCodes::AUTHENTICATION_FAILED);
+                throw Exception(ErrorCodes::AUTHENTICATION_FAILED,
+                    "Invalid authentication: SSL certificate authentication requires nonempty certificate's Common Name");
 #else
-            throw Exception(
-                "SSL certificate authentication disabled because ClickHouse was built without SSL library",
-                ErrorCodes::SUPPORT_IS_DISABLED);
+            throw Exception(ErrorCodes::SUPPORT_IS_DISABLED,
+                "SSL certificate authentication disabled because ClickHouse was built without SSL library");
 #endif
         }
     }
@@ -378,7 +384,9 @@ bool HTTPHandler::authenticateUser(
     {
         /// It is prohibited to mix different authorization schemes.
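// The branch above and the one that follows enforce, pairwise, that a request does not mix
// authentication schemes (SSL client certificate, Authorization HTTP header, credentials in
// URL parameters). An equivalent compact formulation, illustrative only and not the handler's
// actual code, is to count the schemes in use:
#include <stdexcept>

void checkSingleAuthenticationScheme(bool has_ssl_certificate_auth, bool has_http_credentials, bool has_credentials_in_query_params)
{
    const int schemes_in_use = int(has_ssl_certificate_auth) + int(has_http_credentials) + int(has_credentials_in_query_params);
    if (schemes_in_use > 1)
        throw std::invalid_argument("Invalid authentication: it is not allowed to mix different authentication schemes");
}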
if (has_credentials_in_query_params) - throw Exception("Invalid authentication: it is not allowed to use Authorization HTTP header and authentication via parameters simultaneously", ErrorCodes::AUTHENTICATION_FAILED); + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, + "Invalid authentication: it is not allowed " + "to use Authorization HTTP header and authentication via parameters simultaneously"); std::string scheme; std::string auth_info; @@ -395,11 +403,11 @@ bool HTTPHandler::authenticateUser( spnego_challenge = auth_info; if (spnego_challenge.empty()) - throw Exception("Invalid authentication: SPNEGO challenge is empty", ErrorCodes::AUTHENTICATION_FAILED); + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: SPNEGO challenge is empty"); } else { - throw Exception("Invalid authentication: '" + scheme + "' HTTP Authorization scheme is not supported", ErrorCodes::AUTHENTICATION_FAILED); + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: '{}' HTTP Authorization scheme is not supported", scheme); } quota_key = params.get("quota_key", ""); @@ -419,7 +427,7 @@ bool HTTPHandler::authenticateUser( auto * certificate_credentials = dynamic_cast(request_credentials.get()); if (!certificate_credentials) - throw Exception("Invalid authentication: expected SSL certificate authorization scheme", ErrorCodes::AUTHENTICATION_FAILED); + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: expected SSL certificate authorization scheme"); } else if (!spnego_challenge.empty()) { @@ -428,7 +436,7 @@ bool HTTPHandler::authenticateUser( auto * gss_acceptor_context = dynamic_cast(request_credentials.get()); if (!gss_acceptor_context) - throw Exception("Invalid authentication: unexpected 'Negotiate' HTTP Authorization scheme expected", ErrorCodes::AUTHENTICATION_FAILED); + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: unexpected 'Negotiate' HTTP Authorization scheme expected"); #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunreachable-code" @@ -441,7 +449,7 @@ bool HTTPHandler::authenticateUser( if (!gss_acceptor_context->isFailed() && !gss_acceptor_context->isReady()) { if (spnego_response.empty()) - throw Exception("Invalid authentication: 'Negotiate' HTTP Authorization failure", ErrorCodes::AUTHENTICATION_FAILED); + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: 'Negotiate' HTTP Authorization failure"); response.setStatusAndReason(HTTPResponse::HTTP_UNAUTHORIZED); response.send(); @@ -455,7 +463,7 @@ bool HTTPHandler::authenticateUser( auto * basic_credentials = dynamic_cast(request_credentials.get()); if (!basic_credentials) - throw Exception("Invalid authentication: expected 'Basic' HTTP Authorization scheme", ErrorCodes::AUTHENTICATION_FAILED); + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: expected 'Basic' HTTP Authorization scheme"); basic_credentials->setUserName(user); basic_credentials->setPassword(password); @@ -614,7 +622,7 @@ void HTTPHandler::processQuery( { auto * prev_memory_buffer = typeid_cast(prev_buf.get()); if (!prev_memory_buffer) - throw Exception("Expected MemoryWriteBuffer", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected MemoryWriteBuffer"); auto rdbuf = prev_memory_buffer->tryGetReadBuffer(); copyData(*rdbuf , *next_buffer); @@ -980,9 +988,9 @@ void HTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse /// Workaround. 
Poco does not detect 411 Length Required case. if (request.getMethod() == HTTPRequest::HTTP_POST && !request.getChunkedTransferEncoding() && !request.hasContentLength()) { - throw Exception( - "The Transfer-Encoding is not chunked and there is no Content-Length header for POST request", - ErrorCodes::HTTP_LENGTH_REQUIRED); + throw Exception(ErrorCodes::HTTP_LENGTH_REQUIRED, + "The Transfer-Encoding is not chunked and there " + "is no Content-Length header for POST request"); } processQuery(request, params, response, used_output, query_scope); @@ -1189,10 +1197,8 @@ static inline CompiledRegexPtr getCompiledRegex(const std::string & expression) auto compiled_regex = std::make_shared(expression); if (!compiled_regex->ok()) - throw Exception( - "Cannot compile re2: " + expression + " for http handling rule, error: " + compiled_regex->error() - + ". Look at https://github.com/google/re2/wiki/Syntax for reference.", - ErrorCodes::CANNOT_COMPILE_REGEXP); + throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, "Cannot compile re2: {} for http handling rule, error: {}. " + "Look at https://github.com/google/re2/wiki/Syntax for reference.", expression, compiled_regex->error()); return compiled_regex; } @@ -1202,7 +1208,7 @@ HTTPRequestHandlerFactoryPtr createPredefinedHandlerFactory(IServer & server, const std::string & config_prefix) { if (!config.has(config_prefix + ".handler.query")) - throw Exception("There is no path '" + config_prefix + ".handler.query' in configuration file.", ErrorCodes::NO_ELEMENTS_IN_CONFIG); + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "There is no path '{}.handler.query' in configuration file.", config_prefix); std::string predefined_query = config.getString(config_prefix + ".handler.query"); NameSet analyze_receive_params = analyzeReceiveQueryParams(predefined_query); diff --git a/src/Server/HTTPHandlerFactory.cpp b/src/Server/HTTPHandlerFactory.cpp index e4da7941b50..78e374ee9e0 100644 --- a/src/Server/HTTPHandlerFactory.cpp +++ b/src/Server/HTTPHandlerFactory.cpp @@ -55,8 +55,8 @@ static inline auto createHandlersFactoryFromConfig( const auto & handler_type = config.getString(prefix + "." + key + ".handler.type", ""); if (handler_type.empty()) - throw Exception("Handler type in config is not specified here: " + prefix + "." + key + ".handler.type", - ErrorCodes::INVALID_CONFIG_PARAMETER); + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Handler type in config is not specified here: " + "{}.{}.handler.type", prefix, key); if (handler_type == "static") main_handler_factory->addHandler(createStaticHandlerFactory(server, config, prefix + "." + key)); @@ -69,12 +69,12 @@ static inline auto createHandlersFactoryFromConfig( else if (handler_type == "replicas_status") main_handler_factory->addHandler(createReplicasStatusHandlerFactory(server, config, prefix + "." + key)); else - throw Exception("Unknown handler type '" + handler_type + "' in config here: " + prefix + "." + key + ".handler.type", - ErrorCodes::INVALID_CONFIG_PARAMETER); + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Unknown handler type '{}' in config here: {}.{}.handler.type", + handler_type, prefix, key); } else - throw Exception("Unknown element in config: " + prefix + "." 
+ key + ", must be 'rule' or 'defaults'", - ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG, "Unknown element in config: " + "{}.{}, must be 'rule' or 'defaults'", prefix, key); } return main_handler_factory; @@ -116,7 +116,7 @@ HTTPRequestHandlerFactoryPtr createHandlerFactory(IServer & server, const Poco:: else if (name == "PrometheusHandler-factory") return createPrometheusMainHandlerFactory(server, config, async_metrics, name); - throw Exception("LOGICAL ERROR: Unknown HTTP handler factory name.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "LOGICAL ERROR: Unknown HTTP handler factory name."); } static const auto ping_response_expression = "Ok.\n"; diff --git a/src/Server/HTTPHandlerFactory.h b/src/Server/HTTPHandlerFactory.h index f56c712c615..ebdfa954bf7 100644 --- a/src/Server/HTTPHandlerFactory.h +++ b/src/Server/HTTPHandlerFactory.h @@ -63,7 +63,7 @@ public: else if (filter_type == "methods") addFilter(methodsFilter(config, prefix + ".methods")); else - throw Exception("Unknown element in config: " + prefix + "." + filter_type, ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG, "Unknown element in config: {}.{}", prefix, filter_type); } } diff --git a/src/Server/HTTPHandlerRequestFilter.h b/src/Server/HTTPHandlerRequestFilter.h index 3e6a562e3fa..c6bcdb211e1 100644 --- a/src/Server/HTTPHandlerRequestFilter.h +++ b/src/Server/HTTPHandlerRequestFilter.h @@ -58,8 +58,9 @@ static inline auto getExpression(const std::string & expression) auto compiled_regex = std::make_shared(expression.substr(6)); if (!compiled_regex->ok()) - throw Exception("cannot compile re2: " + expression + " for http handling rule, error: " + compiled_regex->error() + - ". Look at https://github.com/google/re2/wiki/Syntax for reference.", ErrorCodes::CANNOT_COMPILE_REGEXP); + throw Exception(ErrorCodes::CANNOT_COMPILE_REGEXP, "cannot compile re2: {} for http handling rule, error: {}. 
" + "Look at https://github.com/google/re2/wiki/Syntax for reference.", + expression, compiled_regex->error()); return std::make_pair(expression, compiled_regex); } diff --git a/src/Server/InterserverIOHTTPHandler.cpp b/src/Server/InterserverIOHTTPHandler.cpp index 3fc66664795..c8b3ec31577 100644 --- a/src/Server/InterserverIOHTTPHandler.cpp +++ b/src/Server/InterserverIOHTTPHandler.cpp @@ -63,7 +63,7 @@ void InterserverIOHTTPHandler::processQuery(HTTPServerRequest & request, HTTPSer /// Locked for read while query processing std::shared_lock lock(endpoint->rwlock); if (endpoint->blocker.isCancelled()) - throw Exception("Transferring part to replica was cancelled", ErrorCodes::ABORTED); + throw Exception(ErrorCodes::ABORTED, "Transferring part to replica was cancelled"); if (compress) { diff --git a/src/Server/KeeperTCPHandler.cpp b/src/Server/KeeperTCPHandler.cpp index 38a10926036..db3dfefb238 100644 --- a/src/Server/KeeperTCPHandler.cpp +++ b/src/Server/KeeperTCPHandler.cpp @@ -269,12 +269,12 @@ Poco::Timespan KeeperTCPHandler::receiveHandshake(int32_t handshake_length) std::array passwd {}; if (!isHandShake(handshake_length)) - throw Exception("Unexpected handshake length received: " + toString(handshake_length), ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); + throw Exception(ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT, "Unexpected handshake length received: {}", toString(handshake_length)); Coordination::read(protocol_version, *in); if (protocol_version != Coordination::ZOOKEEPER_PROTOCOL_VERSION) - throw Exception("Unexpected protocol version: " + toString(protocol_version), ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); + throw Exception(ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT, "Unexpected protocol version: {}", toString(protocol_version)); Coordination::read(last_zxid_seen, *in); Coordination::read(timeout_ms, *in); @@ -471,7 +471,7 @@ void KeeperTCPHandler::runImpl() } if (result.error) - throw Exception("Exception happened while reading from socket", ErrorCodes::SYSTEM_ERROR); + throw Exception(ErrorCodes::SYSTEM_ERROR, "Exception happened while reading from socket"); if (session_stopwatch.elapsedMicroseconds() > static_cast(session_timeout.totalMicroseconds())) { diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index 8e2d99e2909..3715d658730 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -123,7 +123,7 @@ void MySQLHandler::run() handshake_response.auth_plugin_name); if (!(client_capabilities & CLIENT_PROTOCOL_41)) - throw Exception("Required capability: CLIENT_PROTOCOL_41.", ErrorCodes::MYSQL_CLIENT_INSUFFICIENT_CAPABILITIES); + throw Exception(ErrorCodes::MYSQL_CLIENT_INSUFFICIENT_CAPABILITIES, "Required capability: CLIENT_PROTOCOL_41."); authenticate(handshake_response.username, handshake_response.auth_plugin_name, handshake_response.auth_response); @@ -181,7 +181,7 @@ void MySQLHandler::run() comPing(); break; default: - throw Exception(Poco::format("Command %d is not implemented.", command), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Command {} is not implemented.", command); } } catch (const NetException & exc) @@ -221,7 +221,7 @@ void MySQLHandler::finishHandshake(MySQLProtocol::ConnectionPhase::HandshakeResp int ret = socket().receiveBytes(buf + pos, static_cast(packet_size - pos)); if (ret == 0) { - throw Exception("Cannot read all data. Bytes read: " + std::to_string(pos) + ". 
Bytes expected: 3", ErrorCodes::CANNOT_READ_ALL_DATA); + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Cannot read all data. Bytes read: {}. Bytes expected: 3", std::to_string(pos)); } pos += ret; } @@ -368,14 +368,15 @@ void MySQLHandler::comQuery(ReadBuffer & payload) void MySQLHandler::authPluginSSL() { - throw Exception("ClickHouse was built without SSL support. Try specifying password using double SHA1 in users.xml.", ErrorCodes::SUPPORT_IS_DISABLED); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, + "ClickHouse was built without SSL support. Try specifying password using double SHA1 in users.xml."); } void MySQLHandler::finishHandshakeSSL( [[maybe_unused]] size_t packet_size, [[maybe_unused]] char * buf, [[maybe_unused]] size_t pos, [[maybe_unused]] std::function read_bytes, [[maybe_unused]] MySQLProtocol::ConnectionPhase::HandshakeResponse & packet) { - throw Exception("Client requested SSL, while it is disabled.", ErrorCodes::SUPPORT_IS_DISABLED); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Client requested SSL, while it is disabled."); } #if USE_SSL diff --git a/src/Server/MySQLHandlerFactory.cpp b/src/Server/MySQLHandlerFactory.cpp index cbcddbb444a..3b5f28477ad 100644 --- a/src/Server/MySQLHandlerFactory.cpp +++ b/src/Server/MySQLHandlerFactory.cpp @@ -57,16 +57,16 @@ void MySQLHandlerFactory::readRSAKeys() String private_key_file_property = "openSSL.server.privateKeyFile"; if (!config.has(certificate_file_property)) - throw Exception("Certificate file is not set.", ErrorCodes::NO_ELEMENTS_IN_CONFIG); + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Certificate file is not set."); if (!config.has(private_key_file_property)) - throw Exception("Private key file is not set.", ErrorCodes::NO_ELEMENTS_IN_CONFIG); + throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "Private key file is not set."); { String certificate_file = config.getString(certificate_file_property); FILE * fp = fopen(certificate_file.data(), "r"); if (fp == nullptr) - throw Exception("Cannot open certificate file: " + certificate_file + ".", ErrorCodes::CANNOT_OPEN_FILE); + throw Exception(ErrorCodes::CANNOT_OPEN_FILE, "Cannot open certificate file: {}.", certificate_file); SCOPE_EXIT( if (0 != fclose(fp)) throwFromErrno("Cannot close file with the certificate in MySQLHandlerFactory", ErrorCodes::CANNOT_CLOSE_FILE); @@ -75,16 +75,16 @@ void MySQLHandlerFactory::readRSAKeys() X509 * x509 = PEM_read_X509(fp, nullptr, nullptr, nullptr); SCOPE_EXIT(X509_free(x509)); if (x509 == nullptr) - throw Exception("Failed to read PEM certificate from " + certificate_file + ". Error: " + getOpenSSLErrors(), ErrorCodes::OPENSSL_ERROR); + throw Exception(ErrorCodes::OPENSSL_ERROR, "Failed to read PEM certificate from {}. Error: {}", certificate_file, getOpenSSLErrors()); EVP_PKEY * p = X509_get_pubkey(x509); if (p == nullptr) - throw Exception("Failed to get RSA key from X509. Error: " + getOpenSSLErrors(), ErrorCodes::OPENSSL_ERROR); + throw Exception(ErrorCodes::OPENSSL_ERROR, "Failed to get RSA key from X509. Error: {}", getOpenSSLErrors()); SCOPE_EXIT(EVP_PKEY_free(p)); public_key.reset(EVP_PKEY_get1_RSA(p)); if (public_key.get() == nullptr) - throw Exception("Failed to get RSA key from ENV_PKEY. Error: " + getOpenSSLErrors(), ErrorCodes::OPENSSL_ERROR); + throw Exception(ErrorCodes::OPENSSL_ERROR, "Failed to get RSA key from ENV_PKEY. 
Error: {}", getOpenSSLErrors()); } { @@ -100,7 +100,7 @@ void MySQLHandlerFactory::readRSAKeys() private_key.reset(PEM_read_RSAPrivateKey(fp, nullptr, nullptr, nullptr)); if (!private_key) - throw Exception("Failed to read RSA private key from " + private_key_file + ". Error: " + getOpenSSLErrors(), ErrorCodes::OPENSSL_ERROR); + throw Exception(ErrorCodes::OPENSSL_ERROR, "Failed to read RSA private key from {}. Error: {}", private_key_file, getOpenSSLErrors()); } } @@ -109,19 +109,19 @@ void MySQLHandlerFactory::generateRSAKeys() LOG_TRACE(log, "Generating new RSA key pair."); public_key.reset(RSA_new()); if (!public_key) - throw Exception("Failed to allocate RSA key. Error: " + getOpenSSLErrors(), ErrorCodes::OPENSSL_ERROR); + throw Exception(ErrorCodes::OPENSSL_ERROR, "Failed to allocate RSA key. Error: {}", getOpenSSLErrors()); BIGNUM * e = BN_new(); if (!e) - throw Exception("Failed to allocate BIGNUM. Error: " + getOpenSSLErrors(), ErrorCodes::OPENSSL_ERROR); + throw Exception(ErrorCodes::OPENSSL_ERROR, "Failed to allocate BIGNUM. Error: {}", getOpenSSLErrors()); SCOPE_EXIT(BN_free(e)); if (!BN_set_word(e, 65537) || !RSA_generate_key_ex(public_key.get(), 2048, e, nullptr)) - throw Exception("Failed to generate RSA key. Error: " + getOpenSSLErrors(), ErrorCodes::OPENSSL_ERROR); + throw Exception(ErrorCodes::OPENSSL_ERROR, "Failed to generate RSA key. Error: {}", getOpenSSLErrors()); private_key.reset(RSAPrivateKey_dup(public_key.get())); if (!private_key) - throw Exception("Failed to copy RSA key. Error: " + getOpenSSLErrors(), ErrorCodes::OPENSSL_ERROR); + throw Exception(ErrorCodes::OPENSSL_ERROR, "Failed to copy RSA key. Error: {}", getOpenSSLErrors()); } #endif diff --git a/src/Server/PostgreSQLHandler.cpp b/src/Server/PostgreSQLHandler.cpp index b2a3935263d..b017b87fcc1 100644 --- a/src/Server/PostgreSQLHandler.cpp +++ b/src/Server/PostgreSQLHandler.cpp @@ -282,7 +282,7 @@ void PostgreSQLHandler::processQuery() settings.max_parser_depth, settings.allow_settings_after_format_in_insert); if (!parse_res.second) - throw Exception("Cannot parse and execute the following part of query: " + String(parse_res.first), ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Cannot parse and execute the following part of query: {}", String(parse_res.first)); std::random_device rd; std::mt19937 gen(rd()); diff --git a/src/Server/ProxyV1Handler.cpp b/src/Server/ProxyV1Handler.cpp index bf02f34c6a3..e40579ba821 100644 --- a/src/Server/ProxyV1Handler.cpp +++ b/src/Server/ProxyV1Handler.cpp @@ -104,21 +104,21 @@ bool ProxyV1Handler::readWord(int max_len, std::string & word, bool & eol) } catch (const Poco::Net::NetException & e) { - throw NetException(e.displayText() + ", while reading from socket (" + socket().peerAddress().toString() + ")", ErrorCodes::NETWORK_ERROR); + throw NetException(ErrorCodes::NETWORK_ERROR, "{}, while reading from socket ({})", e.displayText(), socket().peerAddress().toString()); } catch (const Poco::TimeoutException &) { - throw NetException(fmt::format("Timeout exceeded while reading from socket ({}, {} ms)", + throw NetException(ErrorCodes::SOCKET_TIMEOUT, "Timeout exceeded while reading from socket ({}, {} ms)", socket().peerAddress().toString(), - socket().getReceiveTimeout().totalMilliseconds()), ErrorCodes::SOCKET_TIMEOUT); + socket().getReceiveTimeout().totalMilliseconds()); } catch (const Poco::IOException & e) { - throw NetException(e.displayText() + ", while reading from socket (" + socket().peerAddress().toString() + ")", 
ErrorCodes::NETWORK_ERROR); + throw NetException(ErrorCodes::NETWORK_ERROR, "{}, while reading from socket ({})", e.displayText(), socket().peerAddress().toString()); } if (n < 0) - throw NetException("Cannot read from socket (" + socket().peerAddress().toString() + ")", ErrorCodes::CANNOT_READ_FROM_SOCKET); + throw NetException(ErrorCodes::CANNOT_READ_FROM_SOCKET, "Cannot read from socket ({})", socket().peerAddress().toString()); return false; } diff --git a/src/Server/StaticRequestHandler.cpp b/src/Server/StaticRequestHandler.cpp index aea15f66c21..13a01ba8139 100644 --- a/src/Server/StaticRequestHandler.cpp +++ b/src/Server/StaticRequestHandler.cpp @@ -102,7 +102,9 @@ void StaticRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServer /// Workaround. Poco does not detect 411 Length Required case. if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST && !request.getChunkedTransferEncoding() && !request.hasContentLength()) - throw Exception("The Transfer-Encoding is not chunked and there is no Content-Length header for POST request", ErrorCodes::HTTP_LENGTH_REQUIRED); + throw Exception(ErrorCodes::HTTP_LENGTH_REQUIRED, + "The Transfer-Encoding is not chunked and there " + "is no Content-Length header for POST request"); setResponseDefaultHeaders(response, keep_alive_timeout); response.setStatusAndReason(Poco::Net::HTTPResponse::HTTPStatus(status)); @@ -135,7 +137,7 @@ void StaticRequestHandler::writeResponse(WriteBuffer & out) String file_path = fs::weakly_canonical(user_files_absolute_path / file_name); if (!fs::exists(file_path)) - throw Exception("Invalid file name " + file_path + " for static HTTPHandler. ", ErrorCodes::INCORRECT_FILE_NAME); + throw Exception(ErrorCodes::INCORRECT_FILE_NAME, "Invalid file name {} for static HTTPHandler. 
", file_path); ReadBufferFromFile in(file_path); copyData(in, out); @@ -143,8 +145,9 @@ void StaticRequestHandler::writeResponse(WriteBuffer & out) else if (startsWith(response_expression, config_prefix)) { if (response_expression.size() <= config_prefix.size()) - throw Exception( "Static handling rule handler must contain a complete configuration path, for example: config://config_key", - ErrorCodes::INVALID_CONFIG_PARAMETER); + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "Static handling rule handler must contain a complete configuration path, for example: " + "config://config_key"); const auto & config_path = response_expression.substr(config_prefix.size(), response_expression.size() - config_prefix.size()); writeString(server.config().getRawString(config_path, "Ok.\n"), out); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 7446078ed3f..073bde0198f 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -303,7 +303,7 @@ void TCPHandler::runImpl() query_context->setExternalTablesInitializer([this] (ContextPtr context) { if (context != query_context) - throw Exception("Unexpected context in external tables initializer", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected context in external tables initializer"); /// Get blocks of temporary tables readData(); @@ -318,7 +318,7 @@ void TCPHandler::runImpl() query_context->setInputInitializer([this] (ContextPtr context, const StoragePtr & input_storage) { if (context != query_context) - throw Exception("Unexpected context in Input initializer", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected context in Input initializer"); auto metadata_snapshot = input_storage->getInMemoryMetadataPtr(); state.need_receive_data_for_input = true; @@ -338,7 +338,7 @@ void TCPHandler::runImpl() query_context->setInputBlocksReaderCallback([this] (ContextPtr context) -> Block { if (context != query_context) - throw Exception("Unexpected context in InputBlocksReader", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected context in InputBlocksReader"); if (!readDataNext()) { @@ -895,7 +895,7 @@ void TCPHandler::receiveUnexpectedTablesStatusRequest() TablesStatusRequest skip_request; skip_request.read(*in, client_tcp_protocol_version); - throw NetException("Unexpected packet TablesStatusRequest received from client", ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); + throw NetException(ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT, "Unexpected packet TablesStatusRequest received from client"); } void TCPHandler::sendPartUUIDs() @@ -1137,10 +1137,10 @@ void TCPHandler::receiveHello() if (packet_type == 'G' || packet_type == 'P') { writeString(formatHTTPErrorResponseWhenUserIsConnectedToWrongPort(server.config()), *out); - throw Exception("Client has connected to wrong port", ErrorCodes::CLIENT_HAS_CONNECTED_TO_WRONG_PORT); + throw Exception(ErrorCodes::CLIENT_HAS_CONNECTED_TO_WRONG_PORT, "Client has connected to wrong port"); } else - throw NetException("Unexpected packet from client", ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); + throw NetException(ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT, "Unexpected packet from client"); } readStringBinary(client_name, *in); @@ -1155,7 +1155,7 @@ void TCPHandler::receiveHello() readStringBinary(password, *in); if (user.empty()) - throw NetException("Unexpected packet from client (no user in Hello package)", ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); + throw 
NetException(ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT, "Unexpected packet from client (no user in Hello package)"); LOG_DEBUG(log, "Connected {} version {}.{}.{}, revision: {}{}{}.", client_name, @@ -1208,7 +1208,7 @@ void TCPHandler::receiveUnexpectedHello() readStringBinary(skip_string, *in); readStringBinary(skip_string, *in); - throw NetException("Unexpected packet Hello received from client", ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); + throw NetException(ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT, "Unexpected packet Hello received from client"); } @@ -1296,7 +1296,7 @@ bool TCPHandler::receivePacket() return false; default: - throw Exception("Unknown packet " + toString(packet_type) + " from client", ErrorCodes::UNKNOWN_PACKET_FROM_CLIENT); + throw Exception(ErrorCodes::UNKNOWN_PACKET_FROM_CLIENT, "Unknown packet {} from client", toString(packet_type)); } } @@ -1311,7 +1311,7 @@ void TCPHandler::receiveUnexpectedIgnoredPartUUIDs() { std::vector skip_part_uuids; readVectorBinary(skip_part_uuids, *in); - throw NetException("Unexpected packet IgnoredPartUUIDs received from client", ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); + throw NetException(ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT, "Unexpected packet IgnoredPartUUIDs received from client"); } @@ -1334,14 +1334,14 @@ String TCPHandler::receiveReadTaskResponseAssumeLocked() } else { - throw Exception(fmt::format("Received {} packet after requesting read task", - Protocol::Client::toString(packet_type)), ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); + throw Exception(ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT, "Received {} packet after requesting read task", + Protocol::Client::toString(packet_type)); } } UInt64 version; readVarUInt(version, *in); if (version != DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION) - throw Exception("Protocol version for distributed processing mismatched", ErrorCodes::UNKNOWN_PROTOCOL); + throw Exception(ErrorCodes::UNKNOWN_PROTOCOL, "Protocol version for distributed processing mismatched"); String response; readStringBinary(response, *in); return response; @@ -1367,8 +1367,8 @@ std::optional TCPHandler::receivePartitionMergeTreeReadTa } else { - throw Exception(fmt::format("Received {} packet after requesting read task", - Protocol::Client::toString(packet_type)), ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); + throw Exception(ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT, "Received {} packet after requesting read task", + Protocol::Client::toString(packet_type)); } } PartitionReadResponse response; @@ -1564,7 +1564,7 @@ void TCPHandler::receiveUnexpectedQuery() if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_PARAMETERS) skip_settings.read(*in, settings_format); - throw NetException("Unexpected packet Query received from client", ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); + throw NetException(ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT, "Unexpected packet Query received from client"); } bool TCPHandler::receiveData(bool scalar) @@ -1647,7 +1647,7 @@ bool TCPHandler::receiveUnexpectedData(bool throw_exception) state.read_all_data = true; if (throw_exception) - throw NetException("Unexpected packet Data received from client", ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); + throw NetException(ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT, "Unexpected packet Data received from client"); return read_ok; } @@ -1766,7 +1766,7 @@ bool TCPHandler::isQueryCancelled() { case Protocol::Client::Cancel: if (state.empty()) - throw NetException("Unexpected packet Cancel received from client", 
ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); + throw NetException(ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT, "Unexpected packet Cancel received from client"); LOG_INFO(log, "Query was cancelled."); state.is_cancelled = true; /// For testing connection collector. @@ -1781,7 +1781,7 @@ bool TCPHandler::isQueryCancelled() return true; default: - throw NetException("Unknown packet from client " + toString(packet_type), ErrorCodes::UNKNOWN_PACKET_FROM_CLIENT); + throw NetException(ErrorCodes::UNKNOWN_PACKET_FROM_CLIENT, "Unknown packet from client {}", toString(packet_type)); } } diff --git a/src/Server/TCPProtocolStackFactory.h b/src/Server/TCPProtocolStackFactory.h index 16b57649a72..7373e6e1c4e 100644 --- a/src/Server/TCPProtocolStackFactory.h +++ b/src/Server/TCPProtocolStackFactory.h @@ -57,7 +57,7 @@ public: else if (key.starts_with("host")) allowed_client_hosts.addName(value); else - throw Exception("Unknown address pattern type: " + key, ErrorCodes::UNKNOWN_ADDRESS_PATTERN_TYPE); + throw Exception(ErrorCodes::UNKNOWN_ADDRESS_PATTERN_TYPE, "Unknown address pattern type: {}", key); } } } @@ -65,7 +65,7 @@ public: Poco::Net::TCPServerConnection * createConnection(const Poco::Net::StreamSocket & socket, TCPServer & tcp_server) override { if (!allowed_client_hosts.empty() && !allowed_client_hosts.contains(socket.peerAddress().host())) - throw Exception("Connections from " + socket.peerAddress().toString() + " are not allowed", ErrorCodes::IP_ADDRESS_NOT_ALLOWED); + throw Exception(ErrorCodes::IP_ADDRESS_NOT_ALLOWED, "Connections from {} are not allowed", socket.peerAddress().toString()); try { diff --git a/src/Server/TLSHandler.h b/src/Server/TLSHandler.h index 5b7377515c1..dd025e3e165 100644 --- a/src/Server/TLSHandler.h +++ b/src/Server/TLSHandler.h @@ -45,8 +45,7 @@ public: stack_data.socket = socket(); stack_data.certificate = certificate; #else - throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.", - ErrorCodes::SUPPORT_IS_DISABLED}; + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support."); #endif } private: diff --git a/src/Storages/AlterCommands.cpp b/src/Storages/AlterCommands.cpp index 3c1426d33a5..6a1b5dac494 100644 --- a/src/Storages/AlterCommands.cpp +++ b/src/Storages/AlterCommands.cpp @@ -101,7 +101,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ if (ast_col_decl.codec) { if (ast_col_decl.default_specifier == "ALIAS") - throw Exception{"Cannot specify codec for column type ALIAS", ErrorCodes::BAD_ARGUMENTS}; + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot specify codec for column type ALIAS"); command.codec = ast_col_decl.codec; } if (command_ast->column) @@ -347,8 +347,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ const auto & identifier = identifier_ast->as(); auto insertion = command.settings_resets.emplace(identifier.name()); if (!insertion.second) - throw Exception("Duplicate setting name " + backQuote(identifier.name()), - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Duplicate setting name {}", backQuote(identifier.name())); } return command; } @@ -497,8 +496,7 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) if (if_not_exists) return; else - throw Exception{"Cannot add index " + index_name + ": index with this name already exists", - ErrorCodes::ILLEGAL_COLUMN}; + throw 
Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot add index {}: index with this name already exists", index_name); } auto insert_it = metadata.secondary_indices.end(); @@ -521,9 +519,8 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) { auto hints = metadata.secondary_indices.getHints(after_index_name); auto hints_string = !hints.empty() ? ", may be you meant: " + toString(hints) : ""; - throw Exception( - "Wrong index name. Cannot find index " + backQuote(after_index_name) + " to insert after" + hints_string, - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong index name. Cannot find index {} to insert after{}", + backQuote(after_index_name), hints_string); } ++insert_it; @@ -549,8 +546,8 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) return; auto hints = metadata.secondary_indices.getHints(index_name); auto hints_string = !hints.empty() ? ", may be you meant: " + toString(hints) : ""; - throw Exception( - "Wrong index name. Cannot find index " + backQuote(index_name) + " to drop" + hints_string, ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong index name. Cannot find index {} to drop{}", + backQuote(index_name), hints_string); } metadata.secondary_indices.erase(erase_it); @@ -569,8 +566,8 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) { if (if_not_exists) return; - throw Exception("Cannot add constraint " + constraint_name + ": constraint with this name already exists", - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot add constraint {}: constraint with this name already exists", + constraint_name); } auto * insert_it = constraints.end(); @@ -589,8 +586,8 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) { if (if_exists) return; - throw Exception("Wrong constraint name. Cannot find constraint `" + constraint_name + "` to drop", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong constraint name. Cannot find constraint `{}` to drop", + constraint_name); } constraints.erase(erase_it); metadata.constraints = ConstraintsDescription(constraints); @@ -684,7 +681,7 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context) rename_visitor.visit(index.definition_ast); } else - throw Exception("Wrong parameter type in ALTER query", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong parameter type in ALTER query"); } namespace @@ -905,30 +902,20 @@ std::optional AlterCommand::tryConvertToMutationCommand(Storage return result; } -bool AlterCommands::hasInvertedIndex(const StorageInMemoryMetadata & metadata, ContextPtr context) +bool AlterCommands::hasInvertedIndex(const StorageInMemoryMetadata & metadata) { for (const auto & index : metadata.secondary_indices) { - IndexDescription index_desc; - try - { - index_desc = IndexDescription::getIndexFromAST(index.definition_ast, metadata.columns, context); - } - catch (...) - { - continue; - } - if (index.type == GinFilter::FilterName) - { + if (index.type == INVERTED_INDEX_NAME) return true; - } } return false; } + void AlterCommands::apply(StorageInMemoryMetadata & metadata, ContextPtr context) const { if (!prepared) - throw DB::Exception("Alter commands is not prepared. Cannot apply. It's a bug", ErrorCodes::LOGICAL_ERROR); + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Alter commands is not prepared. Cannot apply. 
It's a bug"); auto metadata_copy = metadata; @@ -1057,7 +1044,7 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const const auto & command = (*this)[i]; if (command.ttl && !table->supportsTTL()) - throw Exception("Engine " + table->getName() + " doesn't support TTL clause", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Engine {} doesn't support TTL clause", table->getName()); const auto & column_name = command.column_name; if (command.type == AlterCommand::ADD_COLUMN) @@ -1193,9 +1180,8 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const const auto required_columns = actions->getRequiredColumns(); if (required_columns.end() != std::find(required_columns.begin(), required_columns.end(), command.column_name)) - throw Exception("Cannot drop column " + backQuote(command.column_name) - + ", because column " + backQuote(column.name) + " depends on it", - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot drop column {}, because column {} depends on it", + backQuote(command.column_name), backQuote(column.name)); } } } @@ -1203,9 +1189,10 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const } else if (!command.if_exists) { - String exception_message = fmt::format("Wrong column name. Cannot find column {} to drop", backQuote(command.column_name)); + constexpr auto message_format = "Wrong column name. Cannot find column {} to drop"; + String exception_message = fmt::format(message_format, backQuote(command.column_name)); all_columns.appendHintsMessage(exception_message, command.column_name); - throw Exception(exception_message, ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); + throw Exception(PreformattedMessage{exception_message, message_format}, ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); } } else if (command.type == AlterCommand::COMMENT_COLUMN) @@ -1214,16 +1201,17 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const { if (!command.if_exists) { - String exception_message = fmt::format("Wrong column name. Cannot find column {} to comment", backQuote(command.column_name)); + constexpr auto message_format = "Wrong column name. 
Cannot find column {} to comment"; + String exception_message = fmt::format(message_format, backQuote(command.column_name)); all_columns.appendHintsMessage(exception_message, command.column_name); - throw Exception(exception_message, ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); + throw Exception(PreformattedMessage{exception_message, message_format}, ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); } } } else if (command.type == AlterCommand::MODIFY_SETTING || command.type == AlterCommand::RESET_SETTING) { if (metadata.settings_changes == nullptr) - throw Exception{"Cannot alter settings, because table engine doesn't support settings changes", ErrorCodes::BAD_ARGUMENTS}; + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot alter settings, because table engine doesn't support settings changes"); } else if (command.type == AlterCommand::RENAME_COLUMN) { @@ -1233,28 +1221,27 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const if (next_command.type == AlterCommand::RENAME_COLUMN) { if (next_command.column_name == command.rename_to) - throw Exception{"Transitive renames in a single ALTER query are not allowed (don't make sense)", - ErrorCodes::NOT_IMPLEMENTED}; + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Transitive renames in a single ALTER query are not allowed (don't make sense)"); else if (next_command.column_name == command.column_name) - throw Exception{"Cannot rename column '" + backQuote(command.column_name) - + "' to two different names in a single ALTER query", - ErrorCodes::BAD_ARGUMENTS}; + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot rename column '{}' to two different names in a single ALTER query", + backQuote(command.column_name)); } } /// TODO Implement nested rename if (all_columns.hasNested(command.column_name)) { - throw Exception{"Cannot rename whole Nested struct", ErrorCodes::NOT_IMPLEMENTED}; + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot rename whole Nested struct"); } if (!all_columns.has(command.column_name)) { if (!command.if_exists) { - String exception_message = fmt::format("Wrong column name. Cannot find column {} to rename", backQuote(command.column_name)); + constexpr auto message_format = "Wrong column name. 
Cannot find column {} to rename"; + String exception_message = fmt::format(message_format, backQuote(command.column_name)); all_columns.appendHintsMessage(exception_message, command.column_name); - throw Exception(exception_message, ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); + throw Exception(PreformattedMessage{exception_message, message_format}, ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); } else continue; diff --git a/src/Storages/AlterCommands.h b/src/Storages/AlterCommands.h index a79827b355d..3e526dcc0bb 100644 --- a/src/Storages/AlterCommands.h +++ b/src/Storages/AlterCommands.h @@ -212,7 +212,7 @@ public: MutationCommands getMutationCommands(StorageInMemoryMetadata metadata, bool materialize_ttl, ContextPtr context, bool with_alters=false) const; /// Check if commands have any inverted index - static bool hasInvertedIndex(const StorageInMemoryMetadata & metadata, ContextPtr context); + static bool hasInvertedIndex(const StorageInMemoryMetadata & metadata); }; } diff --git a/src/Storages/ColumnDefault.cpp b/src/Storages/ColumnDefault.cpp index 3cf49ea69fc..dcb59f7bd65 100644 --- a/src/Storages/ColumnDefault.cpp +++ b/src/Storages/ColumnDefault.cpp @@ -36,7 +36,7 @@ ColumnDefaultKind columnDefaultKindFromString(const std::string & str) if (it != std::end(map)) return it->second; - throw Exception{"Unknown column default specifier: " + str, ErrorCodes::LOGICAL_ERROR}; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown column default specifier: {}", str); } @@ -53,7 +53,7 @@ std::string toString(const ColumnDefaultKind kind) if (it != std::end(map)) return it->second; - throw Exception{"Invalid ColumnDefaultKind", ErrorCodes::LOGICAL_ERROR}; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid ColumnDefaultKind"); } diff --git a/src/Storages/ColumnsDescription.cpp b/src/Storages/ColumnsDescription.cpp index ed2b4fd24e2..b882fee6cfa 100644 --- a/src/Storages/ColumnsDescription.cpp +++ b/src/Storages/ColumnsDescription.cpp @@ -136,7 +136,7 @@ void ColumnDescription::readText(ReadBuffer & buf) ttl = col_ast->ttl; } else - throw Exception("Cannot parse column description", ErrorCodes::CANNOT_PARSE_TEXT); + throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Cannot parse column description"); } } @@ -217,8 +217,7 @@ void ColumnsDescription::add(ColumnDescription column, const String & after_colu { auto range = getNameRange(columns, after_column); if (range.first == range.second) - throw Exception("Wrong column name. Cannot find column " + after_column + " to insert after", - ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "Wrong column name. Cannot find column {} to insert after", after_column); insert_it = range.second; } @@ -268,7 +267,7 @@ void ColumnsDescription::modifyColumnOrder(const String & column_name, const Str auto column_range = getNameRange(columns, column_name); if (column_range.first == column_range.second) - throw Exception("There is no column " + column_name + " in table.", ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no column {} in table.", column_name); std::vector moving_columns; for (auto list_it = column_range.first; list_it != column_range.second;) @@ -287,8 +286,7 @@ void ColumnsDescription::modifyColumnOrder(const String & column_name, const Str /// Checked first auto range = getNameRange(columns, after_column); if (range.first == range.second) - throw Exception("Wrong column name. 
Cannot find column " + after_column + " to insert after", - ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "Wrong column name. Cannot find column {} to insert after", after_column); reorder_column([&]() { return getNameRange(columns, after_column).second; }); } @@ -472,8 +470,7 @@ const ColumnDescription & ColumnsDescription::get(const String & column_name) co { auto it = columns.get<1>().find(column_name); if (it == columns.get<1>().end()) - throw Exception("There is no column " + column_name + " in table.", - ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no column {} in table.", column_name); return *it; } @@ -807,7 +804,7 @@ Block validateColumnsDefaultsAndGetSampleBlock(ASTPtr default_expr_list, const N { for (const auto & child : default_expr_list->children) if (child->as() || child->as() || child->as()) - throw Exception("Select query is not allowed in columns DEFAULT expression", ErrorCodes::THERE_IS_NO_DEFAULT_VALUE); + throw Exception(ErrorCodes::THERE_IS_NO_DEFAULT_VALUE, "Select query is not allowed in columns DEFAULT expression"); try { @@ -815,7 +812,7 @@ Block validateColumnsDefaultsAndGetSampleBlock(ASTPtr default_expr_list, const N const auto actions = ExpressionAnalyzer(default_expr_list, syntax_analyzer_result, context).getActions(true); for (const auto & action : actions->getActions()) if (action.node->type == ActionsDAG::ActionType::ARRAY_JOIN) - throw Exception("Unsupported default value that requires ARRAY JOIN action", ErrorCodes::THERE_IS_NO_DEFAULT_VALUE); + throw Exception(ErrorCodes::THERE_IS_NO_DEFAULT_VALUE, "Unsupported default value that requires ARRAY JOIN action"); return actions->getSampleBlock(); } diff --git a/src/Storages/ColumnsDescription.h b/src/Storages/ColumnsDescription.h index eea5dc7fcbb..e40187fe782 100644 --- a/src/Storages/ColumnsDescription.h +++ b/src/Storages/ColumnsDescription.h @@ -166,7 +166,7 @@ public: removeSubcolumns(it->name); if (!columns.get<1>().modify(it, std::forward(f))) - throw Exception("Cannot modify ColumnDescription for column " + column_name + ": column name cannot be changed", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot modify ColumnDescription for column {}: column name cannot be changed", column_name); addSubcolumns(it->name, it->type); modifyColumnOrder(column_name, after_column, first); diff --git a/src/Storages/CompressionCodecSelector.h b/src/Storages/CompressionCodecSelector.h index 4c088924cdb..ad6e943e821 100644 --- a/src/Storages/CompressionCodecSelector.h +++ b/src/Storages/CompressionCodecSelector.h @@ -79,7 +79,7 @@ public: for (const auto & name : keys) { if (!startsWith(name, "case")) - throw Exception("Unknown element in config: " + config_prefix + "." + name + ", must be 'case'", ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG); + throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG, "Unknown element in config: {}.{}, must be 'case'", config_prefix, name); elements.emplace_back(config, config_prefix + "." 
+ name); } diff --git a/src/Storages/ConstraintsDescription.cpp b/src/Storages/ConstraintsDescription.cpp index 96037b46e52..5207458af8c 100644 --- a/src/Storages/ConstraintsDescription.cpp +++ b/src/Storages/ConstraintsDescription.cpp @@ -57,7 +57,7 @@ ASTs ConstraintsDescription::filterConstraints(ConstraintType selection) const case ASTConstraintDeclaration::Type::ASSUME: return static_cast(ConstraintType::ASSUME); } - throw Exception("Unknown constraint type.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown constraint type."); }; ASTs res; diff --git a/src/Storages/Distributed/DirectoryMonitor.cpp b/src/Storages/Distributed/DirectoryMonitor.cpp index 39e91e19014..eb1d83af851 100644 --- a/src/Storages/Distributed/DirectoryMonitor.cpp +++ b/src/Storages/Distributed/DirectoryMonitor.cpp @@ -121,11 +121,10 @@ namespace { if (expected != calculated) { - String message = "Checksum of extra info doesn't match: corrupted data." - " Reference: " + getHexUIntLowercase(expected.first) + getHexUIntLowercase(expected.second) - + ". Actual: " + getHexUIntLowercase(calculated.first) + getHexUIntLowercase(calculated.second) - + "."; - throw Exception(message, ErrorCodes::CHECKSUM_DOESNT_MATCH); + throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, + "Checksum of extra info doesn't match: corrupted data. Reference: {}{}. Actual: {}{}.", + getHexUIntLowercase(expected.first), getHexUIntLowercase(expected.second), + getHexUIntLowercase(calculated.first), getHexUIntLowercase(calculated.second)); } } diff --git a/src/Storages/Distributed/DistributedSink.cpp b/src/Storages/Distributed/DistributedSink.cpp index 7ada07b83fe..3cf1ef2678f 100644 --- a/src/Storages/Distributed/DistributedSink.cpp +++ b/src/Storages/Distributed/DistributedSink.cpp @@ -128,7 +128,7 @@ DistributedSink::DistributedSink( { const auto & settings = context->getSettingsRef(); if (settings.max_distributed_depth && context->getClientInfo().distributed_depth >= settings.max_distributed_depth) - throw Exception("Maximum distributed depth exceeded", ErrorCodes::TOO_LARGE_DISTRIBUTED_DEPTH); + throw Exception(ErrorCodes::TOO_LARGE_DISTRIBUTED_DEPTH, "Maximum distributed depth exceeded"); context->getClientInfo().distributed_depth += 1; random_shard_insert = settings.insert_distributed_one_random_shard && !storage.has_sharding_key; } @@ -265,7 +265,7 @@ void DistributedSink::waitForJobs() if (static_cast(watch.elapsedSeconds()) > insert_timeout) { ProfileEvents::increment(ProfileEvents::DistributedSyncInsertionTimeoutExceeded); - throw Exception("Synchronous distributed insert timeout exceeded.", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Synchronous distributed insert timeout exceeded."); } } @@ -359,12 +359,12 @@ DistributedSink::runWritingJob(JobReplica & job, const Block & current_block, si { /// Skip replica_index in case of internal replication if (shard_job.replicas_jobs.size() != 1) - throw Exception("There are several writing job for an automatically replicated shard", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "There are several writing job for an automatically replicated shard"); /// TODO: it make sense to rewrite skip_unavailable_shards and max_parallel_replicas here auto results = shard_info.pool->getManyChecked(timeouts, &settings, PoolMode::GET_ONE, main_table.getQualifiedName()); if (results.empty() || results.front().entry.isNull()) - throw Exception("Expected exactly one connection for shard " + toString(job.shard_index), 
ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected exactly one connection for shard {}", toString(job.shard_index)); job.connection_entry = std::move(results.front().entry); } @@ -374,11 +374,11 @@ DistributedSink::runWritingJob(JobReplica & job, const Block & current_block, si const ConnectionPoolPtr & connection_pool = shard_info.per_replica_pools.at(job.replica_index); if (!connection_pool) - throw Exception("Connection pool for replica " + replica.readableString() + " does not exist", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Connection pool for replica {} does not exist", replica.readableString()); job.connection_entry = connection_pool->get(timeouts, &settings); if (job.connection_entry.isNull()) - throw Exception("Got empty connection for replica" + replica.readableString(), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got empty connection for replica{}", replica.readableString()); } if (throttler) @@ -635,7 +635,7 @@ void DistributedSink::writeAsyncImpl(const Block & block, size_t shard_id) settings.prefer_localhost_replica, settings.use_compact_format_in_distributed_parts_names); if (path.empty()) - throw Exception("Directory name for async inserts is empty", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Directory name for async inserts is empty"); writeToShard(shard_info, block_to_send, {path}); } } diff --git a/src/Storages/ExternalDataSourceConfiguration.cpp b/src/Storages/ExternalDataSourceConfiguration.cpp index c13c3c4f1ef..d7c3fe44f38 100644 --- a/src/Storages/ExternalDataSourceConfiguration.cpp +++ b/src/Storages/ExternalDataSourceConfiguration.cpp @@ -138,7 +138,8 @@ std::optional getExternalDataSourceConfiguration( || configuration.database.empty() || (configuration.table.empty() && !is_database_engine))) { throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Named collection of connection parameters is missing some of the parameters and no key-value arguments are added"); + "Named collection of connection parameters is missing some " + "of the parameters and no key-value arguments are added"); } /// Check key-value arguments. 
@@ -250,7 +251,8 @@ std::optional getExternalDataSourceConfiguration( if (configuration.host.empty() || configuration.port == 0 || configuration.username.empty() || configuration.table.empty()) { throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Named collection of connection parameters is missing some of the parameters and dictionary parameters are not added"); + "Named collection of connection parameters is missing some " + "of the parameters and dictionary parameters are not added"); } return ExternalDataSourceInfo{ .configuration = configuration, .specific_args = {}, .settings_changes = config_settings }; } @@ -373,7 +375,8 @@ ExternalDataSourcesByPriority getExternalDataSourceConfigurationByPriority( || replica_configuration.username.empty() || replica_configuration.password.empty()) { throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Named collection of connection parameters is missing some of the parameters and no other dictionary parameters are added"); + "Named collection of connection parameters is missing some " + "of the parameters and no other dictionary parameters are added"); } configuration.replicas_configurations[priority].emplace_back(replica_configuration); diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index 5835dc3294f..7838db881e9 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -306,7 +306,8 @@ Pipe StorageFileLog::read( { /// If there are MVs depended on this table, we just forbid reading if (!local_context->getSettingsRef().stream_like_engine_allow_direct_select) - throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "Direct select is not allowed. To enable use setting `stream_like_engine_allow_direct_select`"); + throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, + "Direct select is not allowed. 
To enable use setting `stream_like_engine_allow_direct_select`"); if (mv_attached) throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "Cannot read from StorageFileLog with attached materialized views"); @@ -314,7 +315,7 @@ Pipe StorageFileLog::read( std::lock_guard lock(file_infos_mutex); if (running_streams) { - throw Exception("Another select query is running on this table, need to wait it finish.", ErrorCodes::CANNOT_SELECT); + throw Exception(ErrorCodes::CANNOT_SELECT, "Another select query is running on this table, need to wait it finish."); } updateFileInfos(); @@ -670,7 +671,7 @@ bool StorageFileLog::streamToViews() auto table_id = getStorageID(); auto table = DatabaseCatalog::instance().getTable(table_id, getContext()); if (!table) - throw Exception("Engine table " + table_id.getNameForLogs() + " doesn't exist", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Engine table {} doesn't exist", table_id.getNameForLogs()); auto metadata_snapshot = getInMemoryMetadataPtr(); auto storage_snapshot = getStorageSnapshot(metadata_snapshot, getContext()); @@ -766,35 +767,34 @@ void registerStorageFileLog(StorageFactory & factory) } else if (num_threads < 1) { - throw Exception("Number of threads to parse files can not be lower than 1", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Number of threads to parse files can not be lower than 1"); } if (filelog_settings->max_block_size.changed && filelog_settings->max_block_size.value < 1) { - throw Exception("filelog_max_block_size can not be lower than 1", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "filelog_max_block_size can not be lower than 1"); } if (filelog_settings->poll_max_batch_size.changed && filelog_settings->poll_max_batch_size.value < 1) { - throw Exception("filelog_poll_max_batch_size can not be lower than 1", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "filelog_poll_max_batch_size can not be lower than 1"); } size_t init_sleep_time = filelog_settings->poll_directory_watch_events_backoff_init.totalMilliseconds(); size_t max_sleep_time = filelog_settings->poll_directory_watch_events_backoff_max.totalMilliseconds(); if (init_sleep_time > max_sleep_time) { - throw Exception( - "poll_directory_watch_events_backoff_init can not be greater than poll_directory_watch_events_backoff_max", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "poll_directory_watch_events_backoff_init can not " + "be greater than poll_directory_watch_events_backoff_max"); } if (filelog_settings->poll_directory_watch_events_backoff_factor.changed && !filelog_settings->poll_directory_watch_events_backoff_factor.value) - throw Exception("poll_directory_watch_events_backoff_factor can not be 0", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "poll_directory_watch_events_backoff_factor can not be 0"); if (args_count != 2) - throw Exception( - "Arguments size of StorageFileLog should be 2, path and format name", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Arguments size of StorageFileLog should be 2, path and format name"); auto path_ast = evaluateConstantExpressionAsLiteral(engine_args[0], args.getContext()); auto format_ast = evaluateConstantExpressionAsLiteral(engine_args[1], args.getContext()); diff --git a/src/Storages/Freeze.cpp b/src/Storages/Freeze.cpp index 74adf3de0ae..d2e19551c92 100644 --- a/src/Storages/Freeze.cpp +++ 
b/src/Storages/Freeze.cpp @@ -132,7 +132,9 @@ BlockIO Unfreezer::systemUnfreeze(const String & backup_name) static constexpr auto config_key = "enable_system_unfreeze"; if (!config.getBool(config_key, false)) { - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Support for SYSTEM UNFREEZE query is disabled. You can enable it via '{}' server setting", config_key); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, + "Support for SYSTEM UNFREEZE query is disabled. You can enable it via '{}' server setting", + config_key); } auto disks_map = local_context->getDisksMap(); diff --git a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp b/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp index c95a1104f83..dac841359da 100644 --- a/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp +++ b/src/Storages/HDFS/AsynchronousReadBufferFromHDFS.cpp @@ -179,10 +179,10 @@ off_t AsynchronousReadBufferFromHDFS::seek(off_t offset, int whence) ProfileEvents::increment(ProfileEvents::RemoteFSSeeks); if (whence != SEEK_SET) - throw Exception("Only SEEK_SET mode is allowed.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE); + throw Exception(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Only SEEK_SET mode is allowed."); if (offset < 0) - throw Exception("Seek position is out of bounds. Offset: " + std::to_string(offset), ErrorCodes::SEEK_POSITION_OUT_OF_BOUND); + throw Exception(ErrorCodes::SEEK_POSITION_OUT_OF_BOUND, "Seek position is out of bounds. Offset: {}", offset); size_t new_pos = offset; diff --git a/src/Storages/HDFS/HDFSCommon.cpp b/src/Storages/HDFS/HDFSCommon.cpp index c6b4a5da8b0..932e80831fe 100644 --- a/src/Storages/HDFS/HDFSCommon.cpp +++ b/src/Storages/HDFS/HDFSCommon.cpp @@ -75,8 +75,7 @@ void HDFSBuilderWrapper::loadFromConfig(const Poco::Util::AbstractConfiguration #if USE_KRB5 if (isUser) { - throw Exception("hadoop.security.kerberos.ticket.cache.path cannot be set per user", - ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG); + throw Exception(ErrorCodes::EXCESSIVE_ELEMENT_IN_CONFIG, "hadoop.security.kerberos.ticket.cache.path cannot be set per user"); } hadoop_security_kerberos_ticket_cache_path = config.getString(key_path); @@ -103,7 +102,7 @@ void HDFSBuilderWrapper::runKinit() } catch (const DB::Exception & e) { - throw Exception("KerberosInit failure: "+ getExceptionMessage(e, false), ErrorCodes::KERBEROS_ERROR); + throw Exception(ErrorCodes::KERBEROS_ERROR, "KerberosInit failure: {}", getExceptionMessage(e, false)); } LOG_DEBUG(&Poco::Logger::get("HDFSClient"), "Finished KerberosInit"); } @@ -116,13 +115,12 @@ HDFSBuilderWrapper createHDFSBuilder(const String & uri_str, const Poco::Util::A auto port = uri.getPort(); const String path = "//"; if (host.empty()) - throw Exception("Illegal HDFS URI: " + uri.toString(), ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal HDFS URI: {}", uri.toString()); HDFSBuilderWrapper builder; if (builder.get() == nullptr) - throw Exception("Unable to create builder to connect to HDFS: " + - uri.toString() + " " + String(hdfsGetLastError()), - ErrorCodes::NETWORK_ERROR); + throw Exception(ErrorCodes::NETWORK_ERROR, "Unable to create builder to connect to HDFS: {} {}", + uri.toString(), String(hdfsGetLastError())); hdfsBuilderConfSetStr(builder.get(), "input.read.timeout", "60000"); // 1 min hdfsBuilderConfSetStr(builder.get(), "input.write.timeout", "60000"); // 1 min @@ -175,8 +173,7 @@ HDFSFSPtr createHDFSFS(hdfsBuilder * builder) { HDFSFSPtr fs(hdfsBuilderConnect(builder)); if (fs == nullptr) - throw Exception("Unable to connect to HDFS: 
" + String(hdfsGetLastError()), - ErrorCodes::NETWORK_ERROR); + throw Exception(ErrorCodes::NETWORK_ERROR, "Unable to connect to HDFS: {}", String(hdfsGetLastError())); return fs; } diff --git a/src/Storages/HDFS/ReadBufferFromHDFS.cpp b/src/Storages/HDFS/ReadBufferFromHDFS.cpp index f7d36132f9a..7f80dcce2d2 100644 --- a/src/Storages/HDFS/ReadBufferFromHDFS.cpp +++ b/src/Storages/HDFS/ReadBufferFromHDFS.cpp @@ -180,10 +180,10 @@ bool ReadBufferFromHDFS::nextImpl() off_t ReadBufferFromHDFS::seek(off_t offset_, int whence) { if (whence != SEEK_SET) - throw Exception("Only SEEK_SET mode is allowed.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE); + throw Exception(ErrorCodes::CANNOT_SEEK_THROUGH_FILE, "Only SEEK_SET mode is allowed."); if (offset_ < 0) - throw Exception("Seek position is out of bounds. Offset: " + std::to_string(offset_), ErrorCodes::SEEK_POSITION_OUT_OF_BOUND); + throw Exception(ErrorCodes::SEEK_POSITION_OUT_OF_BOUND, "Seek position is out of bounds. Offset: {}", offset_); if (!working_buffer.empty() && size_t(offset_) >= impl->getPosition() - working_buffer.size() diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index b0bad44092a..c915213f4ac 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -132,7 +132,7 @@ namespace return {uri.substr(pos), uri.substr(0, pos)}; } - throw Exception("Storage HDFS requires valid URL to be set", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Storage HDFS requires valid URL to be set"); } std::vector getPathsList(const String & path_from_uri, const String & uri_without_path, ContextPtr context, std::unordered_map * last_mod_times = nullptr) @@ -204,9 +204,8 @@ ColumnsDescription StorageHDFS::getTableStructureFromData( if (paths.empty() && !FormatFactory::instance().checkIfFormatHasExternalSchemaReader(format)) throw Exception( ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, because there are no files in HDFS with provided path. You must " - "specify table structure manually", - format); + "Cannot extract table structure from {} format file, because there are no files in HDFS with provided path." 
+ " You must specify table structure manually", format); std::optional columns_from_cache; if (ctx->getSettingsRef().schema_inference_use_cache_for_hdfs) @@ -342,13 +341,6 @@ HDFSSource::HDFSSource( initialize(); } -void HDFSSource::onCancel() -{ - std::lock_guard lock(reader_mutex); - if (reader) - reader->cancel(); -} - bool HDFSSource::initialize() { current_path = (*file_iterator)(); @@ -388,8 +380,12 @@ Chunk HDFSSource::generate() { while (true) { - if (!reader || isCancelled()) + if (isCancelled() || !reader) + { + if (reader) + reader->cancel(); break; + } Chunk chunk; if (reader->pull(chunk)) @@ -417,15 +413,12 @@ Chunk HDFSSource::generate() return Chunk(std::move(columns), num_rows); } - { - std::lock_guard lock(reader_mutex); - reader.reset(); - pipeline.reset(); - read_buf.reset(); + reader.reset(); + pipeline.reset(); + read_buf.reset(); - if (!initialize()) - break; - } + if (!initialize()) + break; } return {}; } @@ -717,8 +710,9 @@ void registerStorageHDFS(StorageFactory & factory) ASTs & engine_args = args.engine_args; if (engine_args.empty() || engine_args.size() > 3) - throw Exception( - "Storage HDFS requires 1, 2 or 3 arguments: url, name of used format (taken from file extension by default) and optional compression method.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Storage HDFS requires 1, 2 or 3 arguments: " + "url, name of used format (taken from file extension by default) and optional compression method."); engine_args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[0], args.getLocalContext()); diff --git a/src/Storages/HDFS/StorageHDFS.h b/src/Storages/HDFS/StorageHDFS.h index b641f5bfb43..585f5df6ceb 100644 --- a/src/Storages/HDFS/StorageHDFS.h +++ b/src/Storages/HDFS/StorageHDFS.h @@ -142,8 +142,6 @@ public: Chunk generate() override; - void onCancel() override; - private: StorageHDFSPtr storage; Block block_for_format; @@ -155,8 +153,6 @@ private: std::unique_ptr read_buf; std::unique_ptr pipeline; std::unique_ptr reader; - /// onCancel and generate can be called concurrently. - std::mutex reader_mutex; String current_path; /// Recreate ReadBuffer and PullingPipelineExecutor for each file. 
diff --git a/src/Storages/HDFS/StorageHDFSCluster.cpp b/src/Storages/HDFS/StorageHDFSCluster.cpp index 71007ebc371..91204d852ae 100644 --- a/src/Storages/HDFS/StorageHDFSCluster.cpp +++ b/src/Storages/HDFS/StorageHDFSCluster.cpp @@ -42,15 +42,17 @@ StorageHDFSCluster::StorageHDFSCluster( const String & format_name_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, - const String & compression_method_) + const String & compression_method_, + bool structure_argument_was_provided_) : IStorageCluster(table_id_) , cluster_name(cluster_name_) , uri(uri_) , format_name(format_name_) , compression_method(compression_method_) + , structure_argument_was_provided(structure_argument_was_provided_) { - context_->getRemoteHostFilter().checkURL(Poco::URI(uri_)); checkHDFSURL(uri_); + context_->getRemoteHostFilter().checkURL(Poco::URI(uri_)); StorageInMemoryMetadata storage_metadata; @@ -58,7 +60,6 @@ StorageHDFSCluster::StorageHDFSCluster( { auto columns = StorageHDFS::getTableStructureFromData(format_name, uri_, compression_method, context_); storage_metadata.setColumns(columns); - add_columns_structure_to_query = true; } else storage_metadata.setColumns(columns_); @@ -91,7 +92,7 @@ Pipe StorageHDFSCluster::read( const bool add_agg_info = processed_stage == QueryProcessingStage::WithMergeableState; auto query_to_send = query_info.original_query->clone(); - if (add_columns_structure_to_query) + if (!structure_argument_was_provided) addColumnsStructureToQueryWithClusterEngine( query_to_send, StorageDictionary::generateNamesAndTypesDescription(storage_snapshot->metadata->getColumns().getAll()), 3, getName()); diff --git a/src/Storages/HDFS/StorageHDFSCluster.h b/src/Storages/HDFS/StorageHDFSCluster.h index 08f67aef582..4d6548a6b78 100644 --- a/src/Storages/HDFS/StorageHDFSCluster.h +++ b/src/Storages/HDFS/StorageHDFSCluster.h @@ -28,7 +28,8 @@ public: const String & format_name_, const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, - const String & compression_method_); + const String & compression_method_, + bool structure_argument_was_provided_); std::string getName() const override { return "HDFSCluster"; } @@ -48,7 +49,7 @@ private: String uri; String format_name; String compression_method; - bool add_columns_structure_to_query = false; + bool structure_argument_was_provided; }; diff --git a/src/Storages/HDFS/WriteBufferFromHDFS.cpp b/src/Storages/HDFS/WriteBufferFromHDFS.cpp index f7a34be42e6..2198bb65761 100644 --- a/src/Storages/HDFS/WriteBufferFromHDFS.cpp +++ b/src/Storages/HDFS/WriteBufferFromHDFS.cpp @@ -51,8 +51,8 @@ struct WriteBufferFromHDFS::WriteBufferFromHDFSImpl if (fout == nullptr) { - throw Exception("Unable to open HDFS file: " + path + " error: " + std::string(hdfsGetLastError()), - ErrorCodes::CANNOT_OPEN_FILE); + throw Exception(ErrorCodes::CANNOT_OPEN_FILE, "Unable to open HDFS file: {} error: {}", + path, std::string(hdfsGetLastError())); } } @@ -66,8 +66,7 @@ struct WriteBufferFromHDFS::WriteBufferFromHDFSImpl { int bytes_written = hdfsWrite(fs.get(), fout, start, safe_cast(size)); if (bytes_written < 0) - throw Exception("Fail to write HDFS file: " + hdfs_uri + " " + std::string(hdfsGetLastError()), - ErrorCodes::NETWORK_ERROR); + throw Exception(ErrorCodes::NETWORK_ERROR, "Fail to write HDFS file: {} {}", hdfs_uri, std::string(hdfsGetLastError())); if (write_settings.remote_throttler) write_settings.remote_throttler->add(bytes_written, ProfileEvents::RemoteWriteThrottlerBytes, 
ProfileEvents::RemoteWriteThrottlerSleepMicroseconds); diff --git a/src/Storages/Hive/HiveFile.cpp b/src/Storages/Hive/HiveFile.cpp index 219fe562f2c..af727b744f3 100644 --- a/src/Storages/Hive/HiveFile.cpp +++ b/src/Storages/Hive/HiveFile.cpp @@ -249,9 +249,8 @@ void HiveORCFile::loadSplitMinMaxIndexesImpl() auto stripe_num = raw_reader->getNumberOfStripes(); auto stripe_stats_num = raw_reader->getNumberOfStripeStatistics(); if (stripe_num != stripe_stats_num) - throw Exception( - fmt::format("orc file:{} has different strip num {} and strip statistics num {}", path, stripe_num, stripe_stats_num), - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "orc file:{} has different strip num {} and strip statistics num {}", path, stripe_num, stripe_stats_num); split_minmax_idxes.resize(stripe_num); for (size_t i = 0; i < stripe_num; ++i) diff --git a/src/Storages/Hive/HiveFile.h b/src/Storages/Hive/HiveFile.h index 1556d6860c1..1f5e31f1d54 100644 --- a/src/Storages/Hive/HiveFile.h +++ b/src/Storages/Hive/HiveFile.h @@ -72,7 +72,7 @@ public: { return VALID_HDFS_FORMATS.find(format_class)->second; } - throw Exception("Unsupported hdfs file format " + format_class, ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsupported hdfs file format {}", format_class); } IHiveFile( @@ -134,12 +134,12 @@ public: protected: virtual void loadFileMinMaxIndexImpl() { - throw Exception("Method loadFileMinMaxIndexImpl is not supported by hive file:" + getFormatName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method loadFileMinMaxIndexImpl is not supported by hive file:{}", getFormatName()); } virtual void loadSplitMinMaxIndexesImpl() { - throw Exception("Method loadSplitMinMaxIndexesImpl is not supported by hive file:" + getFormatName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method loadSplitMinMaxIndexesImpl is not supported by hive file:{}", getFormatName()); } virtual std::optional getRowsImpl() = 0; diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index 3fb7be5b697..445f496bbed 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -451,9 +451,9 @@ void StorageHive::lazyInitialize() format_name = "HiveText"; break; case FileFormat::RC_FILE: - throw Exception("Unsopported hive format rc_file", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsopported hive format rc_file"); case FileFormat::SEQUENCE_FILE: - throw Exception("Unsopported hive format sequence_file", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Unsopported hive format sequence_file"); case FileFormat::AVRO: format_name = "Avro"; break; @@ -556,7 +556,7 @@ static HiveFilePtr createHiveFile( } else { - throw Exception("IHiveFile not implemented for format " + format_name, ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "IHiveFile not implemented for format {}", format_name); } return hive_file; } @@ -587,9 +587,8 @@ HiveFiles StorageHive::collectHiveFilesFromPartition( /// Check partition values if (partition.values.size() != partition_names.size()) - throw Exception( - fmt::format("Partition value size not match, expect {}, but got {}", partition_names.size(), partition.values.size()), - ErrorCodes::INVALID_PARTITION_VALUE); + throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, + "Partition value size not match, expect {}, but got {}", partition_names.size(), 
partition.values.size()); /// Join partition values in CSV format WriteBufferFromOwnString wb; @@ -608,7 +607,7 @@ HiveFiles StorageHive::collectHiveFilesFromPartition( auto reader = std::make_unique(pipeline); Block block; if (!reader->pull(block) || !block.rows()) - throw Exception("Could not parse partition value: " + wb.str(), ErrorCodes::INVALID_PARTITION_VALUE); + throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, "Could not parse partition value: {}", wb.str()); /// Get partition values FieldVector fields(partition_names.size()); @@ -861,7 +860,11 @@ HiveFiles StorageHive::collectHiveFiles( hit_parttions_num += 1; if (hive_max_query_partitions > 0 && hit_parttions_num > hive_max_query_partitions) { - throw Exception(ErrorCodes::TOO_MANY_PARTITIONS, "Too many partitions to query for table {}.{} . Maximum number of partitions to read is limited to {}", hive_database, hive_table, hive_max_query_partitions); + throw Exception(ErrorCodes::TOO_MANY_PARTITIONS, + "Too many partitions " + "to query for table {}.{} . Maximum number of partitions " + "to read is limited to {}", + hive_database, hive_table, hive_max_query_partitions); } hive_files.insert(std::end(hive_files), std::begin(hive_files_in_partition), std::end(hive_files_in_partition)); } @@ -891,7 +894,7 @@ HiveFiles StorageHive::collectHiveFiles( SinkToStoragePtr StorageHive::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /* metadata_snapshot*/, ContextPtr /*context*/) { - throw Exception("Method write is not implemented for StorageHive", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method write is not implemented for StorageHive"); } NamesAndTypesList StorageHive::getVirtuals() const @@ -968,13 +971,13 @@ void registerStorageHive(StorageFactory & factory) ASTs & engine_args = args.engine_args; if (engine_args.size() != 3) - throw Exception( - "Storage Hive requires 3 arguments: hive metastore address, hive database and hive table", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Storage Hive requires 3 arguments: " + "hive metastore address, hive database and hive table"); auto * partition_by = args.storage_def->partition_by; if (!partition_by) - throw Exception("Storage Hive requires partition by clause", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Storage Hive requires partition by clause"); for (auto & engine_arg : engine_args) engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, args.getLocalContext()); diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 76100624d51..9bcfff65c95 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -39,12 +39,9 @@ RWLockImpl::LockHolder IStorage::tryLockTimed( if (!lock_holder) { const String type_str = type == RWLockImpl::Type::Read ? "READ" : "WRITE"; - throw Exception( - type_str + " locking attempt on \"" + getStorageID().getFullTableName() + "\" has timed out! (" - + std::to_string(acquire_timeout.count()) - + "ms) " - "Possible deadlock avoided. Client should retry.", - ErrorCodes::DEADLOCK_AVOIDED); + throw Exception(ErrorCodes::DEADLOCK_AVOIDED, + "{} locking attempt on \"{}\" has timed out! ({}ms) Possible deadlock avoided. 
Client should retry.", + type_str, getStorageID(), acquire_timeout.count()); } return lock_holder; } @@ -53,10 +50,10 @@ TableLockHolder IStorage::lockForShare(const String & query_id, const std::chron { TableLockHolder result = tryLockTimed(drop_lock, RWLockImpl::Read, query_id, acquire_timeout); - if (is_dropped) + if (is_dropped || is_detached) { auto table_id = getStorageID(); - throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Table {}.{} is dropped", table_id.database_name, table_id.table_name); + throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Table {}.{} is dropped or detached", table_id.database_name, table_id.table_name); } return result; } @@ -65,7 +62,7 @@ TableLockHolder IStorage::tryLockForShare(const String & query_id, const std::ch { TableLockHolder result = tryLockTimed(drop_lock, RWLockImpl::Read, query_id, acquire_timeout); - if (is_dropped) + if (is_dropped || is_detached) { // Table was dropped while acquiring the lock result = nullptr; @@ -82,10 +79,10 @@ IStorage::AlterLockHolder IStorage::lockForAlter(const std::chrono::milliseconds throw Exception(ErrorCodes::DEADLOCK_AVOIDED, "Locking attempt for ALTER on \"{}\" has timed out! ({} ms) " "Possible deadlock avoided. Client should retry.", - getStorageID().getFullTableName(), std::to_string(acquire_timeout.count())); + getStorageID().getFullTableName(), acquire_timeout.count()); - if (is_dropped) - throw Exception("Table is dropped", ErrorCodes::TABLE_IS_DROPPED); + if (is_dropped || is_detached) + throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Table {} is dropped or detached", getStorageID()); return lock; } @@ -96,8 +93,8 @@ TableExclusiveLockHolder IStorage::lockExclusively(const String & query_id, cons TableExclusiveLockHolder result; result.drop_lock = tryLockTimed(drop_lock, RWLockImpl::Write, query_id, acquire_timeout); - if (is_dropped) - throw Exception("Table is dropped", ErrorCodes::TABLE_IS_DROPPED); + if (is_dropped || is_detached) + throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Table {} is dropped or detached", getStorageID()); return result; } @@ -110,7 +107,7 @@ Pipe IStorage::watch( size_t /*max_block_size*/, size_t /*num_streams*/) { - throw Exception("Method watch is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method watch is not supported by storage {}", getName()); } Pipe IStorage::read( @@ -122,7 +119,7 @@ Pipe IStorage::read( size_t /*max_block_size*/, size_t /*num_streams*/) { - throw Exception("Method read is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method read is not supported by storage {}", getName()); } void IStorage::read( @@ -170,7 +167,7 @@ std::optional IStorage::distributedWrite( Pipe IStorage::alterPartition( const StorageMetadataPtr & /* metadata_snapshot */, const PartitionCommands & /* commands */, ContextPtr /* context */) { - throw Exception("Partition operations are not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Partition operations are not supported by storage {}", getName()); } void IStorage::alter(const AlterCommands & params, ContextPtr context, AlterLockHolder &) @@ -194,13 +191,13 @@ void IStorage::checkAlterIsPossible(const AlterCommands & commands, ContextPtr / void IStorage::checkMutationIsPossible(const MutationCommands & /*commands*/, const Settings & /*settings*/) const { - throw Exception("Table engine " + getName() + " doesn't support mutations", 
ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Table engine {} doesn't support mutations", getName()); } void IStorage::checkAlterPartitionIsPossible( const PartitionCommands & /*commands*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Settings & /*settings*/) const { - throw Exception("Table engine " + getName() + " doesn't support partitioning", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Table engine {} doesn't support partitioning", getName()); } StorageID IStorage::getStorageID() const diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 7d927b51e5f..a4825358d6e 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -19,7 +19,6 @@ #include #include -#include #include @@ -392,7 +391,7 @@ public: const StorageMetadataPtr & /*metadata_snapshot*/, ContextPtr /*context*/) { - throw Exception("Method write is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method write is not supported by storage {}", getName()); } /** Writes the data to a table in distributed manner. @@ -424,7 +423,7 @@ public: ContextPtr /* context */, TableExclusiveLockHolder &) { - throw Exception("Truncate is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Truncate is not supported by storage {}", getName()); } virtual void checkTableCanBeRenamed(const StorageID & /*new_name*/) const {} @@ -484,35 +483,35 @@ public: const Names & /* deduplicate_by_columns */, ContextPtr /*context*/) { - throw Exception("Method optimize is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method optimize is not supported by storage {}", getName()); } /// Mutate the table contents virtual void mutate(const MutationCommands &, ContextPtr, bool /*force_wait*/) { - throw Exception("Mutations are not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Mutations are not supported by storage {}", getName()); } /// Cancel a mutation. virtual CancellationCode killMutation(const String & /*mutation_id*/) { - throw Exception("Mutations are not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Mutations are not supported by storage {}", getName()); } virtual void waitForMutation(const String & /*mutation_id*/) { - throw Exception("Mutations are not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Mutations are not supported by storage {}", getName()); } virtual void setMutationCSN(const String & /*mutation_id*/, UInt64 /*csn*/) { - throw Exception("Mutations are not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Mutations are not supported by storage {}", getName()); } /// Cancel a part move to shard. 
virtual CancellationCode killPartMoveToShard(const UUID & /*task_uuid*/) { - throw Exception("Part moves between shards are not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Part moves between shards are not supported by storage {}", getName()); } /** If the table have to do some complicated work on startup, @@ -562,6 +561,7 @@ public: virtual void onActionLockRemove(StorageActionBlockType /* action_type */) {} std::atomic is_dropped{false}; + std::atomic is_detached{false}; /// Does table support index for IN sections virtual bool supportsIndexForIn() const { return false; } @@ -570,7 +570,7 @@ public: virtual bool mayBenefitFromIndexForIn(const ASTPtr & /* left_in_operand */, ContextPtr /* query_context */, const StorageMetadataPtr & /* metadata_snapshot */) const { return false; } /// Checks validity of the data - virtual CheckResults checkData(const ASTPtr & /* query */, ContextPtr /* context */) { throw Exception("Check query is not supported for " + getName() + " storage", ErrorCodes::NOT_IMPLEMENTED); } + virtual CheckResults checkData(const ASTPtr & /* query */, ContextPtr /* context */) { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Check query is not supported for {} storage", getName()); } /// Checks that table could be dropped right now /// Otherwise - throws an exception with detailed information. diff --git a/src/Storages/IndicesDescription.cpp b/src/Storages/IndicesDescription.cpp index 68cf6dfbb28..2e07aceeaa9 100644 --- a/src/Storages/IndicesDescription.cpp +++ b/src/Storages/IndicesDescription.cpp @@ -72,16 +72,16 @@ IndexDescription IndexDescription::getIndexFromAST(const ASTPtr & definition_ast { const auto * index_definition = definition_ast->as(); if (!index_definition) - throw Exception("Cannot create skip index from non ASTIndexDeclaration AST", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot create skip index from non ASTIndexDeclaration AST"); if (index_definition->name.empty()) - throw Exception("Skip index must have name in definition.", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Skip index must have name in definition."); if (!index_definition->type) - throw Exception("TYPE is required for index", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "TYPE is required for index"); if (index_definition->type->parameters && !index_definition->type->parameters->children.empty()) - throw Exception("Index type cannot have parameters", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Index type cannot have parameters"); IndexDescription result; result.definition_ast = index_definition->clone(); @@ -111,7 +111,7 @@ IndexDescription IndexDescription::getIndexFromAST(const ASTPtr & definition_ast { const auto * argument = definition_arguments->children[i]->as(); if (!argument) - throw Exception("Only literals can be skip index arguments", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Only literals can be skip index arguments"); result.arguments.emplace_back(argument->value); } } diff --git a/src/Storages/Kafka/KafkaConsumer.cpp b/src/Storages/Kafka/KafkaConsumer.cpp index 1cfbd145fb1..b3e0c6a8839 100644 --- a/src/Storages/Kafka/KafkaConsumer.cpp +++ b/src/Storages/Kafka/KafkaConsumer.cpp @@ -263,7 +263,9 @@ void KafkaConsumer::commit() { // TODO: insert atomicity / transactions is needed here (possibility to rollback, on 2 phase commits) 
ProfileEvents::increment(ProfileEvents::KafkaCommitFailures); - throw Exception("All commit attempts failed. Last block was already written to target table(s), but was not committed to Kafka.", ErrorCodes::CANNOT_COMMIT_OFFSET); + throw Exception(ErrorCodes::CANNOT_COMMIT_OFFSET, + "All commit attempts failed. Last block was already written to target table(s), " + "but was not committed to Kafka."); } else { diff --git a/src/Storages/Kafka/KafkaSource.cpp b/src/Storages/Kafka/KafkaSource.cpp index db528adec79..c456ab1550a 100644 --- a/src/Storages/Kafka/KafkaSource.cpp +++ b/src/Storages/Kafka/KafkaSource.cpp @@ -156,7 +156,7 @@ Chunk KafkaSource::generateImpl() // KafkaConsumer::messages, which is accessed from // KafkaConsumer::currentTopic() (and other helpers). if (consumer->isStalled()) - throw Exception("Polled messages became unusable", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Polled messages became unusable"); ProfileEvents::increment(ProfileEvents::KafkaRowsRead, new_rows); diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 635d0e7864a..bc0ef9c2c0a 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -122,7 +122,7 @@ struct StorageKafkaInterceptors return thread_status_ptr.get() == current_thread; }); if (it == self->thread_statuses.end()) - throw Exception("No thread status for this librdkafka thread.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "No thread status for this librdkafka thread."); self->thread_statuses.erase(it); @@ -298,7 +298,8 @@ Pipe StorageKafka::read( return {}; if (!local_context->getSettingsRef().stream_like_engine_allow_direct_select) - throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "Direct select is not allowed. To enable use setting `stream_like_engine_allow_direct_select`"); + throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, + "Direct select is not allowed. To enable use setting `stream_like_engine_allow_direct_select`"); if (mv_attached) throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "Cannot read from StorageKafka with attached materialized views"); @@ -334,7 +335,7 @@ SinkToStoragePtr StorageKafka::write(const ASTPtr &, const StorageMetadataPtr & ProfileEvents::increment(ProfileEvents::KafkaWrites); if (topics.size() > 1) - throw Exception("Can't write to Kafka table with multiple topics!", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Can't write to Kafka table with multiple topics!"); cppkafka::Configuration conf; conf.set("metadata.broker.list", brokers); @@ -673,7 +674,7 @@ bool StorageKafka::streamToViews() auto table_id = getStorageID(); auto table = DatabaseCatalog::instance().getTable(table_id, getContext()); if (!table) - throw Exception("Engine table " + table_id.getNameForLogs() + " doesn't exist.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Engine table {} doesn't exist.", table_id.getNameForLogs()); CurrentMetrics::Increment metric_increment{CurrentMetrics::KafkaBackgroundReads}; ProfileEvents::increment(ProfileEvents::KafkaBackgroundReads); @@ -858,25 +859,28 @@ void registerStorageKafka(StorageFactory & factory) if (!args.getLocalContext()->getSettingsRef().kafka_disable_num_consumers_limit && num_consumers > max_consumers) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "The number of consumers can not be bigger than {}. " - "A single consumer can read any number of partitions. 
Extra consumers are relatively expensive, " - "and using a lot of them can lead to high memory and CPU usage. To achieve better performance " + "A single consumer can read any number of partitions. " + "Extra consumers are relatively expensive, " + "and using a lot of them can lead to high memory and CPU usage. " + "To achieve better performance " "of getting data from Kafka, consider using a setting kafka_thread_per_consumer=1, " - "and ensure you have enough threads in MessageBrokerSchedulePool (background_message_broker_schedule_pool_size). " + "and ensure you have enough threads " + "in MessageBrokerSchedulePool (background_message_broker_schedule_pool_size). " "See also https://clickhouse.com/docs/integrations/kafka/kafka-table-engine#tuning-performance", max_consumers); } else if (num_consumers < 1) { - throw Exception("Number of consumers can not be lower than 1", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Number of consumers can not be lower than 1"); } if (kafka_settings->kafka_max_block_size.changed && kafka_settings->kafka_max_block_size.value < 1) { - throw Exception("kafka_max_block_size can not be lower than 1", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "kafka_max_block_size can not be lower than 1"); } if (kafka_settings->kafka_poll_max_batch_size.changed && kafka_settings->kafka_poll_max_batch_size.value < 1) { - throw Exception("kafka_poll_max_batch_size can not be lower than 1", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "kafka_poll_max_batch_size can not be lower than 1"); } return std::make_shared(args.table_id, args.getContext(), args.columns, std::move(kafka_settings), collection_name); diff --git a/src/Storages/KeyDescription.cpp b/src/Storages/KeyDescription.cpp index f100f129cda..c407cef627d 100644 --- a/src/Storages/KeyDescription.cpp +++ b/src/Storages/KeyDescription.cpp @@ -61,7 +61,7 @@ KeyDescription & KeyDescription::operator=(const KeyDescription & other) /// additional_column is constant property It should never be lost. if (additional_column.has_value() && !other.additional_column.has_value()) - throw Exception("Wrong key assignment, losing additional_column", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong key assignment, losing additional_column"); additional_column = other.additional_column; return *this; } diff --git a/src/Storages/LiveView/LiveViewCommands.h b/src/Storages/LiveView/LiveViewCommands.h index ebf196cea76..2bb2dfb2752 100644 --- a/src/Storages/LiveView/LiveViewCommands.h +++ b/src/Storages/LiveView/LiveViewCommands.h @@ -58,7 +58,7 @@ public: void validate(const IStorage & table) { if (!empty() && !dynamic_cast(&table)) - throw Exception("Wrong storage type. Must be StorageLiveView", DB::ErrorCodes::UNKNOWN_STORAGE); + throw Exception(DB::ErrorCodes::UNKNOWN_STORAGE, "Wrong storage type. 
Must be StorageLiveView"); } }; diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index c92968e4bcc..8f36ea4d91d 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -79,10 +79,11 @@ static StorageID extractDependentTable(ASTPtr & query, ContextPtr context, const { auto * ast_select = subquery->as(); if (!ast_select) - throw Exception("LIVE VIEWs are only supported for queries from tables, but there is no table name in select query.", - DB::ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_LIVE_VIEW); + throw Exception(DB::ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_LIVE_VIEW, + "LIVE VIEWs are only supported for queries from tables, " + "but there is no table name in select query."); if (ast_select->list_of_selects->children.size() != 1) - throw Exception("UNION is not supported for LIVE VIEW", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_LIVE_VIEW); + throw Exception(ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_LIVE_VIEW, "UNION is not supported for LIVE VIEW"); inner_subquery = ast_select->list_of_selects->children.at(0)->clone(); @@ -293,11 +294,11 @@ StorageLiveView::StorageLiveView( setInMemoryMetadata(storage_metadata); if (!query.select) - throw Exception("SELECT query is not specified for " + getName(), ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "SELECT query is not specified for {}", getName()); /// Default value, if only table name exist in the query if (query.select->list_of_selects->children.size() != 1) - throw Exception("UNION is not supported for LIVE VIEW", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_LIVE_VIEW); + throw Exception(ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_LIVE_VIEW, "UNION is not supported for LIVE VIEW"); inner_query = query.select->list_of_selects->children.at(0); @@ -469,7 +470,6 @@ void StorageLiveView::drop() DatabaseCatalog::instance().removeViewDependency(select_table_id, table_id); std::lock_guard lock(mutex); - is_dropped = true; condition.notify_all(); } @@ -607,9 +607,8 @@ void registerStorageLiveView(StorageFactory & factory) factory.registerStorage("LiveView", [](const StorageFactory::Arguments & args) { if (!args.attach && !args.getLocalContext()->getSettingsRef().allow_experimental_live_view) - throw Exception( - "Experimental LIVE VIEW feature is not enabled (the setting 'allow_experimental_live_view')", - ErrorCodes::SUPPORT_IS_DISABLED); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, + "Experimental LIVE VIEW feature is not enabled (the setting 'allow_experimental_live_view')"); return std::make_shared(args.table_id, args.getLocalContext(), args.query, args.columns, args.comment); }); diff --git a/src/Storages/MeiliSearch/StorageMeiliSearch.cpp b/src/Storages/MeiliSearch/StorageMeiliSearch.cpp index 3eca27ef3a8..fc91f58c837 100644 --- a/src/Storages/MeiliSearch/StorageMeiliSearch.cpp +++ b/src/Storages/MeiliSearch/StorageMeiliSearch.cpp @@ -101,7 +101,7 @@ Pipe StorageMeiliSearch::read( auto str = el->getColumnName(); auto it = find(str.begin(), str.end(), '='); if (it == str.end()) - throw Exception("meiliMatch function must have parameters of the form \'key=value\'", ErrorCodes::BAD_QUERY_PARAMETER); + throw Exception(ErrorCodes::BAD_QUERY_PARAMETER, "meiliMatch function must have parameters of the form \'key=value\'"); String key(str.begin() + 1, it); String value(it + 1, str.end() - 1); diff --git a/src/Storages/MergeTree/ActiveDataPartSet.cpp b/src/Storages/MergeTree/ActiveDataPartSet.cpp index 5fb22f4161e..5b7965bc3a0 100644 --- 
a/src/Storages/MergeTree/ActiveDataPartSet.cpp +++ b/src/Storages/MergeTree/ActiveDataPartSet.cpp @@ -47,7 +47,10 @@ bool ActiveDataPartSet::add(const MergeTreePartInfo & part_info, const String & if (!part_info.contains(it->first)) { if (!part_info.isDisjoint(it->first)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} intersects previous part {}. It is a bug or a result of manual intervention in the ZooKeeper data.", part_info.getPartNameForLogs(), it->first.getPartNameForLogs()); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Part {} intersects previous part {}. " + "It is a bug or a result of manual intervention in the ZooKeeper data.", + part_info.getPartNameForLogs(), it->first.getPartNameForLogs()); ++it; break; } @@ -70,7 +73,9 @@ bool ActiveDataPartSet::add(const MergeTreePartInfo & part_info, const String & } if (it != part_info_to_name.end() && !part_info.isDisjoint(it->first)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} intersects part {}. It is a bug or a result of manual intervention in the ZooKeeper data.", name, it->first.getPartNameForLogs()); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Part {} intersects part {}. It is a bug or a result of manual intervention " + "in the ZooKeeper data.", name, it->first.getPartNameForLogs()); part_info_to_name.emplace(part_info, name); return true; diff --git a/src/Storages/MergeTree/AsyncBlockIDsCache.cpp b/src/Storages/MergeTree/AsyncBlockIDsCache.cpp index b65b670d87f..7a8a4cd4347 100644 --- a/src/Storages/MergeTree/AsyncBlockIDsCache.cpp +++ b/src/Storages/MergeTree/AsyncBlockIDsCache.cpp @@ -31,10 +31,11 @@ std::vector AsyncBlockIDsCache::getChildren() { auto zookeeper = storage.getZooKeeper(); - auto watch_callback = [&](const Coordination::WatchResponse &) + auto watch_callback = [last_time = this->last_updatetime.load() + , update_min_interval = this->update_min_interval + , task = task->shared_from_this()](const Coordination::WatchResponse &) { auto now = std::chrono::steady_clock::now(); - auto last_time = last_updatetime.load(); if (now - last_time < update_min_interval) { std::chrono::milliseconds sleep_time = std::chrono::duration_cast(update_min_interval - (now - last_time)); diff --git a/src/Storages/MergeTree/CommonANNIndexes.cpp b/src/Storages/MergeTree/CommonANNIndexes.cpp index e8b7d85e875..4b360e029e5 100644 --- a/src/Storages/MergeTree/CommonANNIndexes.cpp +++ b/src/Storages/MergeTree/CommonANNIndexes.cpp @@ -598,7 +598,7 @@ float ANNCondition::getFloatOrIntLiteralOrPanic(const RPN::iterator& iter) { return static_cast(iter->int_literal.value()); } - throw Exception("Wrong parsed AST in buildRPN\n", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Wrong parsed AST in buildRPN\n"); } } diff --git a/src/Storages/MergeTree/DataPartStorageOnDisk.cpp b/src/Storages/MergeTree/DataPartStorageOnDisk.cpp index 73e7ae54795..efed27bf5d0 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDisk.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDisk.cpp @@ -522,7 +522,7 @@ String DataPartStorageOnDisk::getUniqueId() const { auto disk = volume->getDisk(); if (!disk->supportZeroCopyReplication()) - throw Exception(fmt::format("Disk {} doesn't support zero-copy replication", disk->getName()), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Disk {} doesn't support zero-copy replication", disk->getName()); return disk->getUniqueId(fs::path(getRelativePath()) / "checksums.txt"); } @@ -680,14 +680,15 @@ void DataPartStorageOnDisk::rename( { 
disk.setLastModified(from, Poco::Timestamp::fromEpochTime(time(nullptr))); disk.moveDirectory(from, to); + + /// Only after moveDirectory(), since before that the directory does not exist. + SyncGuardPtr to_sync_guard; + if (fsync_part_dir) + to_sync_guard = volume->getDisk()->getDirectorySyncGuard(to); }); part_dir = new_part_dir; root_path = new_root_path; - - SyncGuardPtr sync_guard; - if (fsync_part_dir) - sync_guard = volume->getDisk()->getDirectorySyncGuard(getRelativePath()); } void DataPartStorageOnDisk::changeRootPath(const std::string & from_root, const std::string & to_root) @@ -735,10 +736,7 @@ std::unique_ptr DataPartStorageOnDisk::writeFile( size_t buf_size, const WriteSettings & settings) { - if (transaction) - return transaction->writeFile(fs::path(root_path) / part_dir / name, buf_size, WriteMode::Rewrite, settings, /* autocommit = */ false); - - return volume->getDisk()->writeFile(fs::path(root_path) / part_dir / name, buf_size, WriteMode::Rewrite, settings); + return writeFile(name, buf_size, WriteMode::Rewrite, settings); } std::unique_ptr DataPartStorageOnDisk::writeFile( @@ -749,6 +747,7 @@ std::unique_ptr DataPartStorageOnDisk::writeFile( { if (transaction) return transaction->writeFile(fs::path(root_path) / part_dir / name, buf_size, mode, settings, /* autocommit = */ false); + return volume->getDisk()->writeFile(fs::path(root_path) / part_dir / name, buf_size, mode, settings); } diff --git a/src/Storages/MergeTree/DataPartStorageOnDisk.h b/src/Storages/MergeTree/DataPartStorageOnDisk.h index 3e82d44d71e..86b8784e2b1 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDisk.h +++ b/src/Storages/MergeTree/DataPartStorageOnDisk.h @@ -106,17 +106,17 @@ public: const String & name, size_t buf_size, const WriteSettings & settings) override; + std::unique_ptr writeFile( + const String & name, + size_t buf_size, + DB::WriteMode mode, + const WriteSettings & settings) override; std::unique_ptr writeTransactionFile(WriteMode mode) const override; void createFile(const String & name) override; void moveFile(const String & from_name, const String & to_name) override; void replaceFile(const String & from_name, const String & to_name) override; - std::unique_ptr writeFile( - const String & name, - size_t buf_size, - DB::WriteMode mode, - const WriteSettings & settings) override; void removeFile(const String & name) override; void removeFileIfExists(const String & name) override; diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index f5c07cd8635..714909199c7 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -310,7 +310,7 @@ MergeTreeData::DataPart::Checksums Service::sendPartFromDisk( copyDataWithThrottler(*file_in, hashing_out, blocker.getCounter(), data.getSendsThrottler()); if (blocker.isCancelled()) - throw Exception("Transferring part to replica was cancelled", ErrorCodes::ABORTED); + throw Exception(ErrorCodes::ABORTED, "Transferring part to replica was cancelled"); if (hashing_out.count() != size) throw Exception( @@ -400,7 +400,7 @@ void Service::sendPartFromDiskRemoteMeta( HashingWriteBuffer hashing_out(out); copyDataWithThrottler(buf, hashing_out, blocker.getCounter(), data.getSendsThrottler()); if (blocker.isCancelled()) - throw Exception("Transferring part to replica was cancelled", ErrorCodes::ABORTED); + throw Exception(ErrorCodes::ABORTED, "Transferring part to replica was cancelled"); if (hashing_out.count() != file_size) throw
Exception(ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, "Unexpected size of file {}", metadata_file_path); @@ -620,13 +620,14 @@ MergeTreeData::MutableDataPartPtr Fetcher::fetchSelectedPart( if (!remote_fs_metadata.empty()) { if (!try_zero_copy) - throw Exception("Got unexpected 'remote_fs_metadata' cookie", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got unexpected 'remote_fs_metadata' cookie"); if (std::find(capability.begin(), capability.end(), remote_fs_metadata) == capability.end()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Got 'remote_fs_metadata' cookie {}, expect one from {}", remote_fs_metadata, fmt::join(capability, ", ")); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got 'remote_fs_metadata' cookie {}, expect one from {}", + remote_fs_metadata, fmt::join(capability, ", ")); if (server_protocol_version < REPLICATION_PROTOCOL_VERSION_WITH_PARTS_ZERO_COPY) throw Exception(ErrorCodes::LOGICAL_ERROR, "Got 'remote_fs_metadata' cookie with old protocol version {}", server_protocol_version); if (part_type == "InMemory") - throw Exception("Got 'remote_fs_metadata' cookie for in-memory part", ErrorCodes::INCORRECT_PART_TYPE); + throw Exception(ErrorCodes::INCORRECT_PART_TYPE, "Got 'remote_fs_metadata' cookie for in-memory part"); try { @@ -734,7 +735,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToMemory( MergeTreeData::DataPart::Checksums checksums; if (!checksums.read(in)) - throw Exception("Cannot deserialize checksums", ErrorCodes::CORRUPTED_DATA); + throw Exception(ErrorCodes::CORRUPTED_DATA, "Cannot deserialize checksums"); NativeReader block_in(in, 0); auto block = block_in.read(); @@ -827,6 +828,8 @@ void Fetcher::downloadBaseOrProjectionPartToDisk( size_t files; readBinary(files, in); + std::vector> written_files; + for (size_t i = 0; i < files; ++i) { String file_name; @@ -844,8 +847,8 @@ void Fetcher::downloadBaseOrProjectionPartToDisk( "This may happen if we are trying to download part from malicious replica or logical error.", absolute_file_path, data_part_storage->getRelativePath()); - auto file_out = data_part_storage->writeFile(file_name, std::min(file_size, DBMS_DEFAULT_BUFFER_SIZE), {}); - HashingWriteBuffer hashing_out(*file_out); + written_files.emplace_back(data_part_storage->writeFile(file_name, std::min(file_size, DBMS_DEFAULT_BUFFER_SIZE), {})); + HashingWriteBuffer hashing_out(*written_files.back()); copyDataWithThrottler(in, hashing_out, file_size, blocker.getCounter(), throttler); if (blocker.isCancelled()) @@ -869,9 +872,14 @@ void Fetcher::downloadBaseOrProjectionPartToDisk( file_name != "columns.txt" && file_name != IMergeTreeDataPart::DEFAULT_COMPRESSION_CODEC_FILE_NAME) checksums.addFile(file_name, file_size, expected_hash); + } + /// Call fsync for all files at once in attempt to decrease the latency + for (auto & file : written_files) + { + file->finalize(); if (sync) - hashing_out.sync(); + file->sync(); } } @@ -895,7 +903,7 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDisk( || part_name.empty() || std::string::npos != tmp_prefix.find_first_of("/.") || std::string::npos != part_name.find_first_of("/.")) - throw Exception("Logical error: tmp_prefix and part_name cannot be empty or contain '.' or '/' characters.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: tmp_prefix and part_name cannot be empty or contain '.' or '/' characters."); String part_dir = tmp_prefix + part_name; String part_relative_path = data.getRelativeDataPath() + String(to_detached ? 
"detached/" : ""); @@ -989,7 +997,9 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDiskRemoteMeta( if (!disk->supportZeroCopyReplication() || !disk->checkUniqueId(part_id)) { - throw Exception(ErrorCodes::ZERO_COPY_REPLICATION_ERROR, "Part {} unique id {} doesn't exist on {} (with type {}).", part_name, part_id, disk->getName(), toString(disk->getDataSourceDescription().type)); + throw Exception(ErrorCodes::ZERO_COPY_REPLICATION_ERROR, + "Part {} unique id {} doesn't exist on {} (with type {}).", + part_name, part_id, disk->getName(), toString(disk->getDataSourceDescription().type)); } LOG_DEBUG(log, "Downloading Part {} unique id {} metadata onto disk {}.", diff --git a/src/Storages/MergeTree/DropPartsRanges.cpp b/src/Storages/MergeTree/DropPartsRanges.cpp index bc4f20a3471..19b44c7e173 100644 --- a/src/Storages/MergeTree/DropPartsRanges.cpp +++ b/src/Storages/MergeTree/DropPartsRanges.cpp @@ -44,7 +44,9 @@ void DropPartsRanges::addDropRange(const ReplicatedMergeTreeLogEntryPtr & entry) void DropPartsRanges::removeDropRange(const ReplicatedMergeTreeLogEntryPtr & entry) { if (entry->type != ReplicatedMergeTreeLogEntry::DROP_RANGE) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to remove entry of type {} from drop ranges, expected DROP_RANGE", entry->typeToString()); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Trying to remove entry of type {} from drop ranges, expected DROP_RANGE", + entry->typeToString()); auto it = drop_ranges.find(entry->znode_name); assert(it != drop_ranges.end()); diff --git a/src/Storages/MergeTree/EphemeralLockInZooKeeper.cpp b/src/Storages/MergeTree/EphemeralLockInZooKeeper.cpp index ca81578c5c6..996d2bc46a5 100644 --- a/src/Storages/MergeTree/EphemeralLockInZooKeeper.cpp +++ b/src/Storages/MergeTree/EphemeralLockInZooKeeper.cpp @@ -17,7 +17,7 @@ EphemeralLockInZooKeeper::EphemeralLockInZooKeeper(const String & path_prefix_, : zookeeper(zookeeper_), path_prefix(path_prefix_), path(path_), conflict_path(conflict_path_) { if (conflict_path.empty() && path.size() <= path_prefix.size()) - throw Exception("Logical error: name of the main node is shorter than prefix.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: name of the main node is shorter than prefix."); } template @@ -179,8 +179,7 @@ EphemeralLocksInAllPartitions::EphemeralLocksInAllPartitions( size_t prefix_size = block_numbers_path.size() + 1 + partitions[i].size() + 1 + path_prefix.size(); const String & path = dynamic_cast(*lock_responses[i]).path_created; if (path.size() <= prefix_size) - throw Exception("Logical error: name of the sequential node is shorter than prefix.", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: name of the sequential node is shorter than prefix."); UInt64 number = parse(path.c_str() + prefix_size, path.size() - prefix_size); locks.push_back(LockInfo{path, partitions[i], number}); diff --git a/src/Storages/MergeTree/EphemeralLockInZooKeeper.h b/src/Storages/MergeTree/EphemeralLockInZooKeeper.h index a8b60c6ef8a..f84f9ebb46c 100644 --- a/src/Storages/MergeTree/EphemeralLockInZooKeeper.h +++ b/src/Storages/MergeTree/EphemeralLockInZooKeeper.h @@ -97,7 +97,7 @@ public: void checkCreated() const { if (!isLocked()) - throw Exception("EphemeralLock is not created", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "EphemeralLock is not created"); } ~EphemeralLockInZooKeeper(); diff --git a/src/Storages/MergeTree/FutureMergedMutatedPart.cpp 
b/src/Storages/MergeTree/FutureMergedMutatedPart.cpp index ffd444b7135..f7dc5f50568 100644 --- a/src/Storages/MergeTree/FutureMergedMutatedPart.cpp +++ b/src/Storages/MergeTree/FutureMergedMutatedPart.cpp @@ -39,9 +39,8 @@ void FutureMergedMutatedPart::assign(MergeTreeData::DataPartsVector parts_, Merg const MergeTreeData::DataPartPtr & first_part = parts_.front(); if (part->partition.value != first_part->partition.value) - throw Exception( - "Attempting to merge parts " + first_part->name + " and " + part->name + " that are in different partitions", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempting to merge parts {} and {} that are in different partitions", + first_part->name, part->name); } parts = std::move(parts_); diff --git a/src/Storages/MergeTree/GinIndexStore.cpp b/src/Storages/MergeTree/GinIndexStore.cpp index 995f4f9f88c..0904855755c 100644 --- a/src/Storages/MergeTree/GinIndexStore.cpp +++ b/src/Storages/MergeTree/GinIndexStore.cpp @@ -1,115 +1,84 @@ #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include namespace DB { -using TokenPostingsBuilderPair = std::pair; -using TokenPostingsBuilderPairs = std::vector; - namespace ErrorCodes { extern const int LOGICAL_ERROR; extern const int UNKNOWN_FORMAT_VERSION; }; -GinIndexStore::GinIndexStore(const String & name_, DataPartStoragePtr storage_) - : name(name_) - , storage(storage_) -{ -} -GinIndexStore::GinIndexStore(const String& name_, DataPartStoragePtr storage_, MutableDataPartStoragePtr data_part_storage_builder_, UInt64 max_digestion_size_) - : name(name_) - , storage(storage_) - , data_part_storage_builder(data_part_storage_builder_) - , max_digestion_size(max_digestion_size_) -{ -} - -GinIndexPostingsBuilder::GinIndexPostingsBuilder(UInt64 limit) : rowid_lst{}, size_limit(limit) +GinIndexPostingsBuilder::GinIndexPostingsBuilder(UInt64 limit) + : rowid_lst{} + , size_limit(limit) {} bool GinIndexPostingsBuilder::contains(UInt32 row_id) const { if (useRoaring()) return rowid_bitmap.contains(row_id); - - const auto * const it = std::find(rowid_lst.begin(), rowid_lst.begin()+rowid_lst_length, row_id); - return it != rowid_lst.begin() + rowid_lst_length; + else + { + const auto * const it = std::find(rowid_lst.begin(), rowid_lst.begin()+rowid_lst_length, row_id); + return it != rowid_lst.begin() + rowid_lst_length; + } } void GinIndexPostingsBuilder::add(UInt32 row_id) { if (containsAllRows()) - { return; - } + if (useRoaring()) { if (rowid_bitmap.cardinality() == size_limit) { - //reset the postings list with MATCH ALWAYS; - rowid_lst_length = 1; //makes sure useRoaring() returns false; - rowid_lst[0] = UINT32_MAX; //set CONTAINS ALL flag; + /// reset the postings list with MATCH ALWAYS; + rowid_lst_length = 1; /// makes sure useRoaring() returns false; + rowid_lst[0] = CONTAINS_ALL; /// set CONTAINS_ALL flag; } else - { rowid_bitmap.add(row_id); - } - return; } - assert(rowid_lst_length < MIN_SIZE_FOR_ROARING_ENCODING); - rowid_lst[rowid_lst_length] = row_id; - rowid_lst_length++; - - if (rowid_lst_length == MIN_SIZE_FOR_ROARING_ENCODING) + else { - for (size_t i = 0; i < rowid_lst_length; i++) - rowid_bitmap.add(rowid_lst[i]); + assert(rowid_lst_length < MIN_SIZE_FOR_ROARING_ENCODING); + rowid_lst[rowid_lst_length] = row_id; + rowid_lst_length++; - rowid_lst_length = UsesBitMap; + if 
(rowid_lst_length == MIN_SIZE_FOR_ROARING_ENCODING) + { + for (size_t i = 0; i < rowid_lst_length; i++) + rowid_bitmap.add(rowid_lst[i]); + + rowid_lst_length = USES_BIT_MAP; + } } } -bool GinIndexPostingsBuilder::useRoaring() const -{ - return rowid_lst_length == UsesBitMap; -} - -bool GinIndexPostingsBuilder::containsAllRows() const -{ - return rowid_lst[0] == UINT32_MAX; -} - -UInt64 GinIndexPostingsBuilder::serialize(WriteBuffer &buffer) const +UInt64 GinIndexPostingsBuilder::serialize(WriteBuffer & buffer) const { UInt64 written_bytes = 0; buffer.write(rowid_lst_length); written_bytes += 1; - if (!useRoaring()) - { - for (size_t i = 0; i < rowid_lst_length; ++i) - { - writeVarUInt(rowid_lst[i], buffer); - written_bytes += getLengthOfVarUInt(rowid_lst[i]); - } - } - else + if (useRoaring()) { auto size = rowid_bitmap.getSizeInBytes(); @@ -121,65 +90,85 @@ UInt64 GinIndexPostingsBuilder::serialize(WriteBuffer &buffer) const buffer.write(buf.get(), size); written_bytes += size; } + else + { + for (size_t i = 0; i < rowid_lst_length; ++i) + { + writeVarUInt(rowid_lst[i], buffer); + written_bytes += getLengthOfVarUInt(rowid_lst[i]); + } + } + return written_bytes; } -GinIndexPostingsListPtr GinIndexPostingsBuilder::deserialize(ReadBuffer &buffer) +GinIndexPostingsListPtr GinIndexPostingsBuilder::deserialize(ReadBuffer & buffer) { UInt8 postings_list_size = 0; - buffer.readStrict(reinterpret_cast(postings_list_size)); + buffer.readStrict(reinterpret_cast(postings_list_size)); - if (postings_list_size != UsesBitMap) + if (postings_list_size == USES_BIT_MAP) + { + size_t size = 0; + readVarUInt(size, buffer); + auto buf = std::make_unique(size); + buffer.readStrict(reinterpret_cast(buf.get()), size); + + GinIndexPostingsListPtr postings_list = std::make_shared(GinIndexPostingsList::read(buf.get())); + + return postings_list; + } + else { assert(postings_list_size < MIN_SIZE_FOR_ROARING_ENCODING); GinIndexPostingsListPtr postings_list = std::make_shared(); UInt32 row_ids[MIN_SIZE_FOR_ROARING_ENCODING]; for (auto i = 0; i < postings_list_size; ++i) - { readVarUInt(row_ids[i], buffer); - } postings_list->addMany(postings_list_size, row_ids); return postings_list; } - else - { - size_t size{0}; - readVarUInt(size, buffer); - auto buf = std::make_unique(size); - buffer.readStrict(reinterpret_cast(buf.get()), size); +} - GinIndexPostingsListPtr postings_list = std::make_shared(GinIndexPostingsList::read(buf.get())); - - return postings_list; - } +GinIndexStore::GinIndexStore(const String & name_, DataPartStoragePtr storage_) + : name(name_) + , storage(storage_) +{ +} +GinIndexStore::GinIndexStore(const String & name_, DataPartStoragePtr storage_, MutableDataPartStoragePtr data_part_storage_builder_, UInt64 max_digestion_size_) + : name(name_) + , storage(storage_) + , data_part_storage_builder(data_part_storage_builder_) + , max_digestion_size(max_digestion_size_) +{ } bool GinIndexStore::exists() const { - String id_file_name = getName() + GIN_SEGMENT_ID_FILE_TYPE; - return storage->exists(id_file_name); + String segment_id_file_name = getName() + GIN_SEGMENT_ID_FILE_TYPE; + return storage->exists(segment_id_file_name); } -UInt32 GinIndexStore::getNextSegmentIDRange(const String& file_name, size_t n) +UInt32 GinIndexStore::getNextSegmentIDRange(const String & file_name, size_t n) { - std::lock_guard guard(gin_index_store_mutex); + std::lock_guard guard(mutex); - /// When the method is called for the first time, the file doesn't exist yet, need to create it - /// and write segment ID 1. 
+ /// When the method is called for the first time, the file doesn't exist yet, need to create it and write segment ID 1. if (!storage->exists(file_name)) { - /// Create file and write initial segment id = 1 + /// Create file std::unique_ptr ostr = this->data_part_storage_builder->writeFile(file_name, DBMS_DEFAULT_BUFFER_SIZE, {}); /// Write version writeChar(static_cast(CURRENT_GIN_FILE_FORMAT_VERSION), *ostr); + /// Write segment ID 1 writeVarUInt(1, *ostr); ostr->sync(); } - /// read id in file + /// Read id in file UInt32 result = 0; { std::unique_ptr istr = this->storage->readFile(file_name, {}, std::nullopt, std::nullopt); @@ -189,7 +178,8 @@ UInt32 GinIndexStore::getNextSegmentIDRange(const String& file_name, size_t n) readVarUInt(result, *istr); } - //save result+n + + /// Save result + n { std::unique_ptr ostr = this->data_part_storage_builder->writeFile(file_name, DBMS_DEFAULT_BUFFER_SIZE, {}); @@ -204,15 +194,15 @@ UInt32 GinIndexStore::getNextSegmentIDRange(const String& file_name, size_t n) UInt32 GinIndexStore::getNextRowIDRange(size_t numIDs) { - UInt32 result =current_segment.next_row_id; + UInt32 result = current_segment.next_row_id; current_segment.next_row_id += numIDs; return result; } UInt32 GinIndexStore::getNextSegmentID() { - String sid_file_name = getName() + GIN_SEGMENT_ID_FILE_TYPE; - return getNextSegmentIDRange(sid_file_name, 1); + String segment_id_file_name = getName() + GIN_SEGMENT_ID_FILE_TYPE; + return getNextSegmentIDRange(segment_id_file_name, 1); } UInt32 GinIndexStore::getNumOfSegments() @@ -220,18 +210,18 @@ UInt32 GinIndexStore::getNumOfSegments() if (cached_segment_num) return cached_segment_num; - String sid_file_name = getName() + GIN_SEGMENT_ID_FILE_TYPE; - if (!storage->exists(sid_file_name)) + String segment_id_file_name = getName() + GIN_SEGMENT_ID_FILE_TYPE; + if (!storage->exists(segment_id_file_name)) return 0; UInt32 result = 0; { - std::unique_ptr istr = this->storage->readFile(sid_file_name, {}, std::nullopt, std::nullopt); + std::unique_ptr istr = this->storage->readFile(segment_id_file_name, {}, std::nullopt, std::nullopt); uint8_t version = 0; readBinary(version, *istr); - if (version > CURRENT_GIN_FILE_FORMAT_VERSION) + if (version > static_cast>(CURRENT_GIN_FILE_FORMAT_VERSION)) throw Exception(ErrorCodes::UNKNOWN_FORMAT_VERSION, "Unsupported inverted index version {}", version); readVarUInt(result, *istr); @@ -250,88 +240,84 @@ bool GinIndexStore::needToWrite() const void GinIndexStore::finalize() { if (!current_postings.empty()) - { writeSegment(); - } } void GinIndexStore::initFileStreams() { - String segment_file_name = getName() + GIN_SEGMENT_FILE_TYPE; - String term_dict_file_name = getName() + GIN_DICTIONARY_FILE_TYPE; + String metadata_file_name = getName() + GIN_SEGMENT_METADATA_FILE_TYPE; + String dict_file_name = getName() + GIN_DICTIONARY_FILE_TYPE; String postings_file_name = getName() + GIN_POSTINGS_FILE_TYPE; - segment_file_stream = data_part_storage_builder->writeFile(segment_file_name, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append, {}); - term_dict_file_stream = data_part_storage_builder->writeFile(term_dict_file_name, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append, {}); + metadata_file_stream = data_part_storage_builder->writeFile(metadata_file_name, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append, {}); + dict_file_stream = data_part_storage_builder->writeFile(dict_file_name, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append, {}); postings_file_stream = data_part_storage_builder->writeFile(postings_file_name, 
DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append, {}); } void GinIndexStore::writeSegment() { - if (segment_file_stream == nullptr) - { + if (metadata_file_stream == nullptr) initFileStreams(); - } + + using TokenPostingsBuilderPair = std::pair; + using TokenPostingsBuilderPairs = std::vector; /// Write segment - segment_file_stream->write(reinterpret_cast(¤t_segment), sizeof(GinIndexSegment)); + metadata_file_stream->write(reinterpret_cast(¤t_segment), sizeof(GinIndexSegment)); TokenPostingsBuilderPairs token_postings_list_pairs; token_postings_list_pairs.reserve(current_postings.size()); - for (const auto& [token, postings_list] : current_postings) - { + for (const auto & [token, postings_list] : current_postings) token_postings_list_pairs.push_back({token, postings_list}); - } /// Sort token-postings list pairs since all tokens have to be added in FST in sorted order std::sort(token_postings_list_pairs.begin(), token_postings_list_pairs.end(), - [](const TokenPostingsBuilderPair& a, const TokenPostingsBuilderPair & b) + [](const TokenPostingsBuilderPair & x, const TokenPostingsBuilderPair & y) { - return a.first < b.first; + return x.first < y.first; }); - ///write postings + /// Write postings std::vector posting_list_byte_sizes(current_postings.size(), 0); - for (size_t current_index = 0; const auto& [token, postings_list] : token_postings_list_pairs) + for (size_t i = 0; const auto & [token, postings_list] : token_postings_list_pairs) { auto posting_list_byte_size = postings_list->serialize(*postings_file_stream); - posting_list_byte_sizes[current_index] = posting_list_byte_size; - current_index++; + posting_list_byte_sizes[i] = posting_list_byte_size; + i++; current_segment.postings_start_offset += posting_list_byte_size; } ///write item dictionary std::vector buffer; WriteBufferFromVector> write_buf(buffer); - FST::FSTBuilder builder(write_buf); + FST::FstBuilder fst_builder(write_buf); UInt64 offset = 0; - for (size_t current_index = 0; const auto& [token, postings_list] : token_postings_list_pairs) + for (size_t i = 0; const auto & [token, postings_list] : token_postings_list_pairs) { - String str_token{token}; - builder.add(str_token, offset); - offset += posting_list_byte_sizes[current_index]; - current_index++; + fst_builder.add(token, offset); + offset += posting_list_byte_sizes[i]; + i++; } - builder.build(); + fst_builder.build(); write_buf.finalize(); /// Write FST size - writeVarUInt(buffer.size(), *term_dict_file_stream); - current_segment.term_dict_start_offset += getLengthOfVarUInt(buffer.size()); + writeVarUInt(buffer.size(), *dict_file_stream); + current_segment.dict_start_offset += getLengthOfVarUInt(buffer.size()); - /// Write FST content - term_dict_file_stream->write(reinterpret_cast(buffer.data()), buffer.size()); - current_segment.term_dict_start_offset += buffer.size(); + /// Write FST blob + dict_file_stream->write(reinterpret_cast(buffer.data()), buffer.size()); + current_segment.dict_start_offset += buffer.size(); current_size = 0; current_postings.clear(); current_segment.segment_id = getNextSegmentID(); - segment_file_stream->sync(); - term_dict_file_stream->sync(); + metadata_file_stream->sync(); + dict_file_stream->sync(); postings_file_stream->sync(); } @@ -343,82 +329,79 @@ GinIndexStoreDeserializer::GinIndexStoreDeserializer(const GinIndexStorePtr & st void GinIndexStoreDeserializer::initFileStreams() { - String segment_file_name = store->getName() + GinIndexStore::GIN_SEGMENT_FILE_TYPE; - String term_dict_file_name = store->getName() + 
GinIndexStore::GIN_DICTIONARY_FILE_TYPE; + String metadata_file_name = store->getName() + GinIndexStore::GIN_SEGMENT_METADATA_FILE_TYPE; + String dict_file_name = store->getName() + GinIndexStore::GIN_DICTIONARY_FILE_TYPE; String postings_file_name = store->getName() + GinIndexStore::GIN_POSTINGS_FILE_TYPE; - segment_file_stream = store->storage->readFile(segment_file_name, {}, std::nullopt, std::nullopt); - term_dict_file_stream = store->storage->readFile(term_dict_file_name, {}, std::nullopt, std::nullopt); + metadata_file_stream = store->storage->readFile(metadata_file_name, {}, std::nullopt, std::nullopt); + dict_file_stream = store->storage->readFile(dict_file_name, {}, std::nullopt, std::nullopt); postings_file_stream = store->storage->readFile(postings_file_name, {}, std::nullopt, std::nullopt); } void GinIndexStoreDeserializer::readSegments() { - auto num_segments = store->getNumOfSegments(); + UInt32 num_segments = store->getNumOfSegments(); if (num_segments == 0) return; + using GinIndexSegments = std::vector; GinIndexSegments segments (num_segments); - assert(segment_file_stream != nullptr); + assert(metadata_file_stream != nullptr); - segment_file_stream->readStrict(reinterpret_cast(segments.data()), num_segments * sizeof(GinIndexSegment)); - for (size_t i = 0; i < num_segments; ++i) + metadata_file_stream->readStrict(reinterpret_cast(segments.data()), num_segments * sizeof(GinIndexSegment)); + for (UInt32 i = 0; i < num_segments; ++i) { auto seg_id = segments[i].segment_id; - auto term_dict = std::make_shared(); - term_dict->postings_start_offset = segments[i].postings_start_offset; - term_dict->term_dict_start_offset = segments[i].term_dict_start_offset; - store->term_dicts[seg_id] = term_dict; + auto seg_dict = std::make_shared(); + seg_dict->postings_start_offset = segments[i].postings_start_offset; + seg_dict->dict_start_offset = segments[i].dict_start_offset; + store->segment_dictionaries[seg_id] = seg_dict; } } -void GinIndexStoreDeserializer::readSegmentTermDictionaries() +void GinIndexStoreDeserializer::readSegmentDictionaries() { for (UInt32 seg_index = 0; seg_index < store->getNumOfSegments(); ++seg_index) - { - readSegmentTermDictionary(seg_index); - } + readSegmentDictionary(seg_index); } -void GinIndexStoreDeserializer::readSegmentTermDictionary(UInt32 segment_id) +void GinIndexStoreDeserializer::readSegmentDictionary(UInt32 segment_id) { /// Check validity of segment_id - auto it = store->term_dicts.find(segment_id); - if (it == store->term_dicts.end()) - { + auto it = store->segment_dictionaries.find(segment_id); + if (it == store->segment_dictionaries.end()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid segment id {}", segment_id); - } - assert(term_dict_file_stream != nullptr); + assert(dict_file_stream != nullptr); - /// Set file pointer of term dictionary file - term_dict_file_stream->seek(it->second->term_dict_start_offset, SEEK_SET); + /// Set file pointer of dictionary file + dict_file_stream->seek(it->second->dict_start_offset, SEEK_SET); it->second->offsets.getData().clear(); /// Read FST size - size_t fst_size{0}; - readVarUInt(fst_size, *term_dict_file_stream); + size_t fst_size = 0; + readVarUInt(fst_size, *dict_file_stream); - /// Read FST content + /// Read FST blob it->second->offsets.getData().resize(fst_size); - term_dict_file_stream->readStrict(reinterpret_cast(it->second->offsets.getData().data()), fst_size); + dict_file_stream->readStrict(reinterpret_cast(it->second->offsets.getData().data()), fst_size); } 
-SegmentedPostingsListContainer GinIndexStoreDeserializer::readSegmentedPostingsLists(const String& term) +GinSegmentedPostingsListContainer GinIndexStoreDeserializer::readSegmentedPostingsLists(const String & term) { assert(postings_file_stream != nullptr); - SegmentedPostingsListContainer container; - for (auto const& seg_term_dict : store->term_dicts) + GinSegmentedPostingsListContainer container; + for (auto const & seg_dict : store->segment_dictionaries) { - auto segment_id = seg_term_dict.first; + auto segment_id = seg_dict.first; - auto [offset, found] = seg_term_dict.second->offsets.getOutput(term); + auto [offset, found] = seg_dict.second->offsets.getOutput(term); if (!found) continue; // Set postings file pointer for reading postings list - postings_file_stream->seek(seg_term_dict.second->postings_start_offset + offset, SEEK_SET); + postings_file_stream->seek(seg_dict.second->postings_start_offset + offset, SEEK_SET); // Read posting list auto postings_list = GinIndexPostingsBuilder::deserialize(*postings_file_stream); @@ -427,10 +410,10 @@ SegmentedPostingsListContainer GinIndexStoreDeserializer::readSegmentedPostingsL return container; } -PostingsCachePtr GinIndexStoreDeserializer::createPostingsCacheFromTerms(const std::vector& terms) +GinPostingsCachePtr GinIndexStoreDeserializer::createPostingsCacheFromTerms(const std::vector & terms) { - auto postings_cache = std::make_shared(); - for (const auto& term : terms) + auto postings_cache = std::make_shared(); + for (const auto & term : terms) { // Make sure don't read for duplicated terms if (postings_cache->find(term) != postings_cache->end()) @@ -442,18 +425,26 @@ PostingsCachePtr GinIndexStoreDeserializer::createPostingsCacheFromTerms(const s return postings_cache; } -GinIndexStoreFactory& GinIndexStoreFactory::instance() +GinPostingsCachePtr PostingsCacheForStore::getPostings(const String & query_string) const +{ + auto it = cache.find(query_string); + if (it == cache.end()) + return nullptr; + return it->second; +} + +GinIndexStoreFactory & GinIndexStoreFactory::instance() { static GinIndexStoreFactory instance; return instance; } -GinIndexStorePtr GinIndexStoreFactory::get(const String& name, DataPartStoragePtr storage) +GinIndexStorePtr GinIndexStoreFactory::get(const String & name, DataPartStoragePtr storage) { - const String& part_path = storage->getRelativePath(); + const String & part_path = storage->getRelativePath(); String key = name + ":" + part_path; - std::lock_guard lock(stores_mutex); + std::lock_guard lock(mutex); GinIndexStores::const_iterator it = stores.find(key); if (it == stores.end()) @@ -464,7 +455,7 @@ GinIndexStorePtr GinIndexStoreFactory::get(const String& name, DataPartStoragePt GinIndexStoreDeserializer deserializer(store); deserializer.readSegments(); - deserializer.readSegmentTermDictionaries(); + deserializer.readSegmentDictionaries(); stores[key] = store; @@ -473,9 +464,9 @@ GinIndexStorePtr GinIndexStoreFactory::get(const String& name, DataPartStoragePt return it->second; } -void GinIndexStoreFactory::remove(const String& part_path) +void GinIndexStoreFactory::remove(const String & part_path) { - std::lock_guard lock(stores_mutex); + std::lock_guard lock(mutex); for (auto it = stores.begin(); it != stores.end();) { if (it->first.find(part_path) != String::npos) @@ -484,4 +475,5 @@ void GinIndexStoreFactory::remove(const String& part_path) ++it; } } + } diff --git a/src/Storages/MergeTree/GinIndexStore.h b/src/Storages/MergeTree/GinIndexStore.h index c326322191f..e075a7e73bf 100644 --- 
a/src/Storages/MergeTree/GinIndexStore.h +++ b/src/Storages/MergeTree/GinIndexStore.h @@ -1,18 +1,18 @@ #pragma once -#include -#include -#include -#include +#include #include #include #include #include -#include -#include #include +#include +#include +#include +#include +#include -/// GinIndexStore manages the inverted index for a data part, and it is made up of one or more immutable +/// GinIndexStore manages the generalized inverted index ("gin") for a data part, and it is made up of one or more immutable /// index segments. /// /// There are 4 types of index files in a store: @@ -20,40 +20,30 @@ /// 2. Segment Metadata file(.gin_seg): it contains index segment metadata. /// - Its file format is an array of GinIndexSegment as defined in this file. /// - postings_start_offset points to the file(.gin_post) starting position for the segment's postings list. -/// - term_dict_start_offset points to the file(.gin_dict) starting position for the segment's term dictionaries. -/// 3. Term Dictionary file(.gin_dict): it contains term dictionaries. +/// - dict_start_offset points to the file(.gin_dict) starting position for the segment's dictionaries. +/// 3. Dictionary file(.gin_dict): it contains dictionaries. /// - It contains an array of (FST_size, FST_blob) which has size and actual data of FST. /// 4. Postings Lists(.gin_post): it contains postings lists data. /// - It contains an array of serialized postings lists. /// /// During the searching in the segment, the segment's meta data can be found in .gin_seg file. From the meta data, -/// the starting position of its term dictionary is used to locate its FST. Then FST is read into memory. +/// the starting position of its dictionary is used to locate its FST. Then FST is read into memory. /// By using the term and FST, the offset("output" in FST) of the postings list for the term /// in FST is found. The offset plus the postings_start_offset is the file location in .gin_post file /// for its postings list. namespace DB { -enum : uint8_t -{ - GIN_VERSION_0 = 0, - GIN_VERSION_1 = 1, /// Initial version -}; - -static constexpr auto CURRENT_GIN_FILE_FORMAT_VERSION = GIN_VERSION_1; /// GinIndexPostingsList which uses 32-bit Roaring using GinIndexPostingsList = roaring::Roaring; - using GinIndexPostingsListPtr = std::shared_ptr; -/// Gin Index Postings List Builder. 
+/// Build a postings list for a term class GinIndexPostingsBuilder { public: - constexpr static int MIN_SIZE_FOR_ROARING_ENCODING = 16; - - GinIndexPostingsBuilder(UInt64 limit); + explicit GinIndexPostingsBuilder(UInt64 limit); /// Check whether a row_id is already added bool contains(UInt32 row_id) const; @@ -61,91 +51,88 @@ public: /// Add a row_id into the builder void add(UInt32 row_id); - /// Check whether the builder is using roaring bitmap - bool useRoaring() const; - - /// Check whether the postings list has been flagged to contain all row ids - bool containsAllRows() const; - /// Serialize the content of builder to given WriteBuffer, returns the bytes of serialized data - UInt64 serialize(WriteBuffer &buffer) const; + UInt64 serialize(WriteBuffer & buffer) const; /// Deserialize the postings list data from given ReadBuffer, return a pointer to the GinIndexPostingsList created by deserialization - static GinIndexPostingsListPtr deserialize(ReadBuffer &buffer); + static GinIndexPostingsListPtr deserialize(ReadBuffer & buffer); + private: + constexpr static int MIN_SIZE_FOR_ROARING_ENCODING = 16; + /// When the list length is no greater than MIN_SIZE_FOR_ROARING_ENCODING, array 'rowid_lst' is used + /// As a special case, rowid_lst[0] == CONTAINS_ALL encodes that all rowids are set. std::array rowid_lst; - /// When the list length is greater than MIN_SIZE_FOR_ROARING_ENCODING, Roaring bitmap 'rowid_bitmap' is used + /// When the list length is greater than MIN_SIZE_FOR_ROARING_ENCODING, roaring bitmap 'rowid_bitmap' is used roaring::Roaring rowid_bitmap; /// rowid_lst_length stores the number of row IDs in 'rowid_lst' array, can also be a flag(0xFF) indicating that roaring bitmap is used - UInt8 rowid_lst_length{0}; + UInt8 rowid_lst_length = 0; + + /// Indicates that all rowids are contained, see 'rowid_lst' + static constexpr UInt32 CONTAINS_ALL = std::numeric_limits::max(); + + /// Indicates that roaring bitmap is used, see 'rowid_lst_length'. 
+ static constexpr UInt8 USES_BIT_MAP = 0xFF; - static constexpr UInt8 UsesBitMap = 0xFF; /// Clear the postings list and reset it with MATCHALL flags when the size of the postings list is beyond the limit UInt64 size_limit; + + /// Check whether the builder is using roaring bitmap + bool useRoaring() const { return rowid_lst_length == USES_BIT_MAP; } + + /// Check whether the postings list has been flagged to contain all row ids + bool containsAllRows() const { return rowid_lst[0] == CONTAINS_ALL; } }; -/// Container for postings lists for each segment -using SegmentedPostingsListContainer = std::unordered_map; +using GinIndexPostingsBuilderPtr = std::shared_ptr; -/// Postings lists and terms built from query string -using PostingsCache = std::unordered_map; -using PostingsCachePtr = std::shared_ptr; - -/// Gin Index Segment information, which contains: +/// Gin index segment descriptor, which contains: struct GinIndexSegment { /// Segment ID retrieved from next available ID from file .gin_sid UInt32 segment_id = 0; - /// Next row ID for this segment + /// Start row ID for this segment UInt32 next_row_id = 1; /// .gin_post file offset of this segment's postings lists UInt64 postings_start_offset = 0; - /// .term_dict file offset of this segment's term dictionaries - UInt64 term_dict_start_offset = 0; + /// .gin_dict file offset of this segment's dictionaries + UInt64 dict_start_offset = 0; }; -using GinIndexSegments = std::vector; - -struct SegmentTermDictionary +struct GinSegmentDictionary { /// .gin_post file offset of this segment's postings lists UInt64 postings_start_offset; - /// .gin_dict file offset of this segment's term dictionaries - UInt64 term_dict_start_offset; + /// .gin_dict file offset of this segment's dictionaries + UInt64 dict_start_offset; - /// Finite State Transducer, which can be viewed as a map of , where offset is the + /// (Minimized) Finite State Transducer, which can be viewed as a map of , where offset is the /// offset to the term's posting list in postings list file FST::FiniteStateTransducer offsets; }; -using SegmentTermDictionaryPtr = std::shared_ptr; +using GinSegmentDictionaryPtr = std::shared_ptr; -/// Term dictionaries indexed by segment ID -using SegmentTermDictionaries = std::unordered_map; - -/// Gin Index Store which has Gin Index meta data for the corresponding Data Part +/// Gin index store which has gin index meta data for the corresponding column data part class GinIndexStore { public: - using GinIndexPostingsBuilderPtr = std::shared_ptr; /// Container for all term's Gin Index Postings List Builder using GinIndexPostingsBuilderContainer = std::unordered_map; - explicit GinIndexStore(const String & name_, DataPartStoragePtr storage_); - - GinIndexStore(const String& name_, DataPartStoragePtr storage_, MutableDataPartStoragePtr data_part_storage_builder_, UInt64 max_digestion_size_); + GinIndexStore(const String & name_, DataPartStoragePtr storage_); + GinIndexStore(const String & name_, DataPartStoragePtr storage_, MutableDataPartStoragePtr data_part_storage_builder_, UInt64 max_digestion_size_); /// Check existence by checking the existence of file .gin_sid bool exists() const; - /// Get a range of next 'numIDs' available row IDs + /// Get a range of next 'numIDs'-many available row IDs UInt32 getNextRowIDRange(size_t numIDs); /// Get next available segment ID by updating file .gin_sid @@ -155,25 +142,26 @@ public: UInt32 getNumOfSegments(); /// Get current postings list builder - const GinIndexPostingsBuilderContainer& getPostings() const { 
return current_postings; } + const GinIndexPostingsBuilderContainer & getPostingsListBuilder() const { return current_postings; } /// Set postings list builder for given term void setPostingsBuilder(const String & term, GinIndexPostingsBuilderPtr builder) { current_postings[term] = builder; } + /// Check if we need to write segment to Gin index files bool needToWrite() const; /// Accumulate the size of text data which has been digested void incrementCurrentSizeBy(UInt64 sz) { current_size += sz; } - UInt32 getCurrentSegmentID() const { return current_segment.segment_id;} + UInt32 getCurrentSegmentID() const { return current_segment.segment_id; } /// Do last segment writing void finalize(); - /// method for writing segment data to Gin index files + /// Method for writing segment data to Gin index files void writeSegment(); - const String & getName() const {return name;} + const String & getName() const { return name; } private: friend class GinIndexStoreDeserializer; @@ -182,7 +170,7 @@ private: void initFileStreams(); /// Get a range of next available segment IDs by updating file .gin_sid - UInt32 getNextSegmentIDRange(const String &file_name, size_t n); + UInt32 getNextSegmentIDRange(const String & file_name, size_t n); String name; DataPartStoragePtr storage; @@ -190,37 +178,89 @@ private: UInt32 cached_segment_num = 0; - std::mutex gin_index_store_mutex; + std::mutex mutex; - /// Terms dictionaries which are loaded from .gin_dict files - SegmentTermDictionaries term_dicts; + /// Dictionaries indexed by segment ID + using GinSegmentDictionaries = std::unordered_map; - /// container for building postings lists during index construction + /// Term's dictionaries which are loaded from .gin_dict files + GinSegmentDictionaries segment_dictionaries; + + /// Container for building postings lists during index construction GinIndexPostingsBuilderContainer current_postings; - /// The following is for segmentation of Gin index - GinIndexSegment current_segment{}; + /// For the segmentation of Gin indexes + GinIndexSegment current_segment; UInt64 current_size = 0; const UInt64 max_digestion_size = 0; - /// File streams for segment, term dictionaries and postings lists - std::unique_ptr segment_file_stream; - std::unique_ptr term_dict_file_stream; + /// File streams for segment, dictionaries and postings lists + std::unique_ptr metadata_file_stream; + std::unique_ptr dict_file_stream; std::unique_ptr postings_file_stream; static constexpr auto GIN_SEGMENT_ID_FILE_TYPE = ".gin_sid"; - static constexpr auto GIN_SEGMENT_FILE_TYPE = ".gin_seg"; + static constexpr auto GIN_SEGMENT_METADATA_FILE_TYPE = ".gin_seg"; static constexpr auto GIN_DICTIONARY_FILE_TYPE = ".gin_dict"; static constexpr auto GIN_POSTINGS_FILE_TYPE = ".gin_post"; + + enum class Format : uint8_t + { + v0 = 0, + v1 = 1, /// Initial version + }; + + static constexpr auto CURRENT_GIN_FILE_FORMAT_VERSION = Format::v0; }; using GinIndexStorePtr = std::shared_ptr; -/// GinIndexStores indexed by part file path -using GinIndexStores = std::unordered_map; +/// Container for postings lists for each segment +using GinSegmentedPostingsListContainer = std::unordered_map; + +/// Postings lists and terms built from query string +using GinPostingsCache = std::unordered_map; +using GinPostingsCachePtr = std::shared_ptr; + +/// Gin index store reader which helps to read segments, dictionaries and postings list +class GinIndexStoreDeserializer : private boost::noncopyable +{ +public: + explicit GinIndexStoreDeserializer(const GinIndexStorePtr & store_); + 
+ /// Read segment information from .gin_seg files + void readSegments(); + + /// Read all dictionaries from .gin_dict files + void readSegmentDictionaries(); + + /// Read dictionary for given segment id + void readSegmentDictionary(UInt32 segment_id); + + /// Read postings lists for the term + GinSegmentedPostingsListContainer readSegmentedPostingsLists(const String & term); + + /// Read postings lists for terms (which are created by tokenizing query string) + GinPostingsCachePtr createPostingsCacheFromTerms(const std::vector & terms); + +private: + /// Initialize gin index files + void initFileStreams(); + + /// The store for the reader + GinIndexStorePtr store; + + /// File streams for reading Gin Index + std::unique_ptr metadata_file_stream; + std::unique_ptr dict_file_stream; + std::unique_ptr postings_file_stream; + + /// Current segment, used in building index + GinIndexSegment current_segment; +}; /// PostingsCacheForStore contains postings lists from 'store' which are retrieved from Gin index files for the terms in query strings -/// PostingsCache is per query string(one query can have multiple query strings): when skipping index(row ID ranges) is used for the part during the +/// GinPostingsCache is per query string (one query can have multiple query strings): when skipping index (row ID ranges) is used for the part during the /// query, the postings cache is created and associated with the store where postings lists are read /// for the tokenized query string. The postings caches are released automatically when the query is done. struct PostingsCacheForStore @@ -229,76 +269,31 @@ struct PostingsCacheForStore GinIndexStorePtr store; /// map of - std::unordered_map cache; + std::unordered_map cache; /// Get postings lists for query string, return nullptr if not found - PostingsCachePtr getPostings(const String &query_string) const - { - auto it {cache.find(query_string)}; - - if (it == cache.cend()) - { - return nullptr; - } - return it->second; - } + GinPostingsCachePtr getPostings(const String & query_string) const; }; -/// GinIndexStore Factory, which is a singleton for storing GinIndexStores +/// A singleton for storing GinIndexStores class GinIndexStoreFactory : private boost::noncopyable { public: /// Get singleton of GinIndexStoreFactory - static GinIndexStoreFactory& instance(); + static GinIndexStoreFactory & instance(); /// Get GinIndexStore by using index name, disk and part_path (which are combined to create key in stores) - GinIndexStorePtr get(const String& name, DataPartStoragePtr storage); + GinIndexStorePtr get(const String & name, DataPartStoragePtr storage); /// Remove all Gin index files which are under the same part_path - void remove(const String& part_path); + void remove(const String & part_path); + + /// GinIndexStores indexed by part file path + using GinIndexStores = std::unordered_map; private: GinIndexStores stores; - std::mutex stores_mutex; -}; - -/// Term dictionary information, which contains: - -/// Gin Index Store Reader which helps to read segments, term dictionaries and postings list -class GinIndexStoreDeserializer : private boost::noncopyable -{ -public: - explicit GinIndexStoreDeserializer(const GinIndexStorePtr & store_); - - /// Read all segment information from .gin_seg files - void readSegments(); - - /// Read all term dictionaries from .gin_dict files - void readSegmentTermDictionaries(); - - /// Read term dictionary for given segment id - void readSegmentTermDictionary(UInt32 segment_id); - - /// Read postings lists for the term - 
SegmentedPostingsListContainer readSegmentedPostingsLists(const String& term); - - /// Read postings lists for terms(which are created by tokenzing query string) - PostingsCachePtr createPostingsCacheFromTerms(const std::vector& terms); - -private: - /// Initialize Gin index files - void initFileStreams(); - - /// The store for the reader - GinIndexStorePtr store; - - /// File streams for reading Gin Index - std::unique_ptr segment_file_stream; - std::unique_ptr term_dict_file_stream; - std::unique_ptr postings_file_stream; - - /// Current segment, used in building index - GinIndexSegment current_segment; + std::mutex mutex; }; } diff --git a/src/Storages/MergeTree/IDataPartStorage.h b/src/Storages/MergeTree/IDataPartStorage.h index 70cc4d3fe70..84ca3e733c6 100644 --- a/src/Storages/MergeTree/IDataPartStorage.h +++ b/src/Storages/MergeTree/IDataPartStorage.h @@ -216,7 +216,11 @@ public: const String & name, size_t buf_size, const WriteSettings & settings) = 0; - virtual std::unique_ptr writeFile(const String & name, size_t buf_size, WriteMode mode, const WriteSettings & settings) = 0; + virtual std::unique_ptr writeFile( + const String & name, + size_t buf_size, + WriteMode mode, + const WriteSettings & settings) = 0; /// A special const method to write transaction file. /// It's const, because file with transaction metadata diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index ec2ea448290..e1427413f62 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -518,7 +518,8 @@ void IMergeTreeDataPart::removeIfNeeded() String file_name = fileName(getDataPartStorage().getPartDirectory()); if (file_name.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "relative_path {} of part {} is invalid or not set", getDataPartStorage().getPartDirectory(), name); + throw Exception(ErrorCodes::LOGICAL_ERROR, "relative_path {} of part {} is invalid or not set", + getDataPartStorage().getPartDirectory(), name); if (!startsWith(file_name, "tmp") && !endsWith(file_name, ".tmp_proj")) { @@ -579,15 +580,15 @@ void IMergeTreeDataPart::assertState(const std::initializer_listprotect(); if (loaded_index[i]->size() != marks_count) - throw Exception("Cannot read all data from index file " + index_path - + "(expected size: " + toString(marks_count) + ", read: " + toString(loaded_index[i]->size()) + ")", - ErrorCodes::CANNOT_READ_ALL_DATA); + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Cannot read all data from index file {}(expected size: " + "{}, read: {})", index_path, marks_count, loaded_index[i]->size()); } if (!index_file->eof()) - throw Exception("Index file " + index_path + " is unexpectedly long", ErrorCodes::EXPECTED_END_OF_FILE); + throw Exception(ErrorCodes::EXPECTED_END_OF_FILE, "Index file {} is unexpectedly long", index_path); index.assign(std::make_move_iterator(loaded_index.begin()), std::make_move_iterator(loaded_index.end())); } @@ -1055,10 +1057,9 @@ void IMergeTreeDataPart::loadPartitionAndMinMaxIndex() auto metadata_snapshot = storage.getInMemoryMetadataPtr(); String calculated_partition_id = partition.getID(metadata_snapshot->getPartitionKey().sample_block); if (calculated_partition_id != info.partition_id) - throw Exception( - "While loading part " + getDataPartStorage().getFullPath() + ": calculated partition ID: " + calculated_partition_id - + " differs from partition ID in part name: " + info.partition_id, - ErrorCodes::CORRUPTED_DATA); + throw 
Exception(ErrorCodes::CORRUPTED_DATA, "While loading part {}: " + "calculated partition ID: {} differs from partition ID in part name: {}", + getDataPartStorage().getFullPath(), calculated_partition_id, info.partition_id); } void IMergeTreeDataPart::appendFilesOfPartitionAndMinMaxIndex(Strings & files) const @@ -1125,7 +1126,7 @@ void IMergeTreeDataPart::loadRowsCount() { bool exists = metadata_manager->exists("count.txt"); if (!exists) - throw Exception("No count.txt in part " + name, ErrorCodes::NO_FILE_IN_DATA_PART); + throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No count.txt in part {}", name); read_rows_count(); @@ -1147,9 +1148,10 @@ void IMergeTreeDataPart::loadRowsCount() if (rows_in_column != rows_count) { throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Column {} has rows count {} according to size in memory " - "and size of single value, but data part {} has {} rows", backQuote(column.name), rows_in_column, name, rows_count); + ErrorCodes::LOGICAL_ERROR, + "Column {} has rows count {} according to size in memory " + "and size of single value, but data part {} has {} rows", + backQuote(column.name), rows_in_column, name, rows_count); } size_t last_possibly_incomplete_mark_rows = index_granularity.getLastNonFinalMarkRows(); @@ -1159,20 +1161,25 @@ void IMergeTreeDataPart::loadRowsCount() if (rows_in_column < index_granularity_without_last_mark) { throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Column {} has rows count {} according to size in memory " - "and size of single value, but index granularity in part {} without last mark has {} rows, which is more than in column", - backQuote(column.name), rows_in_column, name, index_granularity.getTotalRows()); + ErrorCodes::LOGICAL_ERROR, + "Column {} has rows count {} according to size in memory " + "and size of single value, " + "but index granularity in part {} without last mark has {} rows, which " + "is more than in column", + backQuote(column.name), rows_in_column, name, index_granularity.getTotalRows()); } /// In last mark we actually written less or equal rows than stored in last mark of index granularity if (rows_in_column - index_granularity_without_last_mark > last_possibly_incomplete_mark_rows) { throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Column {} has rows count {} in last mark according to size in memory " - "and size of single value, but index granularity in part {} in last mark has {} rows which is less than in column", - backQuote(column.name), rows_in_column - index_granularity_without_last_mark, name, last_possibly_incomplete_mark_rows); + ErrorCodes::LOGICAL_ERROR, + "Column {} has rows count {} in last mark according to size in memory " + "and size of single value, " + "but index granularity in part {} " + "in last mark has {} rows which is less than in column", + backQuote(column.name), rows_in_column - index_granularity_without_last_mark, + name, last_possibly_incomplete_mark_rows); } } } @@ -1201,24 +1208,22 @@ void IMergeTreeDataPart::loadRowsCount() if (column_size % sizeof_field != 0) { - throw Exception( - "Uncompressed size of column " + column.name + "(" + toString(column_size) - + ") is not divisible by the size of value (" + toString(sizeof_field) + ")", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Uncompressed size of column {}({}) is not divisible by the size of value ({})", + column.name, column_size, sizeof_field); } size_t last_mark_index_granularity = index_granularity.getLastNonFinalMarkRows(); size_t rows_approx = index_granularity.getTotalRows(); if 
(!(rows_count <= rows_approx && rows_approx < rows_count + last_mark_index_granularity)) - throw Exception( - "Unexpected size of column " + column.name + ": " + toString(rows_count) + " rows, expected " - + toString(rows_approx) + "+-" + toString(last_mark_index_granularity) + " rows according to the index", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected size of column {}: " + "{} rows, expected {}+-{} rows according to the index", + column.name, rows_count, rows_approx, toString(last_mark_index_granularity)); return; } - throw Exception("Data part doesn't contain fixed size column (even Date column)", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Data part doesn't contain fixed size column (even Date column)"); } } @@ -1246,11 +1251,11 @@ void IMergeTreeDataPart::loadTTLInfos() } catch (const JSONException &) { - throw Exception("Error while parsing file ttl.txt in part: " + name, ErrorCodes::BAD_TTL_FILE); + throw Exception(ErrorCodes::BAD_TTL_FILE, "Error while parsing file ttl.txt in part: {}", name); } } else - throw Exception("Unknown ttl format version: " + toString(format_version), ErrorCodes::BAD_TTL_FILE); + throw Exception(ErrorCodes::BAD_TTL_FILE, "Unknown ttl format version: {}", toString(format_version)); } } @@ -1268,7 +1273,7 @@ void IMergeTreeDataPart::loadUUID() auto in = metadata_manager->read(UUID_FILE_NAME); readText(uuid, *in); if (uuid == UUIDHelpers::Nil) - throw Exception("Unexpected empty " + String(UUID_FILE_NAME) + " in part: " + name, ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected empty {} in part: {}", String(UUID_FILE_NAME), name); } } @@ -1290,8 +1295,8 @@ void IMergeTreeDataPart::loadColumns(bool require) { /// We can get list of columns only from columns.txt in compact parts. if (require || part_type == Type::Compact) - throw Exception("No columns.txt in part " + name + ", expected path " + path + " on drive " + getDataPartStorage().getDiskName(), - ErrorCodes::NO_FILE_IN_DATA_PART); + throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No columns.txt in part {}, expected path {} on drive {}", + name, path, getDataPartStorage().getDiskName()); /// If there is no file with a list of columns, write it down. 
for (const NameAndTypePair & column : metadata_snapshot->getColumns().getAllPhysical()) @@ -1299,7 +1304,7 @@ void IMergeTreeDataPart::loadColumns(bool require) loaded_columns.push_back(column); if (columns.empty()) - throw Exception("No columns in part " + name, ErrorCodes::NO_FILE_IN_DATA_PART); + throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No columns in part {}", name); writeColumns(loaded_columns, {}); } @@ -1753,9 +1758,9 @@ MutableDataPartStoragePtr IMergeTreeDataPart::makeCloneOnDisk(const DiskPtr & di assertOnDisk(); if (disk->getName() == getDataPartStorage().getDiskName()) - throw Exception("Can not clone data part " + name + " to same disk " + getDataPartStorage().getDiskName(), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can not clone data part {} to same disk {}", name, getDataPartStorage().getDiskName()); if (directory_name.empty()) - throw Exception("Can not clone data part " + name + " to empty directory.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can not clone data part {} to empty directory.", name); String path_to_clone = fs::path(storage.relative_data_path) / directory_name / ""; return getDataPartStorage().clonePart(path_to_clone, getDataPartStorage().getPartDirectory(), disk, storage.log); @@ -1778,23 +1783,23 @@ void IMergeTreeDataPart::checkConsistencyBase() const if (!pk.column_names.empty() && (!checksums.files.contains("primary" + getIndexExtension(false)) && !checksums.files.contains("primary" + getIndexExtension(true)))) - throw Exception("No checksum for " + toString("primary" + getIndexExtension(false)) + " or " + toString("primary" + getIndexExtension(true)), - ErrorCodes::NO_FILE_IN_DATA_PART); + throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No checksum for {} or {}", + toString("primary" + getIndexExtension(false)), toString("primary" + getIndexExtension(true))); if (storage.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { if (!checksums.files.contains("count.txt")) - throw Exception("No checksum for count.txt", ErrorCodes::NO_FILE_IN_DATA_PART); + throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No checksum for count.txt"); if (metadata_snapshot->hasPartitionKey() && !checksums.files.contains("partition.dat")) - throw Exception("No checksum for partition.dat", ErrorCodes::NO_FILE_IN_DATA_PART); + throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No checksum for partition.dat"); if (!isEmpty() && !parent_part) { for (const String & col_name : storage.getMinMaxColumnsNames(partition_key)) { if (!checksums.files.contains("minmax_" + escapeForFileName(col_name) + ".idx")) - throw Exception("No minmax idx file checksum for column " + col_name, ErrorCodes::NO_FILE_IN_DATA_PART); + throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No minmax idx file checksum for column {}", col_name); } } } @@ -1840,7 +1845,7 @@ void IMergeTreeDataPart::checkConsistencyBase() const void IMergeTreeDataPart::checkConsistency(bool /* require_part_metadata */) const { - throw Exception("Method 'checkConsistency' is not implemented for part with type " + getType().toString(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method 'checkConsistency' is not implemented for part with type {}", getType().toString()); } void IMergeTreeDataPart::calculateColumnsAndSecondaryIndicesSizesOnDisk() @@ -1852,7 +1857,7 @@ void IMergeTreeDataPart::calculateColumnsAndSecondaryIndicesSizesOnDisk() void IMergeTreeDataPart::calculateColumnsSizesOnDisk() { if 
(getColumns().empty() || checksums.empty()) - throw Exception("Cannot calculate columns sizes when columns or checksums are not initialized", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot calculate columns sizes when columns or checksums are not initialized"); calculateEachColumnSizes(columns_sizes, total_columns_size); } @@ -1860,7 +1865,7 @@ void IMergeTreeDataPart::calculateColumnsSizesOnDisk() void IMergeTreeDataPart::calculateSecondaryIndicesSizesOnDisk() { if (checksums.empty()) - throw Exception("Cannot calculate secondary indexes sizes when columns or checksums are not initialized", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot calculate secondary indexes sizes when columns or checksums are not initialized"); auto secondary_indices_descriptions = storage.getInMemoryMetadataPtr()->secondary_indices; diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 2c5169a1729..68d5147362b 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -1,6 +1,6 @@ #pragma once -#include "IO/WriteSettings.h" +#include #include #include #include @@ -22,8 +22,6 @@ #include #include -#include - namespace zkutil { diff --git a/src/Storages/MergeTree/IMergeTreeReader.cpp b/src/Storages/MergeTree/IMergeTreeReader.cpp index 8711664d531..10476c1b129 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.cpp +++ b/src/Storages/MergeTree/IMergeTreeReader.cpp @@ -88,9 +88,8 @@ void IMergeTreeReader::evaluateMissingDefaults(Block additional_columns, Columns size_t num_columns = requested_columns.size(); if (res_columns.size() != num_columns) - throw Exception("invalid number of columns passed to MergeTreeReader::fillMissingColumns. " - "Expected " + toString(num_columns) + ", " - "got " + toString(res_columns.size()), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "invalid number of columns passed to MergeTreeReader::fillMissingColumns. " + "Expected {}, got {}", num_columns, res_columns.size()); /// Convert columns list to block. /// TODO: rewrite with columns interface. It will be possible after changes in ExpressionActions. @@ -171,14 +170,9 @@ void IMergeTreeReader::performRequiredConversions(Columns & res_columns) const if (res_columns.size() != num_columns) { - throw Exception( - "Invalid number of columns passed to MergeTreeReader::performRequiredConversions. " - "Expected " - + toString(num_columns) - + ", " - "got " - + toString(res_columns.size()), - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Invalid number of columns passed to MergeTreeReader::performRequiredConversions. " + "Expected {}, got {}", num_columns, res_columns.size()); } Block copy_block; @@ -262,9 +256,8 @@ IMergeTreeReader::ColumnPosition IMergeTreeReader::findColumnForOffsets(const Na void IMergeTreeReader::checkNumberOfColumns(size_t num_columns_to_read) const { if (num_columns_to_read != requested_columns.size()) - throw Exception("invalid number of columns passed to MergeTreeReader::readRows. " - "Expected " + toString(requested_columns.size()) + ", " - "got " + toString(num_columns_to_read), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "invalid number of columns passed to MergeTreeReader::readRows. 
" + "Expected {}, got {}", requested_columns.size(), num_columns_to_read); } } diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp index 2340bdd99b2..1fcf564693f 100644 --- a/src/Storages/MergeTree/KeyCondition.cpp +++ b/src/Storages/MergeTree/KeyCondition.cpp @@ -1410,10 +1410,8 @@ static void castValueToType(const DataTypePtr & desired_type, Field & src_value, } catch (...) { - throw Exception("Key expression contains comparison between inconvertible types: " + - desired_type->getName() + " and " + src_type->getName() + - " inside " + node_column_name, - ErrorCodes::BAD_TYPE_OF_FIELD); + throw Exception(ErrorCodes::BAD_TYPE_OF_FIELD, "Key expression contains comparison between inconvertible types: " + "{} and {} inside {}", desired_type->getName(), src_type->getName(), node_column_name); } } @@ -1445,7 +1443,7 @@ bool KeyCondition::extractAtomFromTree(const RPNBuilderTreeNode & node, RPNEleme return false; if (key_column_num == static_cast(-1)) - throw Exception("`key_column_num` wasn't initialized. It is a bug.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "`key_column_num` wasn't initialized. It is a bug."); } else if (num_args == 2) { @@ -1544,7 +1542,7 @@ bool KeyCondition::extractAtomFromTree(const RPNBuilderTreeNode & node, RPNEleme return false; if (key_column_num == static_cast(-1)) - throw Exception("`key_column_num` wasn't initialized. It is a bug.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "`key_column_num` wasn't initialized. It is a bug."); /// Replace on to <-sign> if (key_arg_pos == 1) @@ -1844,11 +1842,11 @@ KeyCondition::Description KeyCondition::getDescription() const rpn_stack.emplace_back(Frame{.can_be_true = std::move(can_be_true), .can_be_false = std::move(can_be_false)}); } else - throw Exception("Unexpected function type in KeyCondition::RPNElement", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected function type in KeyCondition::RPNElement"); } if (rpn_stack.size() != 1) - throw Exception("Unexpected stack size in KeyCondition::checkInRange", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected stack size in KeyCondition::checkInRange"); std::vector key_names(key_columns.size()); std::vector is_key_used(key_columns.size(), false); @@ -2276,7 +2274,7 @@ BoolMask KeyCondition::checkInHyperrectangle( || element.function == RPNElement::FUNCTION_NOT_IN_SET) { if (!element.set_index) - throw Exception("Set for IN is not created yet", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Set for IN is not created yet"); rpn_stack.emplace_back(element.set_index->checkInRange(hyperrectangle, data_types, single_point)); if (element.function == RPNElement::FUNCTION_NOT_IN_SET) @@ -2315,11 +2313,11 @@ BoolMask KeyCondition::checkInHyperrectangle( rpn_stack.emplace_back(true, false); } else - throw Exception("Unexpected function type in KeyCondition::RPNElement", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected function type in KeyCondition::RPNElement"); } if (rpn_stack.size() != 1) - throw Exception("Unexpected stack size in KeyCondition::checkInRange", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected stack size in KeyCondition::checkInRange"); return rpn_stack[0]; } @@ -2481,11 +2479,11 @@ bool KeyCondition::unknownOrAlwaysTrue(bool unknown_any) const rpn_stack.back() = arg1 | arg2; } else - throw 
Exception("Unexpected function type in KeyCondition::RPNElement", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected function type in KeyCondition::RPNElement"); } if (rpn_stack.size() != 1) - throw Exception("Unexpected stack size in KeyCondition::unknownOrAlwaysTrue", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected stack size in KeyCondition::unknownOrAlwaysTrue"); return rpn_stack[0]; } @@ -2556,11 +2554,11 @@ bool KeyCondition::alwaysFalse() const rpn_stack.back() = 2; } else - throw Exception("Unexpected function type in KeyCondition::RPNElement", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected function type in KeyCondition::RPNElement"); } if (rpn_stack.size() != 1) - throw Exception("Unexpected stack size in KeyCondition::alwaysFalse", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected stack size in KeyCondition::alwaysFalse"); return rpn_stack[0] == 0; } diff --git a/src/Storages/MergeTree/MarkRange.cpp b/src/Storages/MergeTree/MarkRange.cpp index 903940efa94..0eea0e5afd1 100644 --- a/src/Storages/MergeTree/MarkRange.cpp +++ b/src/Storages/MergeTree/MarkRange.cpp @@ -18,12 +18,18 @@ bool MarkRange::operator<(const MarkRange & rhs) const /// We allow only consecutive non-intersecting ranges /// Here we check whether a beginning of one range lies inside another range /// (ranges are intersect) - const bool is_intersection = (begin <= rhs.begin && rhs.begin < end) || - (rhs.begin <= begin && begin < rhs.end); + if (this != &rhs) + { + const bool is_intersection = (begin <= rhs.begin && rhs.begin < end) || + (rhs.begin <= begin && begin < rhs.end); - if (is_intersection) - throw Exception(ErrorCodes::LOGICAL_ERROR, - "Intersecting mark ranges are not allowed, it is a bug! First range ({}, {}), second range ({}, {})", begin, end, rhs.begin, rhs.end); + if (is_intersection) + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Intersecting mark ranges are not allowed, it is a bug! " + "First range ({}, {}), second range ({}, {})", + begin, end, rhs.begin, rhs.end); + } return begin < rhs.begin && end <= rhs.begin; } @@ -48,4 +54,15 @@ std::string toString(const MarkRanges & ranges) return result; } +void assertSortedAndNonIntersecting(const MarkRanges & ranges) +{ + MarkRanges ranges_copy(ranges.begin(), ranges.end()); + /// Should also throw an exception if interseting range is found during comparison. + std::sort(ranges_copy.begin(), ranges_copy.end()); + if (ranges_copy != ranges) + throw Exception( + ErrorCodes::LOGICAL_ERROR, "Expected sorted and non intersecting ranges. 
Ranges: {}", + toString(ranges)); +} + } diff --git a/src/Storages/MergeTree/MarkRange.h b/src/Storages/MergeTree/MarkRange.h index fe02eb056b7..076fc7dfea2 100644 --- a/src/Storages/MergeTree/MarkRange.h +++ b/src/Storages/MergeTree/MarkRange.h @@ -34,4 +34,6 @@ size_t getLastMark(const MarkRanges & ranges); std::string toString(const MarkRanges & ranges); +void assertSortedAndNonIntersecting(const MarkRanges & ranges); + } diff --git a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp index 97226825629..8b3ee3f4325 100644 --- a/src/Storages/MergeTree/MergeFromLogEntryTask.cpp +++ b/src/Storages/MergeTree/MergeFromLogEntryTask.cpp @@ -174,8 +174,8 @@ ReplicatedMergeMutateTaskBase::PrepareResult MergeFromLogEntryTask::prepare() auto future_merged_part = std::make_shared(parts, entry.new_part_type); if (future_merged_part->name != entry.new_part_name) { - throw Exception("Future merged part name " + backQuote(future_merged_part->name) + " differs from part name in log entry: " - + backQuote(entry.new_part_name), ErrorCodes::BAD_DATA_PART_NAME); + throw Exception(ErrorCodes::BAD_DATA_PART_NAME, "Future merged part name {} differs from part name in log entry: {}", + backQuote(future_merged_part->name), backQuote(entry.new_part_name)); } std::optional tagger; diff --git a/src/Storages/MergeTree/MergeTask.cpp b/src/Storages/MergeTree/MergeTask.cpp index 1a411f1b2cd..35e1b82dae0 100644 --- a/src/Storages/MergeTree/MergeTask.cpp +++ b/src/Storages/MergeTree/MergeTask.cpp @@ -149,7 +149,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() } if (data_part_storage->exists()) - throw Exception("Directory " + data_part_storage->getFullPath() + " already exists", ErrorCodes::DIRECTORY_ALREADY_EXISTS); + throw Exception(ErrorCodes::DIRECTORY_ALREADY_EXISTS, "Directory {} already exists", data_part_storage->getFullPath()); if (!global_ctx->parent_part) global_ctx->temporary_directory_lock = global_ctx->data->getTemporaryPartDirectoryHolder(local_tmp_part_basename); @@ -263,7 +263,7 @@ bool MergeTask::ExecuteAndFinalizeHorizontalPart::prepare() break; } default : - throw Exception("Merge algorithm must be chosen", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Merge algorithm must be chosen"); } assert(global_ctx->gathering_columns.size() == global_ctx->gathering_column_names.size()); @@ -447,9 +447,10 @@ bool MergeTask::VerticalMergeStage::prepareVerticalMergeForAllColumns() const /// number of input rows. if ((rows_sources_count > 0 || global_ctx->future_part->parts.size() > 1) && sum_input_rows_exact != rows_sources_count + input_rows_filtered) throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Number of rows in source parts ({}) excluding filtered rows ({}) differs from number of bytes written to rows_sources file ({}). It is a bug.", - sum_input_rows_exact, input_rows_filtered, rows_sources_count); + ErrorCodes::LOGICAL_ERROR, + "Number of rows in source parts ({}) excluding filtered rows ({}) differs from number " + "of bytes written to rows_sources file ({}). 
It is a bug.", + sum_input_rows_exact, input_rows_filtered, rows_sources_count); ctx->rows_sources_read_buf = std::make_unique(ctx->tmp_disk->readFile(fileName(ctx->rows_sources_file->path()))); @@ -551,8 +552,8 @@ void MergeTask::VerticalMergeStage::finalizeVerticalMergeForOneColumn() const if (global_ctx->rows_written != ctx->column_elems_written) { - throw Exception("Written " + toString(ctx->column_elems_written) + " elements of column " + column_name + - ", but " + toString(global_ctx->rows_written) + " rows of PK columns", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Written {} elements of column {}, but {} rows of PK columns", + toString(ctx->column_elems_written), column_name, toString(global_ctx->rows_written)); } UInt64 rows = 0; diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index 5b6b0f09bc3..e2997df3bb0 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -406,9 +406,9 @@ IMergeTreeSelectAlgorithm::BlockAndProgress IMergeTreeSelectAlgorithm::readFromP const auto & sample_block = task->range_reader.getSampleBlock(); if (read_result.num_rows != 0 && sample_block.columns() != read_result.columns.size()) - throw Exception("Inconsistent number of columns got from MergeTreeRangeReader. " - "Have " + toString(sample_block.columns()) + " in sample block " - "and " + toString(read_result.columns.size()) + " columns in list", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Inconsistent number of columns got from MergeTreeRangeReader. " + "Have {} in sample block and {} columns in list", + toString(sample_block.columns()), toString(read_result.columns.size())); /// TODO: check columns have the same types as in header. 
@@ -545,8 +545,7 @@ static void injectPartConstVirtualColumns( if (!virtual_columns.empty()) { if (unlikely(rows && !task)) - throw Exception("Cannot insert virtual columns to non-empty chunk without specified task.", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot insert virtual columns to non-empty chunk without specified task."); const IMergeTreeDataPart * part = nullptr; if (rows) @@ -627,8 +626,8 @@ Block IMergeTreeSelectAlgorithm::applyPrewhereActions(Block block, const Prewher auto & row_level_column = block.getByName(prewhere_info->row_level_column_name); if (!row_level_column.type->canBeUsedInBooleanContext()) { - throw Exception("Invalid type for filter in PREWHERE: " + row_level_column.type->getName(), - ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, "Invalid type for filter in PREWHERE: {}", + row_level_column.type->getName()); } block.erase(prewhere_info->row_level_column_name); @@ -640,8 +639,8 @@ Block IMergeTreeSelectAlgorithm::applyPrewhereActions(Block block, const Prewher auto & prewhere_column = block.getByName(prewhere_info->prewhere_column_name); if (!prewhere_column.type->canBeUsedInBooleanContext()) { - throw Exception("Invalid type for filter in PREWHERE: " + prewhere_column.type->getName(), - ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, "Invalid type for filter in PREWHERE: {}", + prewhere_column.type->getName()); } if (prewhere_info->remove_prewhere_column) @@ -655,7 +654,8 @@ Block IMergeTreeSelectAlgorithm::applyPrewhereActions(Block block, const Prewher prewhere_column.column = prewhere_column.type->createColumnConst(block.rows(), 1.0f)->convertToFullColumnIfConst(); else throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, "Illegal type {} of column for filter", prewhere_column.type->getName()); + ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, + "Illegal type {} of column for filter", prewhere_column.type->getName()); } } diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp index 525d76d0f0f..6bd8cc60979 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp @@ -233,9 +233,9 @@ void MergeTreeBlockSizePredictor::startBlock() void MergeTreeBlockSizePredictor::update(const Block & sample_block, const Columns & columns, size_t num_rows, double decay) { if (columns.size() != sample_block.columns()) - throw Exception("Inconsistent number of columns passed to MergeTreeBlockSizePredictor. " - "Have " + toString(sample_block.columns()) + " in sample block " - "and " + toString(columns.size()) + " columns in list", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Inconsistent number of columns passed to MergeTreeBlockSizePredictor. 
" + "Have {} in sample block and {} columns in list", + toString(sample_block.columns()), toString(columns.size())); if (!is_initialized_in_update) { @@ -246,8 +246,8 @@ void MergeTreeBlockSizePredictor::update(const Block & sample_block, const Colum if (num_rows < block_size_rows) { - throw Exception("Updated block has less rows (" + toString(num_rows) + ") than previous one (" + toString(block_size_rows) + ")", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Updated block has less rows ({}) than previous one ({})", + num_rows, block_size_rows); } size_t diff_rows = num_rows - block_size_rows; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 9640e2fb366..b810223efa1 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -166,11 +166,11 @@ namespace ErrorCodes static void checkSampleExpression(const StorageInMemoryMetadata & metadata, bool allow_sampling_expression_not_in_primary_key, bool check_sample_column_is_correct) { if (metadata.sampling_key.column_names.empty()) - throw Exception("There are no columns in sampling expression", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "There are no columns in sampling expression"); const auto & pk_sample_block = metadata.getPrimaryKey().sample_block; if (!pk_sample_block.has(metadata.sampling_key.column_names[0]) && !allow_sampling_expression_not_in_primary_key) - throw Exception("Sampling expression must be present in the primary key", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Sampling expression must be present in the primary key"); if (!check_sample_column_is_correct) return; @@ -192,10 +192,9 @@ static void checkSampleExpression(const StorageInMemoryMetadata & metadata, bool } if (!is_correct_sample_condition) - throw Exception( - "Invalid sampling column type in storage parameters: " + sampling_column_type->getName() - + ". Must be one unsigned integer type", - ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, + "Invalid sampling column type in storage parameters: {}. 
Must be one unsigned integer type", + sampling_column_type->getName()); } @@ -208,7 +207,7 @@ void MergeTreeData::initializeDirectoriesAndFormatVersion(const std::string & re min_format_version = MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING; if (relative_data_path.empty()) - throw Exception("MergeTree storages require data path", ErrorCodes::INCORRECT_FILE_NAME); + throw Exception(ErrorCodes::INCORRECT_FILE_NAME, "MergeTree storages require data path"); const auto format_version_path = fs::path(relative_data_path) / MergeTreeData::FORMAT_VERSION_FILE_NAME; std::optional read_format_version; @@ -235,7 +234,9 @@ void MergeTreeData::initializeDirectoriesAndFormatVersion(const std::string & re if (!read_format_version.has_value()) read_format_version = current_format_version; else if (*read_format_version != current_format_version) - throw Exception(ErrorCodes::CORRUPTED_DATA, "Version file on {} contains version {} expected version is {}.", fullPath(disk, format_version_path), current_format_version, *read_format_version); + throw Exception(ErrorCodes::CORRUPTED_DATA, + "Version file on {} contains version {} expected version is {}.", + fullPath(disk, format_version_path), current_format_version, *read_format_version); } } @@ -270,9 +271,7 @@ void MergeTreeData::initializeDirectoriesAndFormatVersion(const std::string & re if (format_version < min_format_version) { if (min_format_version == MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING.toUnderType()) - throw Exception( - "MergeTree data format version on disk doesn't support custom partitioning", - ErrorCodes::METADATA_MISMATCH); + throw Exception(ErrorCodes::METADATA_MISMATCH, "MergeTree data format version on disk doesn't support custom partitioning"); } } @@ -319,7 +318,7 @@ MergeTreeData::MergeTreeData( checkPartitionKeyAndInitMinMax(metadata_.partition_key); setProperties(metadata_, metadata_, attach); if (minmax_idx_date_column_pos == -1) - throw Exception("Could not find Date column", ErrorCodes::BAD_TYPE_OF_FIELD); + throw Exception(ErrorCodes::BAD_TYPE_OF_FIELD, "Could not find Date column"); } catch (Exception & e) { @@ -392,7 +391,7 @@ bool MergeTreeData::supportsFinal() const static void checkKeyExpression(const ExpressionActions & expr, const Block & sample_block, const String & key_name, bool allow_nullable_key) { if (expr.hasArrayJoin()) - throw Exception(key_name + " key cannot contain array joins", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "{} key cannot contain array joins", key_name); try { @@ -412,7 +411,9 @@ static void checkKeyExpression(const ExpressionActions & expr, const Block & sam if (!allow_nullable_key && hasNullable(element.type)) throw Exception( - ErrorCodes::ILLEGAL_COLUMN, "{} key contains nullable columns, but merge tree setting `allow_nullable_key` is disabled", key_name); + ErrorCodes::ILLEGAL_COLUMN, + "{} key contains nullable columns, " + "but merge tree setting `allow_nullable_key` is disabled", key_name); } } @@ -420,7 +421,7 @@ void MergeTreeData::checkProperties( const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & old_metadata, bool attach) const { if (!new_metadata.sorting_key.definition_ast) - throw Exception("ORDER BY cannot be empty", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "ORDER BY cannot be empty"); KeyDescription new_sorting_key = new_metadata.sorting_key; KeyDescription new_primary_key = new_metadata.primary_key; @@ -428,9 +429,8 @@ void MergeTreeData::checkProperties( 
size_t sorting_key_size = new_sorting_key.column_names.size(); size_t primary_key_size = new_primary_key.column_names.size(); if (primary_key_size > sorting_key_size) - throw Exception("Primary key must be a prefix of the sorting key, but its length: " - + toString(primary_key_size) + " is greater than the sorting key length: " + toString(sorting_key_size), - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Primary key must be a prefix of the sorting key, but its length: " + "{} is greater than the sorting key length: {}", primary_key_size, sorting_key_size); NameSet primary_key_columns_set; @@ -442,12 +442,12 @@ void MergeTreeData::checkProperties( { const String & pk_column = new_primary_key.column_names[i]; if (pk_column != sorting_key_column) - throw Exception("Primary key must be a prefix of the sorting key, but the column in the position " - + toString(i) + " is " + sorting_key_column +", not " + pk_column, - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Primary key must be a prefix of the sorting key, " + "but the column in the position {} is {}", i, sorting_key_column +", not " + pk_column); if (!primary_key_columns_set.emplace(pk_column).second) - throw Exception("Primary key contains duplicate columns", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Primary key contains duplicate columns"); } } @@ -490,14 +490,15 @@ void MergeTreeData::checkProperties( for (const String & col : used_columns) { if (!added_columns.contains(col) || deleted_columns.contains(col)) - throw Exception("Existing column " + backQuoteIfNeed(col) + " is used in the expression that was " - "added to the sorting key. You can add expressions that use only the newly added columns", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Existing column {} is used in the expression that was added to the sorting key. 
" + "You can add expressions that use only the newly added columns", + backQuoteIfNeed(col)); if (new_metadata.columns.getDefaults().contains(col)) - throw Exception("Newly added column " + backQuoteIfNeed(col) + " has a default expression, so adding " - "expressions that use it to the sorting key is forbidden", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Newly added column {} has a default expression, so adding expressions that use " + "it to the sorting key is forbidden", backQuoteIfNeed(col)); } } } @@ -512,9 +513,7 @@ void MergeTreeData::checkProperties( MergeTreeIndexFactory::instance().validate(index, attach); if (indices_names.find(index.name) != indices_names.end()) - throw Exception( - "Index with name " + backQuote(index.name) + " already exists", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Index with name {} already exists", backQuote(index.name)); indices_names.insert(index.name); } @@ -527,9 +526,7 @@ void MergeTreeData::checkProperties( for (const auto & projection : new_metadata.projections) { if (projections_names.find(projection.name) != projections_names.end()) - throw Exception( - "Projection with name " + backQuote(projection.name) + " already exists", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Projection with name {} already exists", backQuote(projection.name)); projections_names.insert(projection.name); } @@ -674,7 +671,7 @@ void MergeTreeData::checkTTLExpressions(const StorageInMemoryMetadata & new_meta for (const auto & [name, ttl_description] : new_column_ttls) { if (columns_ttl_forbidden.contains(name)) - throw Exception("Trying to set TTL for key column " + name, ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Trying to set TTL for key column {}", name); } } auto new_table_ttl = new_metadata.table_ttl; @@ -685,13 +682,12 @@ void MergeTreeData::checkTTLExpressions(const StorageInMemoryMetadata & new_meta { if (!move_ttl.if_exists && !getDestinationForMoveTTL(move_ttl)) { - String message; if (move_ttl.destination_type == DataDestinationType::DISK) - message = "No such disk " + backQuote(move_ttl.destination_name) + " for given storage policy"; + throw Exception(ErrorCodes::BAD_TTL_EXPRESSION, + "No such disk {} for given storage policy", backQuote(move_ttl.destination_name)); else - message = "No such volume " + backQuote(move_ttl.destination_name) + " for given storage policy"; - - throw Exception(message, ErrorCodes::BAD_TTL_EXPRESSION); + throw Exception(ErrorCodes::BAD_TTL_EXPRESSION, + "No such volume {} for given storage policy", backQuote(move_ttl.destination_name)); } } } @@ -710,16 +706,17 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat const auto columns = metadata.getColumns().getAllPhysical(); if (!sign_column.empty() && mode != MergingParams::Collapsing && mode != MergingParams::VersionedCollapsing) - throw Exception("Sign column for MergeTree cannot be specified in modes except Collapsing or VersionedCollapsing.", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Sign column for MergeTree cannot be specified " + "in modes except Collapsing or VersionedCollapsing."); if (!version_column.empty() && mode != MergingParams::Replacing && mode != MergingParams::VersionedCollapsing) - throw Exception("Version column for MergeTree cannot be specified in modes except Replacing or VersionedCollapsing.", - ErrorCodes::LOGICAL_ERROR); + throw 
Exception(ErrorCodes::LOGICAL_ERROR, + "Version column for MergeTree cannot be specified " + "in modes except Replacing or VersionedCollapsing."); if (!columns_to_sum.empty() && mode != MergingParams::Summing) - throw Exception("List of columns to sum for MergeTree cannot be specified in all modes except Summing.", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "List of columns to sum for MergeTree cannot be specified in all modes except Summing."); /// Check that if the sign column is needed, it exists and is of type Int8. auto check_sign_column = [this, & columns](bool is_optional, const std::string & storage) @@ -729,7 +726,7 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat if (is_optional) return; - throw Exception("Logical error: Sign column for storage " + storage + " is empty", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: Sign column for storage {} is empty", storage); } bool miss_column = true; @@ -738,14 +735,14 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat if (column.name == sign_column) { if (!typeid_cast(column.type.get())) - throw Exception("Sign column (" + sign_column + ") for storage " + storage + " must have type Int8." - " Provided column of type " + column.type->getName() + ".", ErrorCodes::BAD_TYPE_OF_FIELD); + throw Exception(ErrorCodes::BAD_TYPE_OF_FIELD, "Sign column ({}) for storage {} must have type Int8. " + "Provided column of type {}.", sign_column, storage, column.type->getName()); miss_column = false; break; } } if (miss_column) - throw Exception("Sign column " + sign_column + " does not exist in table declaration.", ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "Sign column {} does not exist in table declaration.", sign_column); }; /// that if the version_column column is needed, it exists and is of unsigned integer type. 
@@ -756,7 +753,7 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat if (is_optional) return; - throw Exception("Logical error: Version column for storage " + storage + " is empty", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: Version column for storage {} is empty", storage); } bool miss_column = true; @@ -765,16 +762,16 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat if (column.name == version_column) { if (!column.type->canBeUsedAsVersion()) - throw Exception("The column " + version_column + - " cannot be used as a version column for storage " + storage + - " because it is of type " + column.type->getName() + - " (must be of an integer type or of type Date/DateTime/DateTime64)", ErrorCodes::BAD_TYPE_OF_FIELD); + throw Exception(ErrorCodes::BAD_TYPE_OF_FIELD, + "The column {} cannot be used as a version column for storage {} because it is " + "of type {} (must be of an integer type or of type Date/DateTime/DateTime64)", + version_column, storage, column.type->getName()); miss_column = false; break; } } if (miss_column) - throw Exception("Version column " + version_column + " does not exist in table declaration.", ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "Version column {} does not exist in table declaration.", version_column); }; if (mode == MergingParams::Collapsing) @@ -790,8 +787,9 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat return column_to_sum == Nested::extractTableName(name_and_type.name); }; if (columns.end() == std::find_if(columns.begin(), columns.end(), check_column_to_sum_exists)) - throw Exception( - "Column " + column_to_sum + " listed in columns to sum does not exist in table declaration.", ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, + "Column {} listed in columns to sum does not exist in table declaration.", + column_to_sum); } /// Check that summing columns are not in partition key. @@ -805,8 +803,8 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat std::back_inserter(names_intersection)); if (!names_intersection.empty()) - throw Exception("Columns: " + boost::algorithm::join(names_intersection, ", ") + - " listed both in columns to sum and in partition key. That is not allowed.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Columns: {} listed both in columns to sum and in partition key. 
" + "That is not allowed.", boost::algorithm::join(names_intersection, ", ")); } } @@ -1244,7 +1242,7 @@ MergeTreeData::LoadPartResult MergeTreeData::loadDataPart( return res; } else - throw Exception("Part " + res.part->name + " already exists but with different checksums", ErrorCodes::DUPLICATE_DATA_PART); + throw Exception(ErrorCodes::DUPLICATE_DATA_PART, "Part {} already exists but with different checksums", res.part->name); } if (to_state == DataPartState::Active) @@ -1428,7 +1426,7 @@ void MergeTreeData::loadDataPartsFromWAL(MutableDataPartsVector & parts_from_wal if ((*it)->checksums.getTotalChecksumHex() == part->checksums.getTotalChecksumHex()) LOG_ERROR(log, "Remove duplicate part {}", part->getDataPartStorage().getFullPath()); else - throw Exception("Part " + part->name + " already exists but with different checksums", ErrorCodes::DUPLICATE_DATA_PART); + throw Exception(ErrorCodes::DUPLICATE_DATA_PART, "Part {} already exists but with different checksums", part->name); } else { @@ -1624,9 +1622,9 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) { std::lock_guard lock(wal_init_lock); if (write_ahead_log != nullptr) - throw Exception( - "There are multiple WAL files appeared in current storage policy. You need to resolve this manually", - ErrorCodes::CORRUPTED_DATA); + throw Exception(ErrorCodes::CORRUPTED_DATA, + "There are multiple WAL files appeared in current storage policy. " + "You need to resolve this manually"); write_ahead_log = std::make_shared(*this, disk_ptr, it->name()); for (auto && part : write_ahead_log->restore(metadata_snapshot, getContext(), part_lock, is_static_storage)) @@ -1660,9 +1658,9 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) } if (have_non_adaptive_parts && have_adaptive_parts && !settings->enable_mixed_granularity_parts) - throw Exception( - "Table contains parts with adaptive and non adaptive marks, but `setting enable_mixed_granularity_parts` is disabled", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Table contains parts with adaptive and non adaptive marks, " + "but `setting enable_mixed_granularity_parts` is disabled"); has_non_adaptive_index_granularity_parts = have_non_adaptive_parts; has_lightweight_delete_parts = have_lightweight_in_parts; @@ -1670,10 +1668,13 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) if (suspicious_broken_parts > settings->max_suspicious_broken_parts && !skip_sanity_checks) throw Exception(ErrorCodes::TOO_MANY_UNEXPECTED_DATA_PARTS, - "Suspiciously many ({} parts, {} in total) broken parts to remove while maximum allowed broken parts count is {}. You can change the maximum value " - "with merge tree setting 'max_suspicious_broken_parts' in configuration section or in table settings in .sql file " + "Suspiciously many ({} parts, {} in total) broken parts " + "to remove while maximum allowed broken parts count is {}. 
You can change the maximum value " + "with merge tree setting 'max_suspicious_broken_parts' " + "in configuration section or in table settings in .sql file " "(don't forget to return setting back to default value)", - suspicious_broken_parts, formatReadableSizeWithBinarySuffix(suspicious_broken_parts_bytes), settings->max_suspicious_broken_parts); + suspicious_broken_parts, formatReadableSizeWithBinarySuffix(suspicious_broken_parts_bytes), + settings->max_suspicious_broken_parts); if (suspicious_broken_parts_bytes > settings->max_suspicious_broken_parts_bytes && !skip_sanity_checks) throw Exception(ErrorCodes::TOO_MANY_UNEXPECTED_DATA_PARTS, @@ -2506,7 +2507,7 @@ void MergeTreeData::rename(const String & new_table_path, const StorageID & new_ for (const auto & disk : disks) { if (disk->exists(new_table_path)) - throw Exception{"Target path already exists: " + fullPath(disk, new_table_path), ErrorCodes::DIRECTORY_ALREADY_EXISTS}; + throw Exception(ErrorCodes::DIRECTORY_ALREADY_EXISTS, "Target path already exists: {}", fullPath(disk, new_table_path)); } { @@ -2630,8 +2631,9 @@ void MergeTreeData::dropAllData() disk->listFiles(relative_data_path, files_left); throw Exception( - ErrorCodes::ZERO_COPY_REPLICATION_ERROR, "Directory {} with table {} not empty (files [{}]) after drop. Will not drop.", - relative_data_path, getStorageID().getNameForLogs(), fmt::join(files_left, ", ")); + ErrorCodes::ZERO_COPY_REPLICATION_ERROR, + "Directory {} with table {} not empty (files [{}]) after drop. Will not drop.", + relative_data_path, getStorageID().getNameForLogs(), fmt::join(files_left, ", ")); } LOG_INFO(log, "dropAllData: removing table directory recursive to cleanup garbage"); @@ -2729,10 +2731,10 @@ void checkVersionColumnTypesConversion(const IDataType * old_type, const IDataTy { /// Check new type can be used as version if (!new_type->canBeUsedAsVersion()) - throw Exception("Cannot alter version column " + backQuoteIfNeed(column_name) + - " to type " + new_type->getName() + - " because version column must be of an integer type or of type Date or DateTime" - , ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN); + throw Exception(ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN, + "Cannot alter version column {} to type {} because version column must be " + "of an integer type or of type Date or DateTime" , backQuoteIfNeed(column_name), + new_type->getName()); auto which_new_type = WhichDataType(new_type); auto which_old_type = WhichDataType(old_type); @@ -2745,21 +2747,19 @@ void checkVersionColumnTypesConversion(const IDataType * old_type, const IDataTy || (which_old_type.isDateTime() && !which_new_type.isDateTime()) || (which_old_type.isFloat() && !which_new_type.isFloat())) { - throw Exception("Cannot alter version column " + backQuoteIfNeed(column_name) + - " from type " + old_type->getName() + - " to type " + new_type->getName() + " because new type will change sort order of version column." + - " The only possible conversion is expansion of the number of bytes of the current type." - , ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN); + throw Exception(ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN, "Cannot alter version column {} from type {} to type {} " + "because new type will change sort order of version column. 
" + "The only possible conversion is expansion of the number of bytes of the current type.", + backQuoteIfNeed(column_name), old_type->getName(), new_type->getName()); } /// Check alter to smaller size: UInt64 -> UInt32 and so on if (new_type->getSizeOfValueInMemory() < old_type->getSizeOfValueInMemory()) { - throw Exception("Cannot alter version column " + backQuoteIfNeed(column_name) + - " from type " + old_type->getName() + - " to type " + new_type->getName() + " because new type is smaller than current in the number of bytes." + - " The only possible conversion is expansion of the number of bytes of the current type." - , ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN); + throw Exception(ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN, "Cannot alter version column {} from type {} to type {} " + "because new type is smaller than current in the number of bytes. " + "The only possible conversion is expansion of the number of bytes of the current type.", + backQuoteIfNeed(column_name), old_type->getName(), new_type->getName()); } } @@ -2780,15 +2780,16 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context auto mutation_commands = commands.getMutationCommands(new_metadata, settings.materialize_ttl_after_modify, getContext()); if (!mutation_commands.empty()) - throw Exception(ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN, "The following alter commands: '{}' will modify data on disk, but setting `allow_non_metadata_alters` is disabled", queryToString(mutation_commands.ast())); + throw Exception(ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN, + "The following alter commands: '{}' will modify data on disk, " + "but setting `allow_non_metadata_alters` is disabled", + queryToString(mutation_commands.ast())); } - if (commands.hasInvertedIndex(new_metadata, getContext()) && !settings.allow_experimental_inverted_index) - { - throw Exception( - "Experimental Inverted Index feature is not enabled (the setting 'allow_experimental_inverted_index')", - ErrorCodes::SUPPORT_IS_DISABLED); - } + if (commands.hasInvertedIndex(new_metadata) && !settings.allow_experimental_inverted_index) + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, + "Experimental Inverted Index feature is not enabled (turn on setting 'allow_experimental_inverted_index')"); + commands.apply(new_metadata, getContext()); /// Set of columns that shouldn't be altered. 
@@ -2889,46 +2890,39 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context if (command.type == AlterCommand::MODIFY_ORDER_BY && !is_custom_partitioned) { - throw Exception( - "ALTER MODIFY ORDER BY is not supported for default-partitioned tables created with the old syntax", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "ALTER MODIFY ORDER BY is not supported for default-partitioned tables created with the old syntax"); } if (command.type == AlterCommand::MODIFY_TTL && !is_custom_partitioned) { - throw Exception( - "ALTER MODIFY TTL is not supported for default-partitioned tables created with the old syntax", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "ALTER MODIFY TTL is not supported for default-partitioned tables created with the old syntax"); } if (command.type == AlterCommand::MODIFY_SAMPLE_BY) { if (!is_custom_partitioned) - throw Exception( - "ALTER MODIFY SAMPLE BY is not supported for default-partitioned tables created with the old syntax", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "ALTER MODIFY SAMPLE BY is not supported for default-partitioned tables created with the old syntax"); checkSampleExpression(new_metadata, getSettings()->compatibility_allow_sampling_expression_not_in_primary_key, getSettings()->check_sample_column_is_correct); } if (command.type == AlterCommand::ADD_INDEX && !is_custom_partitioned) { - throw Exception( - "ALTER ADD INDEX is not supported for tables with the old syntax", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "ALTER ADD INDEX is not supported for tables with the old syntax"); } if (command.type == AlterCommand::ADD_PROJECTION) { if (!is_custom_partitioned) - throw Exception( - "ALTER ADD PROJECTION is not supported for tables with the old syntax", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "ALTER ADD PROJECTION is not supported for tables with the old syntax"); } if (command.type == AlterCommand::RENAME_COLUMN) { if (columns_in_keys.contains(command.column_name)) { - throw Exception( - "Trying to ALTER RENAME key " + backQuoteIfNeed(command.column_name) + " column which is a part of key expression", - ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN); + throw Exception(ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN, + "Trying to ALTER RENAME key {} column which is a part of key expression", + backQuoteIfNeed(command.column_name)); } } else if (command.type == AlterCommand::DROP_COLUMN) @@ -2967,7 +2961,9 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context for (const auto & reset_setting : command.settings_resets) { if (!settings_from_storage->has(reset_setting)) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot reset setting '{}' because it doesn't exist for MergeTree engines family", reset_setting); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Cannot reset setting '{}' because it doesn't exist for MergeTree engines family", + reset_setting); } } else if (command.isRequireMutationStage(getInMemoryMetadata())) @@ -2975,8 +2971,8 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context /// This alter will override data on disk. Let's check that it doesn't /// modify immutable column. 
if (columns_alter_type_forbidden.contains(command.column_name)) - throw Exception("ALTER of key column " + backQuoteIfNeed(command.column_name) + " is forbidden", - ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN); + throw Exception(ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN, "ALTER of key column {} is forbidden", + backQuoteIfNeed(command.column_name)); if (command.type == AlterCommand::MODIFY_COLUMN) { @@ -2986,10 +2982,11 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context assert(it != old_types.end()); if (!isSafeForKeyConversion(it->second, command.data_type.get())) - throw Exception("ALTER of partition key column " + backQuoteIfNeed(command.column_name) + " from type " - + it->second->getName() + " to type " + command.data_type->getName() - + " is not safe because it can change the representation of partition key", - ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN); + throw Exception(ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN, + "ALTER of partition key column {} from type {} " + "to type {} is not safe because it can change the representation " + "of partition key", backQuoteIfNeed(command.column_name), + it->second->getName(), command.data_type->getName()); } if (columns_alter_type_metadata_only.contains(command.column_name)) @@ -2997,10 +2994,11 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context auto it = old_types.find(command.column_name); assert(it != old_types.end()); if (!isSafeForKeyConversion(it->second, command.data_type.get())) - throw Exception("ALTER of key column " + backQuoteIfNeed(command.column_name) + " from type " - + it->second->getName() + " to type " + command.data_type->getName() - + " is not safe because it can change the representation of primary key", - ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN); + throw Exception(ErrorCodes::ALTER_OF_COLUMN_IS_FORBIDDEN, + "ALTER of key column {} from type {} " + "to type {} is not safe because it can change the representation " + "of primary key", backQuoteIfNeed(command.column_name), + it->second->getName(), command.data_type->getName()); } if (old_metadata.getColumns().has(command.column_name)) @@ -3037,8 +3035,7 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context if ((!current_value || *current_value != new_value) && MergeTreeSettings::isReadonlySetting(setting_name)) { - throw Exception{"Setting '" + setting_name + "' is readonly for storage '" + getName() + "'", - ErrorCodes::READONLY_SETTING}; + throw Exception(ErrorCodes::READONLY_SETTING, "Setting '{}' is readonly for storage '{}'", setting_name, getName()); } if (!current_value && MergeTreeSettings::isPartFormatSetting(setting_name)) @@ -3047,7 +3044,7 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context copy.applyChange(changed_setting); String reason; if (!canUsePolymorphicParts(copy, &reason) && !reason.empty()) - throw Exception("Can't change settings. Reason: " + reason, ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Can't change settings. 
Reason: {}", reason); } if (setting_name == "storage_policy") @@ -3062,8 +3059,7 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context /// Prevent unsetting readonly setting if (MergeTreeSettings::isReadonlySetting(setting_name) && !new_value) { - throw Exception{"Setting '" + setting_name + "' is readonly for storage '" + getName() + "'", - ErrorCodes::READONLY_SETTING}; + throw Exception(ErrorCodes::READONLY_SETTING, "Setting '{}' is readonly for storage '{}'", setting_name, getName()); } if (MergeTreeSettings::isPartFormatSetting(setting_name) && !new_value) @@ -3073,7 +3069,7 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context copy->applyChanges(new_changes); String reason; if (!canUsePolymorphicParts(*copy, &reason) && !reason.empty()) - throw Exception("Can't change settings. Reason: " + reason, ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Can't change settings. Reason: {}", reason); } } @@ -3096,7 +3092,9 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context if (dropped_columns.size() > 1) postfix = "s"; throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Cannot drop or clear column{} '{}', because all columns in part '{}' will be removed from disk. Empty parts are not allowed", postfix, boost::algorithm::join(dropped_columns, ", "), part->name); + "Cannot drop or clear column{} '{}', because all columns " + "in part '{}' will be removed from disk. Empty parts are not allowed", + postfix, boost::algorithm::join(dropped_columns, ", "), part->name); } } } @@ -3146,7 +3144,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::createPart(const String & name, else if (type == MergeTreeDataPartType::InMemory) return std::make_shared(*this, name, part_info, data_part_storage, parent_part); else - throw Exception("Unknown type of part " + data_part_storage->getRelativePath(), ErrorCodes::UNKNOWN_PART_TYPE); + throw Exception(ErrorCodes::UNKNOWN_PART_TYPE, "Unknown type of part {}", data_part_storage->getRelativePath()); } MergeTreeData::MutableDataPartPtr MergeTreeData::createPart( @@ -3207,7 +3205,7 @@ void MergeTreeData::changeSettings( { auto disk = new_storage_policy->getDiskByName(disk_name); if (disk->exists(relative_data_path)) - throw Exception("New storage policy contain disks which already contain data of a table with the same name", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "New storage policy contain disks which already contain data of a table with the same name"); } for (const String & disk_name : all_diff_disk_names) @@ -3252,7 +3250,7 @@ void MergeTreeData::PartsTemporaryRename::tryRenameAll() { const auto & [old_name, new_name, disk] = old_and_new_names[i]; if (old_name.empty() || new_name.empty()) - throw DB::Exception("Empty part name. Most likely it's a bug.", ErrorCodes::LOGICAL_ERROR); + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Empty part name. Most likely it's a bug."); const auto full_path = fs::path(storage.relative_data_path) / source_dir; disk->moveFile(fs::path(full_path) / old_name, fs::path(full_path) / new_name); } @@ -3395,10 +3393,8 @@ void MergeTreeData::checkPartPartition(MutableDataPartPtr & part, DataPartsLock if (DataPartPtr existing_part_in_partition = getAnyPartInPartition(part->info.partition_id, lock)) { if (part->partition.value != existing_part_in_partition->partition.value) - throw Exception( - "Partition value mismatch between two parts with the same partition ID. 
Existing part: " - + existing_part_in_partition->name + ", newly added part: " + part->name, - ErrorCodes::CORRUPTED_DATA); + throw Exception(ErrorCodes::CORRUPTED_DATA, "Partition value mismatch between two parts with the same partition ID. " + "Existing part: {}, newly added part: {}", existing_part_in_partition->name, part->name); } } @@ -3408,15 +3404,14 @@ void MergeTreeData::checkPartDuplicate(MutableDataPartPtr & part, Transaction & if (it_duplicate != data_parts_by_info.end()) { - String message = "Part " + (*it_duplicate)->getNameWithState() + " already exists"; - if ((*it_duplicate)->checkState({DataPartState::Outdated, DataPartState::Deleting})) - throw Exception(message + ", but it will be deleted soon", ErrorCodes::PART_IS_TEMPORARILY_LOCKED); + throw Exception(ErrorCodes::PART_IS_TEMPORARILY_LOCKED, "Part {} already exists, but it will be deleted soon", + (*it_duplicate)->getNameWithState()); if (transaction.txn) - throw Exception(message, ErrorCodes::SERIALIZATION_ERROR); + throw Exception(ErrorCodes::SERIALIZATION_ERROR, "Part {} already exists", (*it_duplicate)->getNameWithState()); - throw Exception(message, ErrorCodes::DUPLICATE_DATA_PART); + throw Exception(ErrorCodes::DUPLICATE_DATA_PART, "Part {} already exists", (*it_duplicate)->getNameWithState()); } } @@ -3447,8 +3442,7 @@ bool MergeTreeData::addTempPart( { LOG_TRACE(log, "Adding temporary part from directory {} with name {}.", part->getDataPartStorage().getPartDirectory(), part->name); if (&out_transaction.data != this) - throw Exception("MergeTreeData::Transaction for one table cannot be used with another. It is a bug.", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeTreeData::Transaction for one table cannot be used with another. It is a bug."); if (part->hasLightweightDelete()) has_lightweight_delete_parts.store(true); @@ -3490,7 +3484,7 @@ bool MergeTreeData::renameTempPartAndReplaceImpl( LOG_TRACE(log, "Renaming temporary part {} to {} with tid {}.", part->getDataPartStorage().getPartDirectory(), part->name, out_transaction.getTID()); if (&out_transaction.data != this) - throw Exception("MergeTreeData::Transaction for one table cannot be used with another. It is a bug.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeTreeData::Transaction for one table cannot be used with another. 
It is a bug."); part->assertState({DataPartState::Temporary}); checkPartPartition(part, lock); @@ -3564,7 +3558,7 @@ bool MergeTreeData::renameTempPartAndAdd( if (!covered_parts.empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Added part {} covers {} existing part(s) (including {})", - part->name, toString(covered_parts.size()), covered_parts[0]->name); + part->name, covered_parts.size(), covered_parts[0]->name); return true; } @@ -3611,7 +3605,7 @@ void MergeTreeData::removePartsFromWorkingSetImmediatelyAndSetTemporaryState(con { auto it_part = data_parts_by_info.find(part->info); if (it_part == data_parts_by_info.end()) - throw Exception("Part " + part->getNameWithState() + " not found in data_parts", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} not found in data_parts", part->getNameWithState()); assert(part->getState() == MergeTreeDataPartState::PreActive); @@ -3629,7 +3623,7 @@ void MergeTreeData::removePartsFromWorkingSet( for (const auto & part : remove) { if (!data_parts_by_info.count(part->info)) - throw Exception("Part " + part->getNameWithState() + " not found in data_parts", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} not found in data_parts", part->getNameWithState()); part->assertState({DataPartState::PreActive, DataPartState::Active, DataPartState::Outdated}); } @@ -3656,7 +3650,7 @@ DataPartsVector MergeTreeData::grabActivePartsToRemoveForDropRange( for (const DataPartPtr & part : partition_range) { if (part->info.partition_id != drop_range.partition_id) - throw Exception("Unexpected partition_id of part " + part->name + ". This is a bug.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected partition_id of part {}. This is a bug.", part->name); /// It's a DROP PART and it's already executed by fetching some covering part bool is_drop_part = !drop_range.isFakeDropRangePart() && drop_range.min_block; @@ -3791,7 +3785,7 @@ void MergeTreeData::forcefullyMovePartToDetachedAndRemoveFromMemory(const MergeT auto it_part = data_parts_by_info.find(part_to_detach->info); if (it_part == data_parts_by_info.end()) - throw Exception("No such data part " + part_to_detach->getNameWithState(), ErrorCodes::NO_SUCH_DATA_PART); + throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "No such data part {}", part_to_detach->getNameWithState()); /// What if part_to_detach is a reference to *it_part? Make a new owner just in case. /// Important to own part pointer here (not const reference), because it will be removed from data_parts_indexes @@ -3934,7 +3928,7 @@ void MergeTreeData::tryRemovePartImmediately(DataPartPtr && part) { auto it = data_parts_by_info.find(part->info); if (it == data_parts_by_info.end() || (*it).get() != part.get()) - throw Exception("Part " + part->name + " doesn't exist", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} doesn't exist", part->name); part.reset(); @@ -4068,7 +4062,7 @@ void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until, const Contex ErrorCodes::TOO_MANY_PARTS, "Too many parts ({}) in all partitions in total. This indicates wrong choice of partition key. 
The threshold can be modified " "with 'max_parts_in_total' setting in element in config.xml or with per-table setting.", - toString(parts_count_in_total)); + parts_count_in_total); } size_t outdated_parts_over_threshold = 0; @@ -4202,7 +4196,7 @@ void MergeTreeData::swapActivePart(MergeTreeData::DataPartPtr part_copy) { auto active_part_it = data_parts_by_info.find(original_active_part->info); if (active_part_it == data_parts_by_info.end()) - throw Exception("Cannot swap part '" + part_copy->name + "', no such active part.", ErrorCodes::NO_SUCH_DATA_PART); + throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "Cannot swap part '{}', no such active part.", part_copy->name); /// We do not check allow_remote_fs_zero_copy_replication here because data may be shared /// when allow_remote_fs_zero_copy_replication turned on and off again @@ -4234,7 +4228,7 @@ void MergeTreeData::swapActivePart(MergeTreeData::DataPartPtr part_copy) return; } } - throw Exception("Cannot swap part '" + part_copy->name + "', no such active part.", ErrorCodes::NO_SUCH_DATA_PART); + throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "Cannot swap part '{}', no such active part.", part_copy->name); } @@ -4455,8 +4449,9 @@ void MergeTreeData::checkAlterPartitionIsPossible( { if (command.type == PartitionCommand::DROP_DETACHED_PARTITION && !settings.allow_drop_detached) - throw DB::Exception("Cannot execute query: DROP DETACHED PART is disabled " - "(see allow_drop_detached setting)", ErrorCodes::SUPPORT_IS_DISABLED); + throw DB::Exception(ErrorCodes::SUPPORT_IS_DISABLED, + "Cannot execute query: DROP DETACHED PART " + "is disabled (see allow_drop_detached setting)"); if (command.partition && command.type != PartitionCommand::DROP_DETACHED_PARTITION) { @@ -4473,7 +4468,7 @@ void MergeTreeData::checkAlterPartitionIsPossible( if (partition_ast && partition_ast->all) { if (command.type != PartitionCommand::DROP_PARTITION) - throw DB::Exception("Only support DETACH PARTITION ALL currently", ErrorCodes::SUPPORT_IS_DISABLED); + throw DB::Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Only support DETACH PARTITION ALL currently"); } else getPartitionIDFromQuery(command.partition, getContext()); @@ -4542,15 +4537,13 @@ void MergeTreeData::movePartitionToDisk(const ASTPtr & partition, const String & { String no_parts_to_move_message; if (moving_part) - no_parts_to_move_message = "Part '" + partition_id + "' is already on disk '" + disk->getName() + "'"; + throw Exception(ErrorCodes::UNKNOWN_DISK, "Part '{}' is already on disk '{}'", partition_id, disk->getName()); else - no_parts_to_move_message = "All parts of partition '" + partition_id + "' are already on disk '" + disk->getName() + "'"; - - throw Exception(no_parts_to_move_message, ErrorCodes::UNKNOWN_DISK); + throw Exception(ErrorCodes::UNKNOWN_DISK, "All parts of partition '{}' are already on disk '{}'", partition_id, disk->getName()); } if (!movePartsToSpace(parts, std::static_pointer_cast(disk))) - throw Exception("Cannot move parts because moves are manually disabled", ErrorCodes::ABORTED); + throw Exception(ErrorCodes::ABORTED, "Cannot move parts because moves are manually disabled"); } @@ -4597,15 +4590,13 @@ void MergeTreeData::movePartitionToVolume(const ASTPtr & partition, const String { String no_parts_to_move_message; if (moving_part) - no_parts_to_move_message = "Part '" + partition_id + "' is already on volume '" + volume->getName() + "'"; + throw Exception(ErrorCodes::UNKNOWN_DISK, "Part '{}' is already on volume '{}'", partition_id, volume->getName()); else - 
no_parts_to_move_message = "All parts of partition '" + partition_id + "' are already on volume '" + volume->getName() + "'"; - - throw Exception(no_parts_to_move_message, ErrorCodes::UNKNOWN_DISK); + throw Exception(ErrorCodes::UNKNOWN_DISK, "All parts of partition '{}' are already on volume '{}'", partition_id, volume->getName()); } if (!movePartsToSpace(parts, std::static_pointer_cast(volume))) - throw Exception("Cannot move parts because moves are manually disabled", ErrorCodes::ABORTED); + throw Exception(ErrorCodes::ABORTED, "Cannot move parts because moves are manually disabled"); } void MergeTreeData::movePartitionToShard(const ASTPtr & /*partition*/, bool /*move_part*/, const String & /*to*/, ContextPtr /*query_context*/) @@ -4756,7 +4747,7 @@ Pipe MergeTreeData::alterPartition( break; default: - throw Exception("Uninitialized partition command", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Uninitialized partition command"); } for (auto & command_result : current_command_results) command_result.command_type = command.typeToString(); @@ -5031,7 +5022,7 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, ContextPtr loc const auto & partition_ast = ast->as(); if (partition_ast.all) - throw Exception("Only Support DETACH PARTITION ALL currently", ErrorCodes::SUPPORT_IS_DISABLED); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Only Support DETACH PARTITION ALL currently"); if (!partition_ast.value) { @@ -5357,12 +5348,11 @@ DetachedPartsInfo MergeTreeData::getDetachedParts() const void MergeTreeData::validateDetachedPartName(const String & name) { if (name.find('/') != std::string::npos || name == "." || name == "..") - throw DB::Exception("Invalid part name '" + name + "'", ErrorCodes::INCORRECT_FILE_NAME); + throw DB::Exception(ErrorCodes::INCORRECT_FILE_NAME, "Invalid part name '{}'", name); if (startsWith(name, "attaching_") || startsWith(name, "deleting_")) - throw DB::Exception("Cannot drop part " + name + ": " - "most likely it is used by another DROP or ATTACH query.", - ErrorCodes::BAD_DATA_PART_NAME); + throw DB::Exception(ErrorCodes::BAD_DATA_PART_NAME, "Cannot drop part {}: " + "most likely it is used by another DROP or ATTACH query.", name); } void MergeTreeData::dropDetached(const ASTPtr & partition, bool part, ContextPtr local_context) @@ -5485,7 +5475,7 @@ inline ReservationPtr checkAndReturnReservation(UInt64 expected_size, Reservatio if (reservation) return reservation; - throw Exception(fmt::format("Cannot reserve {}, not enough space", ReadableSize(expected_size)), ErrorCodes::NOT_ENOUGH_SPACE); + throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Cannot reserve {}, not enough space", ReadableSize(expected_size)); } } @@ -6106,9 +6096,9 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( ContextPtr query_context) const { if (!metadata_snapshot->minmax_count_projection) - throw Exception( - "Cannot find the definition of minmax_count projection but it's used in current query. It's a bug", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Cannot find the definition of minmax_count projection but it's used in current query. " + "It's a bug"); auto block = metadata_snapshot->minmax_count_projection->sample_block.cloneEmpty(); bool need_primary_key_max_column = false; @@ -6205,7 +6195,7 @@ Block MergeTreeData::getMinMaxCountProjectionBlock( continue; if (!part->minmax_idx->initialized) - throw Exception("Found a non-empty part with uninitialized minmax_idx. 
It's a bug", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Found a non-empty part with uninitialized minmax_idx. It's a bug"); filter_column_data.emplace_back(); @@ -6838,12 +6828,12 @@ MergeTreeData & MergeTreeData::checkStructureAndGetMergeTreeData(IStorage & sour { MergeTreeData * src_data = dynamic_cast(&source_table); if (!src_data) - throw Exception("Table " + source_table.getStorageID().getNameForLogs() + - " supports attachPartitionFrom only for MergeTree family of table engines." - " Got " + source_table.getName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Table {} supports attachPartitionFrom only for MergeTree family of table engines. Got {}", + source_table.getStorageID().getNameForLogs(), source_table.getName()); if (my_snapshot->getColumns().getAllPhysical().sizeOfDifference(src_snapshot->getColumns().getAllPhysical())) - throw Exception("Tables have different structure", ErrorCodes::INCOMPATIBLE_COLUMNS); + throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS, "Tables have different structure"); auto query_to_string = [] (const ASTPtr & ast) { @@ -6851,16 +6841,16 @@ MergeTreeData & MergeTreeData::checkStructureAndGetMergeTreeData(IStorage & sour }; if (query_to_string(my_snapshot->getSortingKeyAST()) != query_to_string(src_snapshot->getSortingKeyAST())) - throw Exception("Tables have different ordering", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Tables have different ordering"); if (query_to_string(my_snapshot->getPartitionKeyAST()) != query_to_string(src_snapshot->getPartitionKeyAST())) - throw Exception("Tables have different partition key", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Tables have different partition key"); if (format_version != src_data->format_version) - throw Exception("Tables have different format_version", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Tables have different format_version"); if (query_to_string(my_snapshot->getPrimaryKeyAST()) != query_to_string(src_snapshot->getPrimaryKeyAST())) - throw Exception("Tables have different primary key", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Tables have different primary key"); return *src_data; } @@ -7310,7 +7300,7 @@ MergeTreeData::CurrentlyMovingPartsTagger::CurrentlyMovingPartsTagger(MergeTreeM { for (const auto & moving_part : parts_to_move) if (!data.currently_moving_parts.emplace(moving_part.part).second) - throw Exception("Cannot move part '" + moving_part.part->name + "'. It's already moving.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot move part '{}'. It's already moving.", moving_part.part->name); } MergeTreeData::CurrentlyMovingPartsTagger::~CurrentlyMovingPartsTagger() @@ -7399,19 +7389,17 @@ MergeTreeData::CurrentlyMovingPartsTaggerPtr MergeTreeData::checkPartsForMove(co { auto reservation = space->reserve(part->getBytesOnDisk()); if (!reservation) - throw Exception("Move is not possible. Not enough space on '" + space->getName() + "'", ErrorCodes::NOT_ENOUGH_SPACE); + throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Move is not possible. 
Not enough space on '{}'", space->getName()); auto reserved_disk = reservation->getDisk(); if (reserved_disk->exists(relative_data_path + part->name)) - throw Exception( - "Move is not possible: " + fullPath(reserved_disk, relative_data_path + part->name) + " already exists", - ErrorCodes::DIRECTORY_ALREADY_EXISTS); + throw Exception(ErrorCodes::DIRECTORY_ALREADY_EXISTS, "Move is not possible: {} already exists", + fullPath(reserved_disk, relative_data_path + part->name)); if (currently_moving_parts.contains(part) || partIsAssignedToBackgroundOperation(part)) - throw Exception( - "Cannot move part '" + part->name + "' because it's participating in background process", - ErrorCodes::PART_IS_TEMPORARILY_LOCKED); + throw Exception(ErrorCodes::PART_IS_TEMPORARILY_LOCKED, + "Cannot move part '{}' because it's participating in background process", part->name); parts_to_move.emplace_back(part, std::move(reservation)); } diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 8ced76b0bc5..2e57d58ef41 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -1089,7 +1090,7 @@ protected: MultiVersion storage_settings; /// Used to determine which UUIDs to send to root query executor for deduplication. - mutable std::shared_mutex pinned_part_uuids_mutex; + mutable SharedMutex pinned_part_uuids_mutex; PinnedPartUUIDsPtr pinned_part_uuids; /// True if at least one part was created/removed with transaction. @@ -1197,23 +1198,23 @@ protected: void modifyPartState(DataPartIteratorByStateAndInfo it, DataPartState state) { if (!data_parts_by_state_and_info.modify(it, getStateModifier(state))) - throw Exception("Can't modify " + (*it)->getNameWithState(), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't modify {}", (*it)->getNameWithState()); } void modifyPartState(DataPartIteratorByInfo it, DataPartState state) { if (!data_parts_by_state_and_info.modify(data_parts_indexes.project(it), getStateModifier(state))) - throw Exception("Can't modify " + (*it)->getNameWithState(), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't modify {}", (*it)->getNameWithState()); } void modifyPartState(const DataPartPtr & part, DataPartState state) { auto it = data_parts_by_info.find(part->info); if (it == data_parts_by_info.end() || (*it).get() != part.get()) - throw Exception("Part " + part->name + " doesn't exist", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} doesn't exist", part->name); if (!data_parts_by_state_and_info.modify(data_parts_indexes.project(it), getStateModifier(state))) - throw Exception("Can't modify " + (*it)->getNameWithState(), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't modify {}", (*it)->getNameWithState()); } /// Used to serialize calls to grabOldParts. 
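The hunks above and below all apply the same mechanical change: the old DB::Exception constructor that took a pre-concatenated message string followed by an error code is replaced by the variadic constructor that takes the error code first and then an fmt-style format string with its arguments. For readers unfamiliar with that calling convention, here is a minimal, self-contained sketch of the pattern; the class ToyException, the numeric code 1001, and the direct use of the fmt library are illustrative stand-ins, not ClickHouse's actual Exception API.

#include <stdexcept>
#include <string>
#include <utility>
#include <fmt/core.h>

// Toy stand-in for DB::Exception: stores an error code and a message that is
// rendered once from an fmt format string plus its arguments.
class ToyException : public std::runtime_error
{
public:
    template <typename... Args>
    ToyException(int code_, fmt::format_string<Args...> fmt_str, Args &&... args)
        : std::runtime_error(fmt::format(fmt_str, std::forward<Args>(args)...)), code(code_)
    {
    }

    int code;
};

int main()
{
    const std::string part_name = "all_1_1_0";
    try
    {
        // Old pattern being removed throughout this diff: message assembled with
        // operator+, error code passed last, e.g.
        //   throw Exception("Part " + part_name + " doesn't exist", ErrorCodes::LOGICAL_ERROR);
        // New pattern being introduced: error code first, then a format string and arguments.
        throw ToyException(1001, "Part {} doesn't exist", part_name);
    }
    catch (const ToyException & e)
    {
        fmt::print("code={} message={}\n", e.code, e.what());
    }
}

Besides keeping the error code visually first, the format string stays a single literal that fmt can check against its arguments at compile time, instead of being rebuilt by string concatenation at every throw site. Later hunks apply the same idea to hand-rolled joining loops, e.g. MergeTreeDataPartChecksums now uses fmt::join: throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "Checksums of parts don't match: {}", fmt::join(errors, ", ")).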
diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index dee3b1d9967..2ffc6dc818e 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -373,7 +373,7 @@ SelectPartsDecision MergeTreeDataMergerMutator::selectPartsToMerge( /// Do not allow to "merge" part with itself for regular merges, unless it is a TTL-merge where it is ok to remove some values with expired ttl if (parts_to_merge.size() == 1) - throw Exception("Logical error: merge selector returned only one part to merge", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: merge selector returned only one part to merge"); if (parts_to_merge.empty()) { @@ -631,8 +631,8 @@ MergeTreeData::DataPartPtr MergeTreeDataMergerMutator::renameMergedTemporaryPart { for (size_t i = 0; i < parts.size(); ++i) if (parts[i]->name != replaced_parts[i]->name) - throw Exception("Unexpected part removed when adding " + new_data_part->name + ": " + replaced_parts[i]->name - + " instead of " + parts[i]->name, ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected part removed when adding {}: {} instead of {}", + new_data_part->name, replaced_parts[i]->name, parts[i]->name); } LOG_TRACE(log, "Merged {} parts: [{}, {}] -> {}", parts.size(), parts.front()->name, parts.back()->name, new_data_part->name); diff --git a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp index 81f884ef45a..4c7da39cc22 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartChecksum.cpp @@ -30,30 +30,30 @@ void MergeTreeDataPartChecksum::checkEqual(const MergeTreeDataPartChecksum & rhs if (is_compressed && have_uncompressed) { if (!rhs.is_compressed) - throw Exception("No uncompressed checksum for file " + name, ErrorCodes::CHECKSUM_DOESNT_MATCH); + throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "No uncompressed checksum for file {}", name); if (rhs.uncompressed_size != uncompressed_size) - throw Exception("Unexpected uncompressed size of file " + name + " in data part", ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART); + throw Exception(ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, "Unexpected uncompressed size of file {} in data part", name); if (rhs.uncompressed_hash != uncompressed_hash) - throw Exception("Checksum mismatch for uncompressed file " + name + " in data part", ErrorCodes::CHECKSUM_DOESNT_MATCH); + throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "Checksum mismatch for uncompressed file {} in data part", name); return; } if (rhs.file_size != file_size) - throw Exception("Unexpected size of file " + name + " in data part", ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART); + throw Exception(ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, "Unexpected size of file {} in data part", name); if (rhs.file_hash != file_hash) - throw Exception("Checksum mismatch for file " + name + " in data part", ErrorCodes::CHECKSUM_DOESNT_MATCH); + throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "Checksum mismatch for file {} in data part", name); } void MergeTreeDataPartChecksum::checkSize(const DiskPtr & disk, const String & path) const { if (!disk->exists(path)) - throw Exception(fullPath(disk, path) + " doesn't exist", ErrorCodes::FILE_DOESNT_EXIST); + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "{} doesn't exist", fullPath(disk, path)); if (disk->isDirectory(path)) // This 
is a projection, no need to check its size. return; UInt64 size = disk->getFileSize(path); if (size != file_size) - throw Exception(fullPath(disk, path) + " has unexpected size: " + toString(size) + " instead of " + toString(file_size), - ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART); + throw Exception(ErrorCodes::BAD_SIZE_OF_FILE_IN_DATA_PART, "{} has unexpected size: {} instead of {}", + fullPath(disk, path), size, file_size); } @@ -64,7 +64,7 @@ void MergeTreeDataPartChecksums::checkEqual(const MergeTreeDataPartChecksums & r const String & name = it.first; if (!files.contains(name)) - throw Exception("Unexpected file " + name + " in data part", ErrorCodes::UNEXPECTED_FILE_IN_DATA_PART); + throw Exception(ErrorCodes::UNEXPECTED_FILE_IN_DATA_PART, "Unexpected file {} in data part", name); } for (const auto & it : files) @@ -73,7 +73,7 @@ void MergeTreeDataPartChecksums::checkEqual(const MergeTreeDataPartChecksums & r auto jt = rhs.files.find(name); if (jt == rhs.files.end()) - throw Exception("No file " + name + " in data part", ErrorCodes::NO_FILE_IN_DATA_PART); + throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No file {} in data part", name); it.second.checkEqual(jt->second, have_uncompressed, name); } @@ -109,7 +109,7 @@ bool MergeTreeDataPartChecksums::read(ReadBuffer & in, size_t format_version) case 4: return readV4(in); default: - throw Exception("Bad checksums format version: " + DB::toString(format_version), ErrorCodes::UNKNOWN_FORMAT); + throw Exception(ErrorCodes::UNKNOWN_FORMAT, "Bad checksums format version: {}", DB::toString(format_version)); } } @@ -271,7 +271,7 @@ MergeTreeDataPartChecksums MergeTreeDataPartChecksums::deserializeFrom(const Str ReadBufferFromString in(s); MergeTreeDataPartChecksums res; if (!res.read(in)) - throw Exception("Checksums format is too old", ErrorCodes::FORMAT_VERSION_TOO_OLD); + throw Exception(ErrorCodes::FORMAT_VERSION_TOO_OLD, "Checksums format is too old"); assertEOF(in); return res; } @@ -370,7 +370,7 @@ bool MinimalisticDataPartChecksums::deserialize(ReadBuffer & in) } if (format_version > MINIMAL_VERSION_WITH_MINIMALISTIC_CHECKSUMS) - throw Exception("Unknown checksums format version: " + DB::toString(format_version), ErrorCodes::UNKNOWN_FORMAT); + throw Exception(ErrorCodes::UNKNOWN_FORMAT, "Unknown checksums format version: {}", DB::toString(format_version)); deserializeWithoutHeader(in); @@ -482,11 +482,7 @@ void MinimalisticDataPartChecksums::checkEqualImpl(const MinimalisticDataPartChe if (!errors.empty()) { - String error_msg = "Checksums of parts don't match: " + errors.front(); - for (size_t i = 1; i < errors.size(); ++i) - error_msg += ", " + errors[i]; - - throw Exception(error_msg, ErrorCodes::CHECKSUM_DOESNT_MATCH); + throw Exception(ErrorCodes::CHECKSUM_DOESNT_MATCH, "Checksums of parts don't match: {}", fmt::join(errors, ", ")); } } diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index a537b44d9ea..2da1ea4aa0f 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -99,7 +99,7 @@ void MergeTreeDataPartCompact::loadIndexGranularityImpl( size_t columns_count, const IDataPartStorage & data_part_storage_) { if (!index_granularity_info_.mark_type.adaptive) - throw Exception("MergeTreeDataPartCompact cannot be created with non-adaptive granulary.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergeTreeDataPartCompact cannot be created with non-adaptive 
granulary."); auto marks_file_path = index_granularity_info_.getMarksFilePath("data"); if (!data_part_storage_.exists(marks_file_path)) @@ -137,7 +137,7 @@ void MergeTreeDataPartCompact::loadIndexGranularityImpl( void MergeTreeDataPartCompact::loadIndexGranularity() { if (columns.empty()) - throw Exception("No columns in part " + name, ErrorCodes::NO_FILE_IN_DATA_PART); + throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No columns in part {}", name); loadIndexGranularityImpl(index_granularity, index_granularity_info, columns.size(), getDataPartStorage()); } @@ -162,7 +162,7 @@ void MergeTreeDataPartCompact::checkConsistency(bool require_part_metadata) cons { /// count.txt should be present even in non custom-partitioned parts if (!checksums.files.contains("count.txt")) - throw Exception("No checksum for count.txt", ErrorCodes::NO_FILE_IN_DATA_PART); + throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No checksum for count.txt"); if (require_part_metadata) { @@ -210,7 +210,7 @@ void MergeTreeDataPartCompact::checkConsistency(bool require_part_metadata) cons "Part {} is broken: bad size of marks file '{}': {}, must be: {}", getDataPartStorage().getRelativePath(), std::string(fs::path(getDataPartStorage().getFullPath()) / mrk_file_name), - std::to_string(file_size), std::to_string(expected_file_size)); + file_size, expected_file_size); } } } diff --git a/src/Storages/MergeTree/MergeTreeDataPartType.cpp b/src/Storages/MergeTree/MergeTreeDataPartType.cpp index 59cea62121b..ac277233d3c 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartType.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartType.cpp @@ -14,7 +14,7 @@ void MergeTreeDataPartType::fromString(const String & str) { auto maybe_value = magic_enum::enum_cast(str); if (!maybe_value || *maybe_value == Value::Unknown) - throw DB::Exception("Unexpected string for part type: " + str, ErrorCodes::UNKNOWN_PART_TYPE); + throw DB::Exception(ErrorCodes::UNKNOWN_PART_TYPE, "Unexpected string for part type: {}", str); value = *maybe_value; } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index 2418960f992..a31b88365c2 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -160,7 +160,7 @@ void MergeTreeDataPartWide::loadIndexGranularityImpl( void MergeTreeDataPartWide::loadIndexGranularity() { if (columns.empty()) - throw Exception("No columns in part " + name, ErrorCodes::NO_FILE_IN_DATA_PART); + throw Exception(ErrorCodes::NO_FILE_IN_DATA_PART, "No columns in part {}", name); loadIndexGranularityImpl(index_granularity, index_granularity_info, getDataPartStorage(), getFileNameForColumn(columns.front())); } @@ -302,7 +302,8 @@ void MergeTreeDataPartWide::calculateEachColumnSizes(ColumnSizeByName & each_col throw Exception( ErrorCodes::LOGICAL_ERROR, "Column {} has rows count {} according to size in memory " - "and size of single value, but data part {} has {} rows", backQuote(column.name), rows_in_column, name, rows_count); + "and size of single value, but data part {} has {} rows", + backQuote(column.name), rows_in_column, name, rows_count); } } #endif diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 4c1d117ac73..692aed8a116 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -87,7 +87,9 @@ namespace Granules getGranulesToWrite(const 
MergeTreeIndexGranularity & index_granularity, size_t block_rows, size_t current_mark, bool last_block) { if (current_mark >= index_granularity.getMarksCount()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Request to get granules from mark {} but index granularity size is {}", current_mark, index_granularity.getMarksCount()); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Request to get granules from mark {} but index granularity size is {}", + current_mark, index_granularity.getMarksCount()); Granules result; size_t current_row = 0; @@ -99,7 +101,9 @@ Granules getGranulesToWrite(const MergeTreeIndexGranularity & index_granularity, { /// Invariant: we always have equal amount of rows for block in compact parts because we accumulate them in buffer. /// The only exclusion is the last block, when we cannot accumulate more rows. - throw Exception(ErrorCodes::LOGICAL_ERROR, "Required to write {} rows, but only {} rows was written for the non last granule", expected_rows_in_mark, rows_left_in_block); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Required to write {} rows, but only {} rows was written for the non last granule", + expected_rows_in_mark, rows_left_in_block); } result.emplace_back(Granule{ diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp index 8066a097499..9afa7a1e80d 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterInMemory.cpp @@ -22,7 +22,7 @@ void MergeTreeDataPartWriterInMemory::write( const Block & block, const IColumn::Permutation * permutation) { if (part_in_memory->block) - throw Exception("DataPartWriterInMemory supports only one write", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "DataPartWriterInMemory supports only one write"); Block primary_key_block; if (settings.rewrite_primary_key) diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index e9629f83d09..1f40177d0fa 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include "IO/WriteBufferFromFileDecorator.h" @@ -112,7 +112,8 @@ MergeTreeDataPartWriterOnDisk::MergeTreeDataPartWriterOnDisk( , compress_primary_key(settings.compress_primary_key) { if (settings.blocks_are_granules_size && !index_granularity.empty()) - throw Exception("Can't take information about index granularity from blocks, when non empty index_granularity array specified", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Can't take information about index granularity from blocks, when non empty index_granularity array specified"); if (!data_part->getDataPartStorage().exists()) data_part->getDataPartStorage().createDirectories(); @@ -214,7 +215,7 @@ void MergeTreeDataPartWriterOnDisk::initSkipIndices() settings.query_write_settings)); GinIndexStorePtr store = nullptr; - if (dynamic_cast(&*index_helper) != nullptr) + if (dynamic_cast(&*index_helper) != nullptr) { store = std::make_shared(stream_name, data_part->getDataPartStoragePtr(), data_part->getDataPartStoragePtr(), storage.getSettings()->max_digestion_size_per_segment); gin_index_stores[stream_name] = store; @@ -275,15 +276,13 @@ void MergeTreeDataPartWriterOnDisk::calculateAndSerializeSkipIndices(const Block auto & stream = *skip_indices_streams[i]; WriteBuffer & 
marks_out = stream.compress_marks ? stream.marks_compressed_hashing : stream.marks_hashing; - GinIndexStorePtr store = nullptr; - if (dynamic_cast(&*index_helper) != nullptr) + GinIndexStorePtr store; + if (dynamic_cast(&*index_helper) != nullptr) { String stream_name = index_helper->getFileName(); auto it = gin_index_stores.find(stream_name); - if (it == gin_index_stores.cend()) - { - throw Exception("Index '" + stream_name + "' does not exist", ErrorCodes::LOGICAL_ERROR); - } + if (it == gin_index_stores.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Index '{}' does not exist", stream_name); store = it->second; } @@ -400,9 +399,7 @@ void MergeTreeDataPartWriterOnDisk::finishSkipIndicesSerialization(bool sync) stream->sync(); } for (auto & store: gin_index_stores) - { store.second->finalize(); - } gin_index_stores.clear(); skip_indices_streams.clear(); skip_indices_aggregators.clear(); diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h index 2377a129ac0..b76b74ab717 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h @@ -162,7 +162,7 @@ protected: /// Data is already written up to this mark. size_t current_mark = 0; - GinIndexStores gin_index_stores; + GinIndexStoreFactory::GinIndexStores gin_index_stores; private: void initSkipIndices(); void initPrimaryIndex(); diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index 62917bcb084..cce459c1ba8 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -26,7 +26,9 @@ namespace Granules getGranulesToWrite(const MergeTreeIndexGranularity & index_granularity, size_t block_rows, size_t current_mark, size_t rows_written_in_last_mark) { if (current_mark >= index_granularity.getMarksCount()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Request to get granules from mark {} but index granularity size is {}", current_mark, index_granularity.getMarksCount()); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Request to get granules from mark {} but index granularity size is {}", + current_mark, index_granularity.getMarksCount()); Granules result; size_t current_row = 0; @@ -157,9 +159,9 @@ void MergeTreeDataPartWriterWide::shiftCurrentMark(const Granules & granules_wri { if (settings.can_use_adaptive_granularity && settings.blocks_are_granules_size) throw Exception(ErrorCodes::LOGICAL_ERROR, "Incomplete granules are not allowed while blocks are granules size. 
" - "Mark number {} (rows {}), rows written in last mark {}, rows to write in last mark from block {} (from row {}), total marks currently {}", - last_granule.mark_number, index_granularity.getMarkRows(last_granule.mark_number), rows_written_in_last_mark, - last_granule.rows_to_write, last_granule.start_row, index_granularity.getMarksCount()); + "Mark number {} (rows {}), rows written in last mark {}, rows to write in last mark from block {} (from row {}), " + "total marks currently {}", last_granule.mark_number, index_granularity.getMarkRows(last_granule.mark_number), + rows_written_in_last_mark, last_granule.rows_to_write, last_granule.start_row, index_granularity.getMarksCount()); /// Shift forward except last granule setCurrentMark(getCurrentMark() + granules_written.size() - 1); @@ -345,7 +347,8 @@ void MergeTreeDataPartWriterWide::writeColumn( const Granules & granules) { if (granules.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty granules for column {}, current mark {}", backQuoteIfNeed(name_and_type.name), getCurrentMark()); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty granules for column {}, current mark {}", + backQuoteIfNeed(name_and_type.name), getCurrentMark()); const auto & [name, type] = name_and_type; auto [it, inserted] = serialization_states.emplace(name, nullptr); @@ -371,7 +374,10 @@ void MergeTreeDataPartWriterWide::writeColumn( if (granule.mark_on_start) { if (last_non_written_marks.contains(name)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "We have to add new mark for column, but already have non written mark. Current mark {}, total marks {}, offset {}", getCurrentMark(), index_granularity.getMarksCount(), rows_written_in_last_mark); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "We have to add new mark for column, but already have non written mark. 
" + "Current mark {}, total marks {}, offset {}", + getCurrentMark(), index_granularity.getMarksCount(), rows_written_in_last_mark); last_non_written_marks[name] = getCurrentMarksForColumn(name_and_type, offset_columns); } @@ -442,7 +448,9 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai for (mark_num = 0; !mrk_in->eof(); ++mark_num) { if (mark_num > index_granularity.getMarksCount()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Incorrect number of marks in memory {}, on disk (at least) {}", index_granularity.getMarksCount(), mark_num + 1); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Incorrect number of marks in memory {}, on disk (at least) {}", + index_granularity.getMarksCount(), mark_num + 1); DB::readBinary(offset_in_compressed_file, *mrk_in); DB::readBinary(offset_in_decompressed_block, *mrk_in); @@ -454,7 +462,9 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai if (must_be_last) { if (index_granularity_rows != 0) - throw Exception(ErrorCodes::LOGICAL_ERROR, "We ran out of binary data but still have non empty mark #{} with rows number {}", mark_num, index_granularity_rows); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "We ran out of binary data but still have non empty mark #{} with rows number {}", + mark_num, index_granularity_rows); if (!mrk_in->eof()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Mark #{} must be last, but we still have some to read", mark_num); @@ -469,20 +479,28 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai serialization->deserializeBinaryBulk(*column, bin_in, 1000000000, 0.0); throw Exception(ErrorCodes::LOGICAL_ERROR, - "Still have {} rows in bin stream, last mark #{} index granularity size {}, last rows {}", column->size(), mark_num, index_granularity.getMarksCount(), index_granularity_rows); + "Still have {} rows in bin stream, last mark #{}" + " index granularity size {}, last rows {}", + column->size(), mark_num, index_granularity.getMarksCount(), index_granularity_rows); } if (index_granularity_rows > data_part->index_granularity_info.fixed_index_granularity) { throw Exception(ErrorCodes::LOGICAL_ERROR, "Mark #{} has {} rows, but max fixed granularity is {}, index granularity size {}", - mark_num, index_granularity_rows, data_part->index_granularity_info.fixed_index_granularity, index_granularity.getMarksCount()); + mark_num, index_granularity_rows, data_part->index_granularity_info.fixed_index_granularity, + index_granularity.getMarksCount()); } if (index_granularity_rows != index_granularity.getMarkRows(mark_num)) throw Exception( - ErrorCodes::LOGICAL_ERROR, "Incorrect mark rows for part {} for mark #{} (compressed offset {}, decompressed offset {}), in-memory {}, on disk {}, total marks {}", - data_part->getDataPartStorage().getFullPath(), mark_num, offset_in_compressed_file, offset_in_decompressed_block, index_granularity.getMarkRows(mark_num), index_granularity_rows, index_granularity.getMarksCount()); + ErrorCodes::LOGICAL_ERROR, + "Incorrect mark rows for part {} for mark #{}" + " (compressed offset {}, decompressed offset {}), in-memory {}, on disk {}, total marks {}", + data_part->getDataPartStorage().getFullPath(), + mark_num, offset_in_compressed_file, offset_in_decompressed_block, + index_granularity.getMarkRows(mark_num), index_granularity_rows, + index_granularity.getMarksCount()); auto column = type->createColumn(); @@ -513,14 +531,18 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai } throw 
Exception( - ErrorCodes::LOGICAL_ERROR, "Incorrect mark rows for mark #{} (compressed offset {}, decompressed offset {}), actually in bin file {}, in mrk file {}, total marks {}", - mark_num, offset_in_compressed_file, offset_in_decompressed_block, column->size(), index_granularity.getMarkRows(mark_num), index_granularity.getMarksCount()); + ErrorCodes::LOGICAL_ERROR, "Incorrect mark rows for mark #{} (compressed offset {}, decompressed offset {}), " + "actually in bin file {}, in mrk file {}, total marks {}", + mark_num, offset_in_compressed_file, offset_in_decompressed_block, column->size(), + index_granularity.getMarkRows(mark_num), index_granularity.getMarksCount()); } } if (!mrk_in->eof()) throw Exception(ErrorCodes::LOGICAL_ERROR, - "Still have something in marks stream, last mark #{} index granularity size {}, last rows {}", mark_num, index_granularity.getMarksCount(), index_granularity_rows); + "Still have something in marks stream, last mark #{}" + " index granularity size {}, last rows {}", + mark_num, index_granularity.getMarksCount(), index_granularity_rows); if (!bin_in.eof()) { auto column = type->createColumn(); @@ -528,7 +550,9 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const NameAndTypePai serialization->deserializeBinaryBulk(*column, bin_in, 1000000000, 0.0); throw Exception(ErrorCodes::LOGICAL_ERROR, - "Still have {} rows in bin stream, last mark #{} index granularity size {}, last rows {}", column->size(), mark_num, index_granularity.getMarksCount(), index_granularity_rows); + "Still have {} rows in bin stream, last mark #{}" + " index granularity size {}, last rows {}", + column->size(), mark_num, index_granularity.getMarksCount(), index_granularity_rows); } } @@ -543,9 +567,11 @@ void MergeTreeDataPartWriterWide::fillDataChecksums(IMergeTreeDataPart::Checksum if (rows_written_in_last_mark > 0) { if (settings.can_use_adaptive_granularity && settings.blocks_are_granules_size) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Incomplete granule is not allowed while blocks are granules size even for last granule. " - "Mark number {} (rows {}), rows written for last mark {}, total marks {}", - getCurrentMark(), index_granularity.getMarkRows(getCurrentMark()), rows_written_in_last_mark, index_granularity.getMarksCount()); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Incomplete granule is not allowed while blocks are granules size even for last granule. 
" + "Mark number {} (rows {}), rows written for last mark {}, total marks {}", + getCurrentMark(), index_granularity.getMarkRows(getCurrentMark()), + rows_written_in_last_mark, index_granularity.getMarksCount()); adjustLastMarkIfNeedAndFlushToDisk(rows_written_in_last_mark); } @@ -656,7 +682,8 @@ static void fillIndexGranularityImpl( void MergeTreeDataPartWriterWide::fillIndexGranularity(size_t index_granularity_for_block, size_t rows_in_block) { if (getCurrentMark() < index_granularity.getMarksCount() && getCurrentMark() != index_granularity.getMarksCount() - 1) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to add marks, while current mark {}, but total marks {}", getCurrentMark(), index_granularity.getMarksCount()); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to add marks, while current mark {}, but total marks {}", + getCurrentMark(), index_granularity.getMarksCount()); size_t index_offset = 0; if (rows_written_in_last_mark != 0) @@ -683,8 +710,10 @@ void MergeTreeDataPartWriterWide::adjustLastMarkIfNeedAndFlushToDisk(size_t new_ if (compute_granularity && settings.can_use_adaptive_granularity) { if (getCurrentMark() != index_granularity.getMarksCount() - 1) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Non last mark {} (with {} rows) having rows offset {}, total marks {}", - getCurrentMark(), index_granularity.getMarkRows(getCurrentMark()), rows_written_in_last_mark, index_granularity.getMarksCount()); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Non last mark {} (with {} rows) having rows offset {}, total marks {}", + getCurrentMark(), index_granularity.getMarkRows(getCurrentMark()), + rows_written_in_last_mark, index_granularity.getMarksCount()); index_granularity.popMark(); index_granularity.appendMark(new_rows_in_last_mark); diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 9cae53c71c7..512f194ea53 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include @@ -171,9 +171,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read( if (plan->isInitialized() && settings.allow_experimental_projection_optimization && settings.force_optimize_projection && !metadata_for_reading->projections.empty()) - throw Exception( - "No projection is used when allow_experimental_projection_optimization = 1 and force_optimize_projection = 1", - ErrorCodes::PROJECTION_NOT_USED); + throw Exception(ErrorCodes::PROJECTION_NOT_USED, + "No projection is used when allow_experimental_projection_optimization = 1 and force_optimize_projection = 1"); return plan; } @@ -520,14 +519,14 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( relative_sample_size.assign(sample_size_ratio->numerator, sample_size_ratio->denominator); if (relative_sample_size < 0) - throw Exception("Negative sample size", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Negative sample size"); relative_sample_offset = 0; if (sample_offset_ratio) relative_sample_offset.assign(sample_offset_ratio->numerator, sample_offset_ratio->denominator); if (relative_sample_offset < 0) - throw Exception("Negative sample offset", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Negative sample offset"); /// Convert absolute value of the sampling (in form `SAMPLE 1000000` - how many rows to /// read) 
into the relative `SAMPLE 0.1` (how much data to read). @@ -546,7 +545,7 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( relative_sample_size = 0; if (relative_sample_offset > 0 && RelativeSize(0) == relative_sample_size) - throw Exception("Sampling offset is incorrect because no sampling", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Sampling offset is incorrect because no sampling"); if (relative_sample_offset > 1) { @@ -623,10 +622,9 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( } if (size_of_universum == RelativeSize(0)) - throw Exception( - "Invalid sampling column type in storage parameters: " + sampling_column_type->getName() - + ". Must be one unsigned integer type", - ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER, + "Invalid sampling column type in storage parameters: {}. Must be one unsigned integer type", + sampling_column_type->getName()); if (settings.parallel_replicas_count > 1) { @@ -695,7 +693,7 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( { if (!key_condition.addCondition( sampling_key.column_names[0], Range::createLeftBounded(lower, true, sampling_key.data_types[0]->isNullable()))) - throw Exception("Sampling column not in primary key", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Sampling column not in primary key"); ASTPtr args = std::make_shared(); args->children.push_back(sampling_key_ast); @@ -713,7 +711,7 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling( { if (!key_condition.addCondition( sampling_key.column_names[0], Range::createRightBounded(upper, false, sampling_key.data_types[0]->isNullable()))) - throw Exception("Sampling column not in primary key", ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Sampling column not in primary key"); ASTPtr args = std::make_shared(); args->children.push_back(sampling_key_ast); @@ -1691,10 +1689,8 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex( PostingsCacheForStore cache_in_store; - if (dynamic_cast(&*index_helper) != nullptr) - { + if (dynamic_cast(&*index_helper) != nullptr) cache_in_store.store = GinIndexStoreFactory::instance().get(index_helper->getFileName(), part->getDataPartStoragePtr()); - } for (size_t i = 0; i < ranges.size(); ++i) { @@ -1709,7 +1705,7 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex( { if (index_mark != index_range.begin || !granule || last_index_mark != index_range.begin) granule = reader.read(); - const auto * gin_filter_condition = dynamic_cast(&*condition); + const auto * gin_filter_condition = dynamic_cast(&*condition); // Cast to Ann condition auto ann_condition = std::dynamic_pointer_cast(condition); if (ann_condition != nullptr) @@ -1736,7 +1732,7 @@ MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex( continue; } - bool result{false}; + bool result = false; if (!gin_filter_condition) result = condition->mayBeTrueOnGranule(granule); else @@ -1988,7 +1984,7 @@ void MergeTreeDataSelectExecutor::selectPartsToReadWithUUIDFilter( { auto result = temp_part_uuids.insert(part->uuid); if (!result.second) - throw Exception("Found a part with the same UUID on the same replica.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Found a part with the same UUID on the same replica."); } selected_parts.push_back(part_or_projection); @@ -2022,7 +2018,7 @@ 
void MergeTreeDataSelectExecutor::selectPartsToReadWithUUIDFilter( /// Second attempt didn't help, throw an exception if (!select_parts(parts)) - throw Exception("Found duplicate UUIDs while processing query.", ErrorCodes::DUPLICATED_PART_UUIDS); + throw Exception(ErrorCodes::DUPLICATED_PART_UUIDS, "Found duplicate UUIDs while processing query."); } } diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 03a3d4fbd72..125d7c12a1a 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -76,7 +76,15 @@ void buildScatterSelector( if (inserted) { if (max_parts && partitions_count >= max_parts) - throw Exception("Too many partitions for single INSERT block (more than " + toString(max_parts) + "). The limit is controlled by 'max_partitions_per_insert_block' setting. Large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc).", ErrorCodes::TOO_MANY_PARTS); + throw Exception(ErrorCodes::TOO_MANY_PARTS, + "Too many partitions for single INSERT block (more than {}). " + "The limit is controlled by 'max_partitions_per_insert_block' setting. " + "Large number of partitions is a common misconception. " + "It will lead to severe negative performance impact, including slow server startup, " + "slow INSERT queries and slow SELECT queries. Recommended total number of partitions " + "for a table is under 1000..10000. Please note, that partitioning is not intended " + "to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). " + "Partitions are intended for data manipulation (DROP PARTITION, etc).", max_parts); partition_num_to_first_row.push_back(i); it->getMapped() = partitions_count; @@ -129,10 +137,10 @@ void updateTTL( ttl_info.update(column_const->getValue()); } else - throw Exception("Unexpected type of result TTL column", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected type of result TTL column"); } else - throw Exception("Unexpected type of result TTL column", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected type of result TTL column"); if (update_part_min_max_ttls) ttl_infos.updatePartMinMaxTTL(ttl_info.min, ttl_info.max); @@ -309,13 +317,13 @@ Block MergeTreeDataWriter::mergeBlock( /// Check that after first merge merging_algorithm is waiting for data from input 0. if (status.required_source != 0) - throw Exception("Logical error: required source after the first merge is not 0.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: required source after the first merge is not 0."); status = merging_algorithm->merge(); /// Check that merge is finished. 
if (!status.is_finished) - throw Exception("Logical error: merge is not finished after the second merge.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: merge is not finished after the second merge."); /// Merged Block is sorted and we don't need to use permutation anymore permutation = nullptr; @@ -364,7 +372,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl( auto max_month = date_lut.toNumYYYYMM(max_date); if (min_month != max_month) - throw Exception("Logical error: part spans more than one month.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: part spans more than one month."); part_name = new_part_info.getPartNameV0(min_date, max_date); } diff --git a/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.cpp index 9bc0e4e6dc0..fe5a2a861f6 100644 --- a/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexAggregatorBloomFilter.cpp @@ -41,8 +41,8 @@ MergeTreeIndexGranulePtr MergeTreeIndexAggregatorBloomFilter::getGranuleAndReset void MergeTreeIndexAggregatorBloomFilter::update(const Block & block, size_t * pos, size_t limit) { if (*pos >= block.rows()) - throw Exception("The provided position is not less than the number of block rows. Position: " + toString(*pos) + ", Block rows: " + - toString(block.rows()) + ".", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "The provided position is not less than the number of block rows. " + "Position: {}, Block rows: {}.", toString(*pos), toString(block.rows())); Block granule_index_block; size_t max_read_rows = std::min(block.rows() - *pos, limit); diff --git a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp index 4dd0614015c..f64d6104ac6 100644 --- a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp @@ -142,7 +142,7 @@ void MergeTreeIndexAggregatorAnnoy::update(const Block & block, size_t return; if (index_sample_block.columns() > 1) - throw Exception("Only one column is supported", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Only one column is supported"); auto index_column_name = index_sample_block.getByPosition(0).name; const auto & column_cut = block.getByName(index_column_name).column->cut(*pos, rows_read); @@ -208,7 +208,7 @@ MergeTreeIndexConditionAnnoy::MergeTreeIndexConditionAnnoy( bool MergeTreeIndexConditionAnnoy::mayBeTrueOnGranule(MergeTreeIndexGranulePtr /* idx_granule */) const { - throw Exception("mayBeTrueOnGranule is not supported for ANN skip indexes", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "mayBeTrueOnGranule is not supported for ANN skip indexes"); } bool MergeTreeIndexConditionAnnoy::alwaysUnknownOrTrue() const @@ -248,13 +248,14 @@ std::vector MergeTreeIndexConditionAnnoy::getUsefulRangesImpl(MergeTreeI auto granule = std::dynamic_pointer_cast >(idx_granule); if (granule == nullptr) - throw Exception("Granule has the wrong type", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Granule has the wrong type"); auto annoy = granule->index; if (condition.getNumOfDimensions() != annoy->getNumOfDimensions()) - throw Exception("The dimension of the space in the request (" + toString(condition.getNumOfDimensions()) + ") " - + "does not match with the dimension in the index (" + 
toString(annoy->getNumOfDimensions()) + ")", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "The dimension of the space in the request ({}) " + "does not match with the dimension in the index ({})", + toString(condition.getNumOfDimensions()), toString(annoy->getNumOfDimensions())); /// neighbors contain indexes of dots which were closest to target vector std::vector neighbors; @@ -273,7 +274,7 @@ std::vector MergeTreeIndexConditionAnnoy::getUsefulRangesImpl(MergeTreeI } catch (...) { - throw Exception("Setting of the annoy index should be int", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Setting of the annoy index should be int"); } } annoy->get_nns_by_vector(target_vec.data(), limit, k_search, &neighbors, &distances); @@ -333,12 +334,12 @@ MergeTreeIndexPtr annoyIndexCreator(const IndexDescription & index) { if (!index.arguments[0].tryGet(distance_name)) { - throw Exception("Can't parse first argument", ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Can't parse first argument"); } } if (index.arguments.size() > 1 && !index.arguments[1].tryGet(distance_name)) { - throw Exception("Can't parse second argument", ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, "Can't parse second argument"); } return std::make_shared(index, param, distance_name); } @@ -381,20 +382,20 @@ void annoyIndexValidator(const IndexDescription & index, bool /* attach */) { if (index.arguments.size() > 2) { - throw Exception("Annoy index must not have more than two parameters", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Annoy index must not have more than two parameters"); } if (!index.arguments.empty() && index.arguments[0].getType() != Field::Types::UInt64 && index.arguments[0].getType() != Field::Types::String) { - throw Exception("Annoy index first argument must be UInt64 or String.", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Annoy index first argument must be UInt64 or String."); } if (index.arguments.size() > 1 && index.arguments[1].getType() != Field::Types::String) { - throw Exception("Annoy index second argument must be String.", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Annoy index second argument must be String."); } if (index.column_names.size() != 1 || index.data_types.size() != 1) - throw Exception("Annoy indexes must be created on a single column", ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS); + throw Exception(ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS, "Annoy indexes must be created on a single column"); assertIndexColumnsType(index.sample_block); } diff --git a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp index c2ed081ac00..7a8a28b24aa 100644 --- a/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexBloomFilter.cpp @@ -78,7 +78,7 @@ MergeTreeIndexConditionPtr MergeTreeIndexBloomFilter::createIndexCondition(const static void assertIndexColumnsType(const Block & header) { if (!header || !header.columns()) - throw Exception("Index must have columns.", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Index must have columns."); const DataTypes & columns_data_types = header.getDataTypes(); @@ -89,8 +89,7 @@ static void assertIndexColumnsType(const Block & header) if (!which.isUInt() && !which.isInt() && !which.isString() && !which.isFixedString() && 
!which.isFloat() && !which.isDate() && !which.isDateTime() && !which.isDateTime64() && !which.isEnum() && !which.isUUID()) - throw Exception("Unexpected type " + type->getName() + " of bloom filter index.", - ErrorCodes::ILLEGAL_COLUMN); + throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Unexpected type {} of bloom filter index.", type->getName()); } } @@ -118,7 +117,7 @@ void bloomFilterIndexValidatorNew(const IndexDescription & index, bool attach) if (index.arguments.size() > 1) { if (!attach) /// This is for backward compatibility. - throw Exception("BloomFilter index cannot have more than one parameter.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "BloomFilter index cannot have more than one parameter."); } if (!index.arguments.empty()) @@ -126,7 +125,7 @@ void bloomFilterIndexValidatorNew(const IndexDescription & index, bool attach) const auto & argument = index.arguments[0]; if (!attach && (argument.getType() != Field::Types::Float64 || argument.get() < 0 || argument.get() > 1)) - throw Exception("The BloomFilter false positive must be a double number between 0 and 1.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The BloomFilter false positive must be a double number between 0 and 1."); } } diff --git a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp index 4fd4314e53f..235d90bb974 100644 --- a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.cpp @@ -58,7 +58,7 @@ bool maybeTrueOnBloomFilter(const IColumn * hash_column, const BloomFilterPtr & const auto * non_const_column = typeid_cast(hash_column); if (!const_column && !non_const_column) - throw Exception("LOGICAL ERROR: hash column must be Const Column or UInt64 Column.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "LOGICAL ERROR: hash column must be Const Column or UInt64 Column."); if (const_column) { @@ -174,7 +174,7 @@ bool MergeTreeIndexConditionBloomFilter::alwaysUnknownOrTrue() const rpn_stack.back() = arg1 || arg2; } else - throw Exception("Unexpected function type in KeyCondition::RPNElement", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected function type in KeyCondition::RPNElement"); } return rpn_stack[0]; @@ -245,11 +245,11 @@ bool MergeTreeIndexConditionBloomFilter::mayBeTrueOnGranule(const MergeTreeIndex rpn_stack.emplace_back(false, true); } else - throw Exception("Unexpected function type in KeyCondition::RPNElement", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected function type in KeyCondition::RPNElement"); } if (rpn_stack.size() != 1) - throw Exception("Unexpected stack size in KeyCondition::mayBeTrueInRange", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected stack size in KeyCondition::mayBeTrueInRange"); return rpn_stack[0].can_be_true; } @@ -574,7 +574,7 @@ bool MergeTreeIndexConditionBloomFilter::traverseTreeEquals( if (function_name == "has" || function_name == "indexOf") { if (!array_type) - throw Exception("First argument for function " + function_name + " must be an array.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First argument for function {} must be an array.", function_name); /// We can treat `indexOf` function similar to `has`. 
/// But it is little more cumbersome, compare: `has(arr, elem)` and `indexOf(arr, elem) != 0`. @@ -627,7 +627,8 @@ bool MergeTreeIndexConditionBloomFilter::traverseTreeEquals( else { if (array_type) - throw Exception("An array type of bloom_filter supports only has(), indexOf(), and hasAny() functions.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "An array type of bloom_filter supports only has(), indexOf(), and hasAny() functions."); out.function = function_name == "equals" ? RPNElement::FUNCTION_EQUALS : RPNElement::FUNCTION_NOT_EQUALS; const DataTypePtr actual_type = BloomFilter::getPrimitiveType(index_type); diff --git a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h index 5d7ea371a83..952948fd582 100644 --- a/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h +++ b/src/Storages/MergeTree/MergeTreeIndexConditionBloomFilter.h @@ -53,7 +53,7 @@ public: if (const auto & bf_granule = typeid_cast(granule.get())) return mayBeTrueOnGranule(bf_granule); - throw Exception("LOGICAL ERROR: require bloom filter index granule.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "LOGICAL ERROR: require bloom filter index granule."); } private: diff --git a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp index 411141f028b..35ca484cff0 100644 --- a/src/Storages/MergeTree/MergeTreeIndexFullText.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexFullText.cpp @@ -91,9 +91,8 @@ MergeTreeIndexGranulePtr MergeTreeIndexAggregatorFullText::getGranuleAndReset() void MergeTreeIndexAggregatorFullText::update(const Block & block, size_t * pos, size_t limit) { if (*pos >= block.rows()) - throw Exception( - "The provided position is not less than the number of block rows. Position: " - + toString(*pos) + ", Block rows: " + toString(block.rows()) + ".", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "The provided position is not less than the number of block rows. " + "Position: {}, Block rows: {}.", toString(*pos), toString(block.rows())); size_t rows_read = std::min(limit, block.rows() - *pos); @@ -225,19 +224,19 @@ bool MergeTreeConditionFullText::alwaysUnknownOrTrue() const rpn_stack.back() = arg1 || arg2; } else - throw Exception("Unexpected function type in KeyCondition::RPNElement", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected function type in KeyCondition::RPNElement"); } return rpn_stack[0]; } +/// Keep in-sync with MergeTreeIndexConditionGin::mayBeTrueOnTranuleInPart bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx_granule) const { std::shared_ptr granule = std::dynamic_pointer_cast(idx_granule); if (!granule) - throw Exception( - "BloomFilter index condition got a granule with the wrong type.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "BloomFilter index condition got a granule with the wrong type."); /// Check like in KeyCondition. 
std::vector rpn_stack; @@ -314,11 +313,11 @@ bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx rpn_stack.emplace_back(true, false); } else - throw Exception("Unexpected function type in BloomFilterCondition::RPNElement", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected function type in BloomFilterCondition::RPNElement"); } if (rpn_stack.size() != 1) - throw Exception("Unexpected stack size in BloomFilterCondition::mayBeTrueOnGranule", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected stack size in BloomFilterCondition::mayBeTrueOnGranule"); return rpn_stack[0].can_be_true; } @@ -469,6 +468,10 @@ bool MergeTreeConditionFullText::traverseTreeEquals( { key_column_num = map_keys_key_column_num; key_exists = true; + + auto const_data_type = WhichDataType(const_type); + if (!const_data_type.isStringOrFixedString() && !const_data_type.isArray()) + return false; } else { @@ -646,6 +649,8 @@ bool MergeTreeConditionFullText::tryPrepareSetBloomFilter( std::vector key_position; Columns columns = prepared_set->getSetElements(); + size_t prepared_set_total_row_count = prepared_set->getTotalRowCount(); + for (const auto & elem : key_tuple_mapping) { bloom_filters.emplace_back(); @@ -653,7 +658,8 @@ bool MergeTreeConditionFullText::tryPrepareSetBloomFilter( size_t tuple_idx = elem.tuple_index; const auto & column = columns[tuple_idx]; - for (size_t row = 0; row < prepared_set->getTotalRowCount(); ++row) + + for (size_t row = 0; row < prepared_set_total_row_count; ++row) { bloom_filters.back().emplace_back(params); auto ref = column->getDataAt(row); @@ -716,7 +722,7 @@ MergeTreeIndexPtr bloomFilterIndexCreator( } else { - throw Exception("Unknown index type: " + backQuote(index.name), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown index type: {}", backQuote(index.name)); } } @@ -738,29 +744,32 @@ void bloomFilterIndexValidator(const IndexDescription & index, bool /*attach*/) } if (!data_type.isString() && !data_type.isFixedString()) - throw Exception("Bloom filter index can be used only with `String`, `FixedString`, `LowCardinality(String)`, `LowCardinality(FixedString)` column or Array with `String` or `FixedString` values column.", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, + "Bloom filter index can be used only with `String`, `FixedString`, " + "`LowCardinality(String)`, `LowCardinality(FixedString)` column " + "or Array with `String` or `FixedString` values column."); } if (index.type == NgramTokenExtractor::getName()) { if (index.arguments.size() != 4) - throw Exception("`ngrambf` index must have exactly 4 arguments.", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "`ngrambf` index must have exactly 4 arguments."); } else if (index.type == SplitTokenExtractor::getName()) { if (index.arguments.size() != 3) - throw Exception("`tokenbf` index must have exactly 3 arguments.", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "`tokenbf` index must have exactly 3 arguments."); } else { - throw Exception("Unknown index type: " + backQuote(index.name), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown index type: {}", backQuote(index.name)); } assert(index.arguments.size() >= 3); for (const auto & arg : index.arguments) if (arg.getType() != Field::Types::UInt64) - throw Exception("All parameters to *bf_v1 index must be unsigned integers", 
ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "All parameters to *bf_v1 index must be unsigned integers"); /// Just validate BloomFilterParameters params( diff --git a/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp b/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp index 11e1f9efcc2..e1ce7d8075b 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexGranularityInfo.cpp @@ -129,7 +129,7 @@ size_t MergeTreeIndexGranularityInfo::getMarkSizeInBytes(size_t columns_num) con else if (mark_type.part_type == MergeTreeDataPartType::InMemory) return 0; else - throw Exception("Unknown part type", ErrorCodes::UNKNOWN_PART_TYPE); + throw Exception(ErrorCodes::UNKNOWN_PART_TYPE, "Unknown part type"); } size_t getAdaptiveMrkSizeCompact(size_t columns_num) diff --git a/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp b/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp index 64fa7264738..267708b5312 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexGranuleBloomFilter.cpp @@ -43,7 +43,7 @@ MergeTreeIndexGranuleBloomFilter::MergeTreeIndexGranuleBloomFilter( : total_rows(total_rows_), bits_per_row(bits_per_row_), hash_functions(hash_functions_) { if (granule_index_blocks_.empty() || !total_rows) - throw Exception("LOGICAL ERROR: granule_index_blocks empty or total_rows is zero.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "LOGICAL ERROR: granule_index_blocks empty or total_rows is zero."); assertGranuleBlocksStructure(granule_index_blocks_); @@ -52,7 +52,7 @@ MergeTreeIndexGranuleBloomFilter::MergeTreeIndexGranuleBloomFilter( Block granule_index_block = granule_index_blocks_[index]; if (unlikely(!granule_index_block || !granule_index_block.rows())) - throw Exception("LOGICAL ERROR: granule_index_block is empty.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "LOGICAL ERROR: granule_index_block is empty."); if (index == 0) { diff --git a/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp b/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp index d8765ddb9bc..2b7a40b429a 100644 --- a/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexHypothesis.cpp @@ -81,7 +81,7 @@ MergeTreeIndexAggregatorPtr MergeTreeIndexHypothesis::createIndexAggregator() co MergeTreeIndexConditionPtr MergeTreeIndexHypothesis::createIndexCondition( const SelectQueryInfo &, ContextPtr) const { - throw Exception("Not supported", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Not supported"); } MergeTreeIndexMergedConditionPtr MergeTreeIndexHypothesis::createIndexMergedCondition( @@ -104,7 +104,7 @@ MergeTreeIndexPtr hypothesisIndexCreator(const IndexDescription & index) void hypothesisIndexValidator(const IndexDescription & index, bool /*attach*/) { if (index.expression_list_ast->children.size() != 1) - throw Exception("Hypothesis index needs exactly one expression", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Hypothesis index needs exactly one expression"); } } diff --git a/src/Storages/MergeTree/MergeTreeIndexHypothesisMergedCondition.cpp b/src/Storages/MergeTree/MergeTreeIndexHypothesisMergedCondition.cpp index c62b5e86c75..1ab64fc84c7 100644 --- a/src/Storages/MergeTree/MergeTreeIndexHypothesisMergedCondition.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexHypothesisMergedCondition.cpp @@ -142,7 
+142,7 @@ bool MergeTreeIndexhypothesisMergedCondition::mayBeTrueOnGranule(const MergeTree { const auto granule = std::dynamic_pointer_cast(index_granule); if (!granule) - throw Exception("Only hypothesis index is supported here.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Only hypothesis index is supported here."); values.push_back(granule->met); } diff --git a/src/Storages/MergeTree/MergeTreeIndexGin.cpp b/src/Storages/MergeTree/MergeTreeIndexInverted.cpp similarity index 84% rename from src/Storages/MergeTree/MergeTreeIndexGin.cpp rename to src/Storages/MergeTree/MergeTreeIndexInverted.cpp index 26f3fcb4fb6..02222aa530c 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGin.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexInverted.cpp @@ -1,30 +1,28 @@ - -#include +#include #include -#include +#include +#include +#include #include -#include +#include +#include +#include #include -#include +#include #include #include +#include #include #include -#include -#include -#include -#include #include #include #include -#include - #include -#include -#include -#include -#include +#include +#include +#include +#include namespace DB @@ -35,19 +33,18 @@ namespace ErrorCodes extern const int INCORRECT_QUERY; } -MergeTreeIndexGranuleGinFilter::MergeTreeIndexGranuleGinFilter( +MergeTreeIndexGranuleInverted::MergeTreeIndexGranuleInverted( const String & index_name_, size_t columns_number, const GinFilterParameters & params_) : index_name(index_name_) , params(params_) - , gin_filters( - columns_number, GinFilter(params)) + , gin_filters(columns_number, GinFilter(params)) , has_elems(false) { } -void MergeTreeIndexGranuleGinFilter::serializeBinary(WriteBuffer & ostr) const +void MergeTreeIndexGranuleInverted::serializeBinary(WriteBuffer & ostr) const { if (empty()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to write empty fulltext index {}.", backQuote(index_name)); @@ -59,11 +56,11 @@ void MergeTreeIndexGranuleGinFilter::serializeBinary(WriteBuffer & ostr) const { size_t filter_size = gin_filter.getFilter().size(); size_serialization->serializeBinary(filter_size, ostr, {}); - ostr.write(reinterpret_cast(gin_filter.getFilter().data()), filter_size * sizeof(GinFilter::GinSegmentWithRowIDRanges::value_type)); + ostr.write(reinterpret_cast(gin_filter.getFilter().data()), filter_size * sizeof(GinSegmentWithRowIdRangeVector::value_type)); } } -void MergeTreeIndexGranuleGinFilter::deserializeBinary(ReadBuffer & istr, MergeTreeIndexVersion version) +void MergeTreeIndexGranuleInverted::deserializeBinary(ReadBuffer & istr, MergeTreeIndexVersion version) { if (version != 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown index version {}.", version); @@ -81,13 +78,13 @@ void MergeTreeIndexGranuleGinFilter::deserializeBinary(ReadBuffer & istr, MergeT continue; gin_filter.getFilter().assign(filter_size, {}); - istr.readStrict(reinterpret_cast(gin_filter.getFilter().data()), filter_size * sizeof(GinFilter::GinSegmentWithRowIDRanges::value_type)); + istr.readStrict(reinterpret_cast(gin_filter.getFilter().data()), filter_size * sizeof(GinSegmentWithRowIdRangeVector::value_type)); } has_elems = true; } -MergeTreeIndexAggregatorGinFilter::MergeTreeIndexAggregatorGinFilter( +MergeTreeIndexAggregatorInverted::MergeTreeIndexAggregatorInverted( GinIndexStorePtr store_, const Names & index_columns_, const String & index_name_, @@ -99,37 +96,34 @@ MergeTreeIndexAggregatorGinFilter::MergeTreeIndexAggregatorGinFilter( , params(params_) , token_extractor(token_extractor_) , granule( - 
std::make_shared( + std::make_shared( index_name, index_columns.size(), params)) { } -MergeTreeIndexGranulePtr MergeTreeIndexAggregatorGinFilter::getGranuleAndReset() +MergeTreeIndexGranulePtr MergeTreeIndexAggregatorInverted::getGranuleAndReset() { - auto new_granule = std::make_shared( + auto new_granule = std::make_shared( index_name, index_columns.size(), params); new_granule.swap(granule); return new_granule; } -void MergeTreeIndexAggregatorGinFilter::addToGinFilter(UInt32 rowID, const char* data, size_t length, GinFilter& gin_filter, UInt64 limit) +void MergeTreeIndexAggregatorInverted::addToGinFilter(UInt32 rowID, const char * data, size_t length, GinFilter & gin_filter, UInt64 limit) { size_t cur = 0; size_t token_start = 0; size_t token_len = 0; while (cur < length && token_extractor->nextInStringPadded(data, length, &cur, &token_start, &token_len)) - { gin_filter.add(data + token_start, token_len, rowID, store, limit); - } } -void MergeTreeIndexAggregatorGinFilter::update(const Block & block, size_t * pos, size_t limit) +void MergeTreeIndexAggregatorInverted::update(const Block & block, size_t * pos, size_t limit) { if (*pos >= block.rows()) - throw Exception( - "The provided position is not less than the number of block rows. Position: " - + toString(*pos) + ", Block rows: " + toString(block.rows()) + ".", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "The provided position is not less than the number of block rows. " + "Position: {}, Block rows: {}.", toString(*pos), toString(block.rows())); size_t rows_read = std::min(limit, block.rows() - *pos); auto row_id = store->getNextRowIDRange(rows_read); @@ -189,7 +183,7 @@ void MergeTreeIndexAggregatorGinFilter::update(const Block & block, size_t * pos *pos += rows_read; } -MergeTreeConditionGinFilter::MergeTreeConditionGinFilter( +MergeTreeConditionInverted::MergeTreeConditionInverted( const SelectQueryInfo & query_info, ContextPtr context_, const Block & index_sample_block, @@ -236,7 +230,7 @@ MergeTreeConditionGinFilter::MergeTreeConditionGinFilter( } /// Keep in-sync with MergeTreeConditionFullText::alwaysUnknownOrTrue -bool MergeTreeConditionGinFilter::alwaysUnknownOrTrue() const +bool MergeTreeConditionInverted::alwaysUnknownOrTrue() const { /// Check like in KeyCondition. std::vector rpn_stack; @@ -277,19 +271,18 @@ bool MergeTreeConditionGinFilter::alwaysUnknownOrTrue() const rpn_stack.back() = arg1 || arg2; } else - throw Exception("Unexpected function type in KeyCondition::RPNElement", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected function type in KeyCondition::RPNElement"); } return rpn_stack[0]; } -bool MergeTreeConditionGinFilter::mayBeTrueOnGranuleInPart(MergeTreeIndexGranulePtr idx_granule,[[maybe_unused]] PostingsCacheForStore &cache_store) const +bool MergeTreeConditionInverted::mayBeTrueOnGranuleInPart(MergeTreeIndexGranulePtr idx_granule,[[maybe_unused]] PostingsCacheForStore & cache_store) const { - std::shared_ptr granule - = std::dynamic_pointer_cast(idx_granule); + std::shared_ptr granule + = std::dynamic_pointer_cast(idx_granule); if (!granule) - throw Exception( - "GinFilter index condition got a granule with the wrong type.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "GinFilter index condition got a granule with the wrong type."); /// Check like in KeyCondition. 
std::vector rpn_stack; @@ -366,16 +359,16 @@ bool MergeTreeConditionGinFilter::mayBeTrueOnGranuleInPart(MergeTreeIndexGranule rpn_stack.emplace_back(true, false); } else - throw Exception("Unexpected function type in GinFilterCondition::RPNElement", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected function type in GinFilterCondition::RPNElement"); } if (rpn_stack.size() != 1) - throw Exception("Unexpected stack size in GinFilterCondition::mayBeTrueOnGranule", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected stack size in GinFilterCondition::mayBeTrueOnGranule"); return rpn_stack[0].can_be_true; } -bool MergeTreeConditionGinFilter::traverseAtomAST(const RPNBuilderTreeNode & node, RPNElement & out) +bool MergeTreeConditionInverted::traverseAtomAST(const RPNBuilderTreeNode & node, RPNElement & out) { { Field const_value; @@ -455,7 +448,7 @@ bool MergeTreeConditionGinFilter::traverseAtomAST(const RPNBuilderTreeNode & nod return false; } -bool MergeTreeConditionGinFilter::traverseASTEquals( +bool MergeTreeConditionInverted::traverseASTEquals( const String & function_name, const RPNBuilderTreeNode & key_ast, const DataTypePtr & value_type, @@ -608,7 +601,7 @@ bool MergeTreeConditionGinFilter::traverseASTEquals( out.function = RPNElement::FUNCTION_MULTI_SEARCH; /// 2d vector is not needed here but is used because already exists for FUNCTION_IN - std::vector> gin_filters; + std::vector gin_filters; gin_filters.emplace_back(); for (const auto & element : const_value.get()) { @@ -626,7 +619,7 @@ bool MergeTreeConditionGinFilter::traverseASTEquals( return false; } -bool MergeTreeConditionGinFilter::tryPrepareSetGinFilter( +bool MergeTreeConditionInverted::tryPrepareSetGinFilter( const RPNBuilderTreeNode & lhs, const RPNBuilderTreeNode & rhs, RPNElement & out) @@ -669,7 +662,7 @@ bool MergeTreeConditionGinFilter::tryPrepareSetGinFilter( if (data_type->getTypeId() != TypeIndex::String && data_type->getTypeId() != TypeIndex::FixedString) return false; - std::vector> gin_filters; + std::vector gin_filters; std::vector key_position; Columns columns = prepared_set->getSetElements(); @@ -695,55 +688,55 @@ bool MergeTreeConditionGinFilter::tryPrepareSetGinFilter( return true; } -MergeTreeIndexGranulePtr MergeTreeIndexGinFilter::createIndexGranule() const +MergeTreeIndexGranulePtr MergeTreeIndexInverted::createIndexGranule() const { - return std::make_shared(index.name, index.column_names.size(), params); + return std::make_shared(index.name, index.column_names.size(), params); } -MergeTreeIndexAggregatorPtr MergeTreeIndexGinFilter::createIndexAggregator() const +MergeTreeIndexAggregatorPtr MergeTreeIndexInverted::createIndexAggregator() const { /// should not be called: createIndexAggregatorForPart should be used assert(false); return nullptr; } -MergeTreeIndexAggregatorPtr MergeTreeIndexGinFilter::createIndexAggregatorForPart(const GinIndexStorePtr &store) const +MergeTreeIndexAggregatorPtr MergeTreeIndexInverted::createIndexAggregatorForPart(const GinIndexStorePtr & store) const { - return std::make_shared(store, index.column_names, index.name, params, token_extractor.get()); + return std::make_shared(store, index.column_names, index.name, params, token_extractor.get()); } -MergeTreeIndexConditionPtr MergeTreeIndexGinFilter::createIndexCondition( +MergeTreeIndexConditionPtr MergeTreeIndexInverted::createIndexCondition( const SelectQueryInfo & query, ContextPtr context) const { - return std::make_shared(query, context, 
index.sample_block, params, token_extractor.get()); + return std::make_shared(query, context, index.sample_block, params, token_extractor.get()); }; -bool MergeTreeIndexGinFilter::mayBenefitFromIndexForIn(const ASTPtr & node) const +bool MergeTreeIndexInverted::mayBenefitFromIndexForIn(const ASTPtr & node) const { return std::find(std::cbegin(index.column_names), std::cend(index.column_names), node->getColumnName()) != std::cend(index.column_names); } -MergeTreeIndexPtr ginIndexCreator( +MergeTreeIndexPtr invertedIndexCreator( const IndexDescription & index) { size_t n = index.arguments.empty() ? 0 : index.arguments[0].get(); - Float64 density = index.arguments.size() < 2 ? 1.0f : index.arguments[1].get(); + Float64 density = index.arguments.size() < 2 ? 1.0 : index.arguments[1].get(); GinFilterParameters params(n, density); /// Use SplitTokenExtractor when n is 0, otherwise use NgramTokenExtractor if (n > 0) { auto tokenizer = std::make_unique(n); - return std::make_shared(index, params, std::move(tokenizer)); + return std::make_shared(index, params, std::move(tokenizer)); } else { auto tokenizer = std::make_unique(); - return std::make_shared(index, params, std::move(tokenizer)); + return std::make_shared(index, params, std::move(tokenizer)); } } -void ginIndexValidator(const IndexDescription & index, bool /*attach*/) +void invertedIndexValidator(const IndexDescription & index, bool /*attach*/) { for (const auto & index_data_type : index.data_types) { @@ -761,25 +754,23 @@ void ginIndexValidator(const IndexDescription & index, bool /*attach*/) } if (!data_type.isString() && !data_type.isFixedString()) - throw Exception("Inverted index can be used only with `String`, `FixedString`, `LowCardinality(String)`, `LowCardinality(FixedString)` column or Array with `String` or `FixedString` values column.", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Inverted index can be used only with `String`, `FixedString`," + "`LowCardinality(String)`, `LowCardinality(FixedString)` " + "column or Array with `String` or `FixedString` values column."); } - if (index.type != GinFilter::getName()) - throw Exception("Unknown index type: " + backQuote(index.name), ErrorCodes::LOGICAL_ERROR); - if (index.arguments.size() > 2) - throw Exception("Inverted index must have less than two arguments.", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Inverted index must have less than two arguments."); if (!index.arguments.empty() && index.arguments[0].getType() != Field::Types::UInt64) - throw Exception("The first Inverted index argument must be positive integer.", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "The first Inverted index argument must be positive integer."); if (index.arguments.size() == 2 && (index.arguments[1].getType() != Field::Types::Float64 || index.arguments[1].get() <= 0 || index.arguments[1].get() > 1)) - throw Exception("The second Inverted index argument must be a float between 0 and 1.", ErrorCodes::INCORRECT_QUERY); - - size_t ngrams = index.arguments.empty() ? 0 : index.arguments[0].get(); - Float64 density = index.arguments.size() < 2 ? 1.0f : index.arguments[1].get(); + throw Exception(ErrorCodes::INCORRECT_QUERY, "The second Inverted index argument must be a float between 0 and 1."); /// Just validate + size_t ngrams = index.arguments.empty() ? 0 : index.arguments[0].get(); + Float64 density = index.arguments.size() < 2 ? 
1.0 : index.arguments[1].get(); GinFilterParameters params(ngrams, density); } diff --git a/src/Storages/MergeTree/MergeTreeIndexGin.h b/src/Storages/MergeTree/MergeTreeIndexInverted.h similarity index 79% rename from src/Storages/MergeTree/MergeTreeIndexGin.h rename to src/Storages/MergeTree/MergeTreeIndexInverted.h index d915d493810..baabed09905 100644 --- a/src/Storages/MergeTree/MergeTreeIndexGin.h +++ b/src/Storages/MergeTree/MergeTreeIndexInverted.h @@ -1,24 +1,23 @@ #pragma once -#include -#include -#include - -#include -#include -#include #include +#include +#include +#include +#include +#include +#include namespace DB { -struct MergeTreeIndexGranuleGinFilter final : public IMergeTreeIndexGranule +struct MergeTreeIndexGranuleInverted final : public IMergeTreeIndexGranule { - explicit MergeTreeIndexGranuleGinFilter( + explicit MergeTreeIndexGranuleInverted( const String & index_name_, size_t columns_number, const GinFilterParameters & params_); - ~MergeTreeIndexGranuleGinFilter() override = default; + ~MergeTreeIndexGranuleInverted() override = default; void serializeBinary(WriteBuffer & ostr) const override; void deserializeBinary(ReadBuffer & istr, MergeTreeIndexVersion version) override; @@ -27,30 +26,29 @@ struct MergeTreeIndexGranuleGinFilter final : public IMergeTreeIndexGranule String index_name; GinFilterParameters params; - - std::vector gin_filters; + GinFilters gin_filters; bool has_elems; }; -using MergeTreeIndexGranuleGinFilterPtr = std::shared_ptr; +using MergeTreeIndexGranuleInvertedPtr = std::shared_ptr; -struct MergeTreeIndexAggregatorGinFilter final : IMergeTreeIndexAggregator +struct MergeTreeIndexAggregatorInverted final : IMergeTreeIndexAggregator { - explicit MergeTreeIndexAggregatorGinFilter( + explicit MergeTreeIndexAggregatorInverted( GinIndexStorePtr store_, const Names & index_columns_, const String & index_name_, const GinFilterParameters & params_, TokenExtractorPtr token_extractor_); - ~MergeTreeIndexAggregatorGinFilter() override = default; + ~MergeTreeIndexAggregatorInverted() override = default; bool empty() const override { return !granule || granule->empty(); } MergeTreeIndexGranulePtr getGranuleAndReset() override; void update(const Block & block, size_t * pos, size_t limit) override; - void addToGinFilter(UInt32 rowID, const char* data, size_t length, GinFilter& gin_filter, UInt64 limit); + void addToGinFilter(UInt32 rowID, const char * data, size_t length, GinFilter & gin_filter, UInt64 limit); GinIndexStorePtr store; Names index_columns; @@ -58,21 +56,21 @@ struct MergeTreeIndexAggregatorGinFilter final : IMergeTreeIndexAggregator const GinFilterParameters params; TokenExtractorPtr token_extractor; - MergeTreeIndexGranuleGinFilterPtr granule; + MergeTreeIndexGranuleInvertedPtr granule; }; -class MergeTreeConditionGinFilter final : public IMergeTreeIndexCondition, WithContext +class MergeTreeConditionInverted final : public IMergeTreeIndexCondition, WithContext { public: - MergeTreeConditionGinFilter( + MergeTreeConditionInverted( const SelectQueryInfo & query_info, ContextPtr context, const Block & index_sample_block, const GinFilterParameters & params_, TokenExtractorPtr token_extactor_); - ~MergeTreeConditionGinFilter() override = default; + ~MergeTreeConditionInverted() override = default; bool alwaysUnknownOrTrue() const override; bool mayBeTrueOnGranule([[maybe_unused]]MergeTreeIndexGranulePtr idx_granule) const override @@ -81,7 +79,8 @@ public: assert(false); return false; } - bool mayBeTrueOnGranuleInPart(MergeTreeIndexGranulePtr 
idx_granule, [[maybe_unused]] PostingsCacheForStore& cache_store) const; + bool mayBeTrueOnGranuleInPart(MergeTreeIndexGranulePtr idx_granule, [[maybe_unused]] PostingsCacheForStore & cache_store) const; + private: struct KeyTuplePositionMapping { @@ -124,7 +123,7 @@ private: std::unique_ptr gin_filter; /// For FUNCTION_IN, FUNCTION_NOT_IN and FUNCTION_MULTI_SEARCH - std::vector> set_gin_filters; + std::vector set_gin_filters; /// For FUNCTION_IN and FUNCTION_NOT_IN std::vector set_key_position; @@ -154,10 +153,10 @@ private: PreparedSetsPtr prepared_sets; }; -class MergeTreeIndexGinFilter final : public IMergeTreeIndex +class MergeTreeIndexInverted final : public IMergeTreeIndex { public: - MergeTreeIndexGinFilter( + MergeTreeIndexInverted( const IndexDescription & index_, const GinFilterParameters & params_, std::unique_ptr && token_extractor_) @@ -165,13 +164,12 @@ public: , params(params_) , token_extractor(std::move(token_extractor_)) {} - ~MergeTreeIndexGinFilter() override = default; + ~MergeTreeIndexInverted() override = default; MergeTreeIndexGranulePtr createIndexGranule() const override; MergeTreeIndexAggregatorPtr createIndexAggregator() const override; - MergeTreeIndexAggregatorPtr createIndexAggregatorForPart(const GinIndexStorePtr &store) const override; - MergeTreeIndexConditionPtr createIndexCondition( - const SelectQueryInfo & query, ContextPtr context) const override; + MergeTreeIndexAggregatorPtr createIndexAggregatorForPart(const GinIndexStorePtr & store) const override; + MergeTreeIndexConditionPtr createIndexCondition(const SelectQueryInfo & query, ContextPtr context) const override; bool mayBenefitFromIndexForIn(const ASTPtr & node) const override; diff --git a/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp b/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp index e145ae68e65..d80f7521430 100644 --- a/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexMinMax.cpp @@ -34,8 +34,7 @@ MergeTreeIndexGranuleMinMax::MergeTreeIndexGranuleMinMax( void MergeTreeIndexGranuleMinMax::serializeBinary(WriteBuffer & ostr) const { if (empty()) - throw Exception( - "Attempt to write empty minmax index " + backQuote(index_name), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to write empty minmax index {}", backQuote(index_name)); for (size_t i = 0; i < index_sample_block.columns(); ++i) { @@ -122,9 +121,8 @@ MergeTreeIndexGranulePtr MergeTreeIndexAggregatorMinMax::getGranuleAndReset() void MergeTreeIndexAggregatorMinMax::update(const Block & block, size_t * pos, size_t limit) { if (*pos >= block.rows()) - throw Exception( - "The provided position is not less than the number of block rows. Position: " - + toString(*pos) + ", Block rows: " + toString(block.rows()) + ".", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "The provided position is not less than the number of block rows. 
" + "Position: {}, Block rows: {}.", toString(*pos), toString(block.rows())); size_t rows_read = std::min(limit, block.rows() - *pos); @@ -191,8 +189,7 @@ bool MergeTreeIndexConditionMinMax::mayBeTrueOnGranule(MergeTreeIndexGranulePtr std::shared_ptr granule = std::dynamic_pointer_cast(idx_granule); if (!granule) - throw Exception( - "Minmax index condition got a granule with the wrong type.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Minmax index condition got a granule with the wrong type."); return condition.checkInHyperrectangle(granule->hyperrectangle, index_data_types).can_be_true; } diff --git a/src/Storages/MergeTree/MergeTreeIndexSet.cpp b/src/Storages/MergeTree/MergeTreeIndexSet.cpp index db99a2f37be..d28272b6d73 100644 --- a/src/Storages/MergeTree/MergeTreeIndexSet.cpp +++ b/src/Storages/MergeTree/MergeTreeIndexSet.cpp @@ -145,9 +145,8 @@ MergeTreeIndexAggregatorSet::MergeTreeIndexAggregatorSet(const String & index_na void MergeTreeIndexAggregatorSet::update(const Block & block, size_t * pos, size_t limit) { if (*pos >= block.rows()) - throw Exception( - "The provided position is not less than the number of block rows. Position: " - + toString(*pos) + ", Block rows: " + toString(block.rows()) + ".", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "The provided position is not less than the number of block rows. " + "Position: {}, Block rows: {}.", toString(*pos), toString(block.rows())); size_t rows_read = std::min(limit, block.rows() - *pos); @@ -713,9 +712,9 @@ MergeTreeIndexPtr setIndexCreator(const IndexDescription & index) void setIndexValidator(const IndexDescription & index, bool /*attach*/) { if (index.arguments.size() != 1) - throw Exception("Set index must have exactly one argument.", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Set index must have exactly one argument."); else if (index.arguments[0].getType() != Field::Types::UInt64) - throw Exception("Set index argument must be positive integer.", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Set index argument must be positive integer."); } } diff --git a/src/Storages/MergeTree/MergeTreeIndices.cpp b/src/Storages/MergeTree/MergeTreeIndices.cpp index e5e376e7f69..2be9ecd8de3 100644 --- a/src/Storages/MergeTree/MergeTreeIndices.cpp +++ b/src/Storages/MergeTree/MergeTreeIndices.cpp @@ -20,13 +20,13 @@ namespace ErrorCodes void MergeTreeIndexFactory::registerCreator(const std::string & index_type, Creator creator) { if (!creators.emplace(index_type, std::move(creator)).second) - throw Exception("MergeTreeIndexFactory: the Index creator name '" + index_type + "' is not unique", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeTreeIndexFactory: the Index creator name '{}' is not unique", + index_type); } void MergeTreeIndexFactory::registerValidator(const std::string & index_type, Validator validator) { if (!validators.emplace(index_type, std::move(validator)).second) - throw Exception("MergeTreeIndexFactory: the Index validator name '" + index_type + "' is not unique", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "MergeTreeIndexFactory: the Index validator name '{}' is not unique", index_type); } @@ -35,8 +35,8 @@ MergeTreeIndexPtr MergeTreeIndexFactory::get( { auto it = creators.find(index.type); if (it == creators.end()) - throw Exception( - "Unknown Index type '" + index.type + "'. 
Available index types: " + + throw Exception(ErrorCodes::INCORRECT_QUERY, + "Unknown Index type '{}'. Available index types: {}", index.type, std::accumulate(creators.cbegin(), creators.cend(), std::string{}, [] (auto && left, const auto & right) -> std::string { @@ -44,8 +44,8 @@ MergeTreeIndexPtr MergeTreeIndexFactory::get( return right.first; else return left + ", " + right.first; - }), - ErrorCodes::INCORRECT_QUERY); + }) + ); return it->second(index); } @@ -63,9 +63,9 @@ void MergeTreeIndexFactory::validate(const IndexDescription & index, bool attach { auto it = validators.find(index.type); if (it == validators.end()) - throw Exception( - "Unknown Index type '" + index.type + "'. Available index types: " - + std::accumulate( + throw Exception(ErrorCodes::INCORRECT_QUERY, + "Unknown Index type '{}'. Available index types: {}", index.type, + std::accumulate( validators.cbegin(), validators.cend(), std::string{}, @@ -75,8 +75,8 @@ void MergeTreeIndexFactory::validate(const IndexDescription & index, bool attach return right.first; else return left + ", " + right.first; - }), - ErrorCodes::INCORRECT_QUERY); + }) + ); it->second(index, attach); } @@ -105,8 +105,9 @@ MergeTreeIndexFactory::MergeTreeIndexFactory() registerCreator("annoy", annoyIndexCreator); registerValidator("annoy", annoyIndexValidator); #endif - registerCreator("inverted", ginIndexCreator); - registerValidator("inverted", ginIndexValidator); + + registerCreator("inverted", invertedIndexCreator); + registerValidator("inverted", invertedIndexValidator); } diff --git a/src/Storages/MergeTree/MergeTreeIndices.h b/src/Storages/MergeTree/MergeTreeIndices.h index 52cf8c850b3..1ad6b082223 100644 --- a/src/Storages/MergeTree/MergeTreeIndices.h +++ b/src/Storages/MergeTree/MergeTreeIndices.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -16,7 +17,6 @@ #include #include -#include constexpr auto INDEX_FILE_PREFIX = "skp_idx_"; @@ -237,7 +237,8 @@ void hypothesisIndexValidator(const IndexDescription & index, bool attach); MergeTreeIndexPtr annoyIndexCreator(const IndexDescription & index); void annoyIndexValidator(const IndexDescription & index, bool attach); #endif -MergeTreeIndexPtr ginIndexCreator(const IndexDescription& index); -void ginIndexValidator(const IndexDescription& index, bool attach); + +MergeTreeIndexPtr invertedIndexCreator(const IndexDescription& index); +void invertedIndexValidator(const IndexDescription& index, bool attach); } diff --git a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp index 397a9d82655..3fc7ff54c35 100644 --- a/src/Storages/MergeTree/MergeTreeMarksLoader.cpp +++ b/src/Storages/MergeTree/MergeTreeMarksLoader.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -106,7 +107,7 @@ MarkCache::MappedPtr MergeTreeMarksLoader::loadMarksImpl() ErrorCodes::CORRUPTED_DATA, "Bad size of marks file '{}': {}, must be: {}", std::string(fs::path(data_part_storage->getFullPath()) / mrk_path), - std::to_string(file_size), std::to_string(expected_uncompressed_size)); + file_size, expected_uncompressed_size); auto buffer = data_part_storage->readFile(mrk_path, read_settings.adjustBufferSize(file_size), file_size, std::nullopt); std::unique_ptr reader; @@ -178,29 +179,11 @@ MarkCache::MappedPtr MergeTreeMarksLoader::loadMarks() std::future MergeTreeMarksLoader::loadMarksAsync() { - ThreadGroupStatusPtr thread_group; - if (CurrentThread::isInitialized() && CurrentThread::get().getThreadGroup()) - 
thread_group = CurrentThread::get().getThreadGroup(); - - auto task = std::make_shared>([thread_group, this] - { - setThreadName("loadMarksThread"); - - if (thread_group) - CurrentThread::attachTo(thread_group); - - SCOPE_EXIT_SAFE({ - if (thread_group) - CurrentThread::detachQuery(); - }); - - ProfileEvents::increment(ProfileEvents::BackgroundLoadingMarksTasks); - return loadMarks(); - }); - - auto task_future = task->get_future(); - load_marks_threadpool->scheduleOrThrow([task]{ (*task)(); }); - return task_future; + return scheduleFromThreadPool([this]() -> MarkCache::MappedPtr + { + ProfileEvents::increment(ProfileEvents::BackgroundLoadingMarksTasks); + return loadMarks(); + }, *load_marks_threadpool, "LoadMarksThread"); } } diff --git a/src/Storages/MergeTree/MergeTreeMutationEntry.cpp b/src/Storages/MergeTree/MergeTreeMutationEntry.cpp index 9906ea3d02a..2e30a3f3986 100644 --- a/src/Storages/MergeTree/MergeTreeMutationEntry.cpp +++ b/src/Storages/MergeTree/MergeTreeMutationEntry.cpp @@ -42,7 +42,9 @@ UInt64 MergeTreeMutationEntry::parseFileName(const String & file_name_) { if (UInt64 maybe_block_number = tryParseFileName(file_name_)) return maybe_block_number; - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot parse mutation version from file name, expected 'mutation_.txt', got '{}'", file_name_); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Cannot parse mutation version from file name, expected 'mutation_.txt', got '{}'", + file_name_); } MergeTreeMutationEntry::MergeTreeMutationEntry(MutationCommands commands_, DiskPtr disk_, const String & path_prefix_, UInt64 tmp_number, diff --git a/src/Storages/MergeTree/MergeTreePartInfo.cpp b/src/Storages/MergeTree/MergeTreePartInfo.cpp index 3b1c41f61ba..a6baecee125 100644 --- a/src/Storages/MergeTree/MergeTreePartInfo.cpp +++ b/src/Storages/MergeTree/MergeTreePartInfo.cpp @@ -143,7 +143,7 @@ void MergeTreePartInfo::parseMinMaxDatesFromPartName(const String & part_name, D || !checkChar('_', in) || !tryReadIntText(max_yyyymmdd, in)) { - throw Exception("Unexpected part name: " + part_name, ErrorCodes::BAD_DATA_PART_NAME); + throw Exception(ErrorCodes::BAD_DATA_PART_NAME, "Unexpected part name: {}", part_name); } const auto & date_lut = DateLUT::instance(); @@ -155,7 +155,7 @@ void MergeTreePartInfo::parseMinMaxDatesFromPartName(const String & part_name, D auto max_month = date_lut.toNumYYYYMM(max_date); if (min_month != max_month) - throw Exception("Part name " + part_name + " contains different months", ErrorCodes::BAD_DATA_PART_NAME); + throw Exception(ErrorCodes::BAD_DATA_PART_NAME, "Part name {} contains different months", part_name); } diff --git a/src/Storages/MergeTree/MergeTreePartition.cpp b/src/Storages/MergeTree/MergeTreePartition.cpp index 5d4b4853812..903f467d159 100644 --- a/src/Storages/MergeTree/MergeTreePartition.cpp +++ b/src/Storages/MergeTree/MergeTreePartition.cpp @@ -200,7 +200,7 @@ String MergeTreePartition::getID(const MergeTreeData & storage) const String MergeTreePartition::getID(const Block & partition_key_sample) const { if (value.size() != partition_key_sample.columns()) - throw Exception("Invalid partition key size: " + toString(value.size()), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid partition key size: {}", value.size()); if (value.empty()) return "all"; /// It is tempting to use an empty string here. But that would break directory structure in ZK. 
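The `loadMarksAsync()` hunk above replaces the hand-rolled `packaged_task` / thread-group bookkeeping with a single `scheduleFromThreadPool` call that keeps only the profile-counter increment and the `loadMarks()` call inside the job. A minimal, self-contained sketch of that shape is shown below; `scheduleFromPool` and `NaivePool` are illustrative stand-ins, not ClickHouse's `scheduleFromThreadPool` or `ThreadPool` API.

```cpp
// Sketch only: run a callable on "some pool" and hand the caller a std::future,
// in the spirit of the new loadMarksAsync(). Names and the pool type are invented.
#include <functional>
#include <future>
#include <iostream>
#include <memory>
#include <thread>
#include <utility>
#include <vector>

template <typename Result, typename Pool>
std::future<Result> scheduleFromPool(std::function<Result()> job, Pool & pool)
{
    // Wrap the job so the pool can run it while the caller keeps the future.
    auto task = std::make_shared<std::packaged_task<Result()>>(std::move(job));
    std::future<Result> future = task->get_future();
    pool.schedule([task] { (*task)(); });
    return future;
}

// Trivial stand-in pool: one thread per job, joined on destruction.
struct NaivePool
{
    std::vector<std::thread> workers;
    void schedule(std::function<void()> fn) { workers.emplace_back(std::move(fn)); }
    ~NaivePool() { for (auto & t : workers) t.join(); }
};

int main()
{
    NaivePool pool;
    // Analogous to loading marks asynchronously: the work runs on the pool,
    // and the caller blocks only when it actually needs the result.
    auto marks = scheduleFromPool<int>([] { return 42; }, pool);
    std::cout << marks.get() << '\n';
}
```

The point mirrored by the hunk is that thread naming and thread-group attachment/detachment can live once inside a shared helper instead of being repeated at every call site.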
diff --git a/src/Storages/MergeTree/MergeTreePartsMover.cpp b/src/Storages/MergeTree/MergeTreePartsMover.cpp index b618b068769..2900e4aab7c 100644 --- a/src/Storages/MergeTree/MergeTreePartsMover.cpp +++ b/src/Storages/MergeTree/MergeTreePartsMover.cpp @@ -205,7 +205,7 @@ bool MergeTreePartsMover::selectPartsForMove( MergeTreeMutableDataPartPtr MergeTreePartsMover::clonePart(const MergeTreeMoveEntry & moving_part) const { if (moves_blocker.isCancelled()) - throw Exception("Cancelled moving parts.", ErrorCodes::ABORTED); + throw Exception(ErrorCodes::ABORTED, "Cancelled moving parts."); auto settings = data->getSettings(); auto part = moving_part.part; @@ -253,7 +253,7 @@ MergeTreeMutableDataPartPtr MergeTreePartsMover::clonePart(const MergeTreeMoveEn void MergeTreePartsMover::swapClonedPart(const MergeTreeMutableDataPartPtr & cloned_part) const { if (moves_blocker.isCancelled()) - throw Exception("Cancelled moving parts.", ErrorCodes::ABORTED); + throw Exception(ErrorCodes::ABORTED, "Cancelled moving parts."); auto active_part = data->getActiveContainingPart(cloned_part->name); diff --git a/src/Storages/MergeTree/MergeTreeRangeReader.cpp b/src/Storages/MergeTree/MergeTreeRangeReader.cpp index 6f8da624449..f5afc0b37d6 100644 --- a/src/Storages/MergeTree/MergeTreeRangeReader.cpp +++ b/src/Storages/MergeTree/MergeTreeRangeReader.cpp @@ -189,24 +189,24 @@ MergeTreeRangeReader::Stream::Stream( { size_t marks_count = index_granularity->getMarksCount(); if (from_mark >= marks_count) - throw Exception("Trying create stream to read from mark №"+ toString(current_mark) + " but total marks count is " - + toString(marks_count), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying create stream to read from mark №{} but total marks count is {}", + toString(current_mark), toString(marks_count)); if (last_mark > marks_count) - throw Exception("Trying create stream to read to mark №"+ toString(current_mark) + " but total marks count is " - + toString(marks_count), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying create stream to read to mark №{} but total marks count is {}", + toString(current_mark), toString(marks_count)); } void MergeTreeRangeReader::Stream::checkNotFinished() const { if (isFinished()) - throw Exception("Cannot read out of marks range.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot read out of marks range."); } void MergeTreeRangeReader::Stream::checkEnoughSpaceInCurrentGranule(size_t num_rows) const { if (num_rows + offset_after_current_mark > current_mark_index_granularity) - throw Exception("Cannot read from granule more than index_granularity.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot read from granule more than index_granularity."); } size_t MergeTreeRangeReader::Stream::readRows(Columns & columns, size_t num_rows) @@ -229,7 +229,8 @@ void MergeTreeRangeReader::Stream::toNextMark() else if (current_mark == total_marks_count) current_mark_index_granularity = 0; /// HACK? 
else - throw Exception("Trying to read from mark " + toString(current_mark) + ", but total marks count " + toString(total_marks_count), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to read from mark {}, but total marks count {}", + toString(current_mark), toString(total_marks_count)); offset_after_current_mark = 0; } @@ -305,12 +306,12 @@ void MergeTreeRangeReader::ReadResult::adjustLastGranule() size_t num_rows_to_subtract = total_rows_per_granule - num_read_rows; if (rows_per_granule.empty()) - throw Exception("Can't adjust last granule because no granules were added", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't adjust last granule because no granules were added"); if (num_rows_to_subtract > rows_per_granule.back()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Can't adjust last granule because it has {} rows, but try to subtract {} rows.", - toString(rows_per_granule.back()), toString(num_rows_to_subtract)); + rows_per_granule.back(), num_rows_to_subtract); rows_per_granule.back() -= num_rows_to_subtract; total_rows_per_granule -= num_rows_to_subtract; @@ -922,7 +923,7 @@ bool MergeTreeRangeReader::isCurrentRangeFinished() const MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, MarkRanges & ranges) { if (max_rows == 0) - throw Exception("Expected at least 1 row to read, got 0.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected at least 1 row to read, got 0."); ReadResult read_result(log); @@ -1197,8 +1198,8 @@ Columns MergeTreeRangeReader::continueReadingChain(const ReadResult & result, si /// added_rows may be zero if all columns were read in prewhere and it's ok. if (num_rows && num_rows != result.total_rows_per_granule) - throw Exception("RangeReader read " + toString(num_rows) + " rows, but " - + toString(result.total_rows_per_granule) + " expected.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "RangeReader read {} rows, but {} expected.", + num_rows, result.total_rows_per_granule); return columns; } diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index 0f85fd2ad9c..37b24422af0 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -201,6 +201,10 @@ std::vector MergeTreeReadPool::fillPerPartInfo(const RangesInDataParts & for (const auto i : collections::range(0, parts.size())) { const auto & part = parts[i]; +#ifndef NDEBUG + assertSortedAndNonIntersecting(part.ranges); +#endif + bool part_on_remote_disk = part.data_part->isStoredOnRemoteDisk(); is_part_on_remote_disk[i] = part_on_remote_disk; do_not_steal_tasks |= part_on_remote_disk; @@ -315,7 +319,7 @@ void MergeTreeReadPool::fillPerThreadInfo( while (need_marks > 0) { if (part.ranges.empty()) - throw Exception("Unexpected end of ranges while spreading marks among threads", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected end of ranges while spreading marks among threads"); MarkRange & range = part.ranges.front(); diff --git a/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp b/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp index 3b3a6b95cff..c54f54e62e0 100644 --- a/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderInMemory.cpp @@ -57,16 +57,16 @@ size_t MergeTreeReaderInMemory::readRows( size_t total_marks = data_part_info_for_read->getIndexGranularity().getMarksCount(); if 
(from_mark >= total_marks) - throw Exception("Mark " + toString(from_mark) + " is out of bound. Max mark: " - + toString(total_marks), ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Mark {} is out of bound. Max mark: {}", + toString(from_mark), toString(total_marks)); size_t num_columns = res_columns.size(); checkNumberOfColumns(num_columns); size_t part_rows = part_in_memory->block.rows(); if (total_rows_read >= part_rows) - throw Exception("Cannot read data in MergeTreeReaderInMemory. Rows already read: " - + toString(total_rows_read) + ". Rows in part: " + toString(part_rows), ErrorCodes::CANNOT_READ_ALL_DATA); + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Cannot read data in MergeTreeReaderInMemory. " + "Rows already read: {}. Rows in part: {}", total_rows_read, part_rows); size_t rows_to_read = std::min(max_rows_to_read, part_rows - total_rows_read); for (size_t i = 0; i < num_columns; ++i) diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index ae1bace79e3..6eafd8824b8 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -25,7 +26,7 @@ struct Settings; M(UInt64, min_compress_block_size, 0, "When granule is written, compress the data in buffer if the size of pending uncompressed data is larger or equal than the specified threshold. If this setting is not set, the corresponding global setting is used.", 0) \ M(UInt64, max_compress_block_size, 0, "Compress the pending uncompressed data in buffer if its size is larger or equal than the specified threshold. Block of data will be compressed even if the current granule is not finished. If this setting is not set, the corresponding global setting is used.", 0) \ M(UInt64, index_granularity, 8192, "How many rows correspond to one primary key value.", 0) \ - M(UInt64, max_digestion_size_per_segment, 1024 * 1024 * 256, "Max number of bytes to digest per segment to build GIN index.", 0) \ + M(UInt64, max_digestion_size_per_segment, 256_MiB, "Max number of bytes to digest per segment to build GIN index.", 0) \ \ /** Data storing format settings. 
 */ \
     M(UInt64, min_bytes_for_wide_part, 10485760, "Minimal uncompressed size in bytes to create part in wide format instead of compact", 0) \
diff --git a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp
index 5b916096e06..9331c9723b5 100644
--- a/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp
+++ b/src/Storages/MergeTree/MergeTreeWriteAheadLog.cpp
@@ -193,7 +193,7 @@ MergeTreeData::MutableDataPartsVector MergeTreeWriteAheadLog::restore(
         }
         else
         {
-            throw Exception("Unknown action type: " + toString(static_cast(action_type)), ErrorCodes::CORRUPTED_DATA);
+            throw Exception(ErrorCodes::CORRUPTED_DATA, "Unknown action type: {}", toString(static_cast(action_type)));
         }
     }
     catch (const Exception & e)
@@ -356,8 +356,9 @@ void MergeTreeWriteAheadLog::ActionMetadata::read(ReadBuffer & meta_in)
     {
         readIntBinary(min_compatible_version, meta_in);
         if (min_compatible_version > WAL_VERSION)
-            throw Exception("WAL metadata version " + toString(min_compatible_version)
-                + " is not compatible with this ClickHouse version", ErrorCodes::UNKNOWN_FORMAT_VERSION);
+            throw Exception(ErrorCodes::UNKNOWN_FORMAT_VERSION,
+                            "WAL metadata version {} is not compatible with this ClickHouse version",
+                            toString(min_compatible_version));
 
         size_t metadata_size;
         readVarUInt(metadata_size, meta_in);
diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp
index 3a7484a4141..ced43ae25b0 100644
--- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp
+++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp
@@ -242,8 +242,8 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis
             written_files.emplace_back(std::move(file));
         }
         else if (rows_count)
-            throw Exception("MinMax index was not initialized for new non-empty part " + new_part->name
-                + ". It is a bug.", ErrorCodes::LOGICAL_ERROR);
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "MinMax index was not initialized for new non-empty part {}. It is a bug.",
+                new_part->name);
     }
 
     {
@@ -298,8 +298,8 @@ MergedBlockOutputStream::WrittenFiles MergedBlockOutputStream::finalizePartOnDis
     }
     else
     {
-        throw Exception("Compression codec have to be specified for part on disk, empty for" + new_part->name
-            + ". It is a bug.", ErrorCodes::LOGICAL_ERROR);
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Compression codec have to be specified for part on disk, empty for{}. 
" + "It is a bug.", new_part->name); } { diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp index e4a5a0bc3ba..03829f1daf9 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp @@ -42,7 +42,7 @@ MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( auto * writer_on_disk = dynamic_cast(writer.get()); if (!writer_on_disk) - throw Exception("MergedColumnOnlyOutputStream supports only parts stored on disk", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MergedColumnOnlyOutputStream supports only parts stored on disk"); writer_on_disk->setWrittenOffsetColumns(offset_columns_); } diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 6e222a562a0..78ee66f4d9b 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -268,8 +268,10 @@ getColumnsForNewDataPart( /// should it's previous version should be dropped or removed if (renamed_columns_to_from.contains(it->name) && !was_renamed && !was_removed) throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Incorrect mutation commands, trying to rename column {} to {}, but part {} already has column {}", renamed_columns_to_from[it->name], it->name, source_part->name, it->name); + ErrorCodes::LOGICAL_ERROR, + "Incorrect mutation commands, trying to rename column {} to {}, " + "but part {} already has column {}", + renamed_columns_to_from[it->name], it->name, source_part->name, it->name); /// Column was renamed and no other column renamed to it's name /// or column is dropped. @@ -628,6 +630,8 @@ void finalizeMutatedPart( ContextPtr context, bool sync) { + std::vector> written_files; + if (new_data_part->uuid != UUIDHelpers::Nil) { auto out = new_data_part->getDataPartStorage().writeFile(IMergeTreeDataPart::UUID_FILE_NAME, 4096, context->getWriteSettings()); @@ -635,8 +639,7 @@ void finalizeMutatedPart( writeUUIDText(new_data_part->uuid, out_hashing); new_data_part->checksums.files[IMergeTreeDataPart::UUID_FILE_NAME].file_size = out_hashing.count(); new_data_part->checksums.files[IMergeTreeDataPart::UUID_FILE_NAME].file_hash = out_hashing.getHash(); - if (sync) - out_hashing.sync(); + written_files.push_back(std::move(out)); } if (execute_ttl_type != ExecuteTTLType::NONE) @@ -647,43 +650,47 @@ void finalizeMutatedPart( new_data_part->ttl_infos.write(out_hashing); new_data_part->checksums.files["ttl.txt"].file_size = out_hashing.count(); new_data_part->checksums.files["ttl.txt"].file_hash = out_hashing.getHash(); - if (sync) - out_hashing.sync(); + written_files.push_back(std::move(out_ttl)); } if (!new_data_part->getSerializationInfos().empty()) { - auto out = new_data_part->getDataPartStorage().writeFile(IMergeTreeDataPart::SERIALIZATION_FILE_NAME, 4096, context->getWriteSettings()); - HashingWriteBuffer out_hashing(*out); + auto out_serialization = new_data_part->getDataPartStorage().writeFile(IMergeTreeDataPart::SERIALIZATION_FILE_NAME, 4096, context->getWriteSettings()); + HashingWriteBuffer out_hashing(*out_serialization); new_data_part->getSerializationInfos().writeJSON(out_hashing); new_data_part->checksums.files[IMergeTreeDataPart::SERIALIZATION_FILE_NAME].file_size = out_hashing.count(); new_data_part->checksums.files[IMergeTreeDataPart::SERIALIZATION_FILE_NAME].file_hash = out_hashing.getHash(); - if (sync) - out_hashing.sync(); + 
written_files.push_back(std::move(out_serialization)); } { /// Write file with checksums. auto out_checksums = new_data_part->getDataPartStorage().writeFile("checksums.txt", 4096, context->getWriteSettings()); new_data_part->checksums.write(*out_checksums); - if (sync) - out_checksums->sync(); - } /// close fd + written_files.push_back(std::move(out_checksums)); + } { - auto out = new_data_part->getDataPartStorage().writeFile(IMergeTreeDataPart::DEFAULT_COMPRESSION_CODEC_FILE_NAME, 4096, context->getWriteSettings()); - DB::writeText(queryToString(codec->getFullCodecDesc()), *out); - if (sync) - out->sync(); - } /// close fd + auto out_comp = new_data_part->getDataPartStorage().writeFile(IMergeTreeDataPart::DEFAULT_COMPRESSION_CODEC_FILE_NAME, 4096, context->getWriteSettings()); + DB::writeText(queryToString(codec->getFullCodecDesc()), *out_comp); + written_files.push_back(std::move(out_comp)); + } { /// Write a file with a description of columns. auto out_columns = new_data_part->getDataPartStorage().writeFile("columns.txt", 4096, context->getWriteSettings()); new_data_part->getColumns().writeText(*out_columns); + written_files.push_back(std::move(out_columns)); + } + + for (auto & file : written_files) + { + file->finalize(); if (sync) - out_columns->sync(); - } /// close fd + file->sync(); + } + /// Close files + written_files.clear(); new_data_part->rows_count = source_part->rows_count; new_data_part->index_granularity = source_part->index_granularity; @@ -1173,7 +1180,7 @@ private: ctx->projections_to_build = MutationHelpers::getProjectionsForNewDataPart(ctx->metadata_snapshot->getProjections(), ctx->for_file_renames); if (!ctx->mutating_pipeline_builder.initialized()) - throw Exception("Cannot mutate part columns with uninitialized mutations stream. It's a bug", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot mutate part columns with uninitialized mutations stream. It's a bug"); QueryPipelineBuilder builder(std::move(ctx->mutating_pipeline_builder)); @@ -1526,7 +1533,7 @@ bool MutateTask::prepare() if (ctx->future_part->parts.size() != 1) throw Exception(ErrorCodes::LOGICAL_ERROR, "Trying to mutate {} parts, not one. 
" - "This is a bug.", toString(ctx->future_part->parts.size())); + "This is a bug.", ctx->future_part->parts.size()); ctx->num_mutations = std::make_unique(CurrentMetrics::PartMutation); diff --git a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp index 560d9f17a07..65a82988a5d 100644 --- a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp +++ b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp @@ -702,7 +702,7 @@ PartMovesBetweenShardsOrchestrator::Entry PartMovesBetweenShardsOrchestrator::ge return entry; } - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Task with id {} not found", toString(task_uuid)); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Task with id {} not found", task_uuid); } String PartMovesBetweenShardsOrchestrator::Entry::toString() const diff --git a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.h b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.h index 2474b5807e2..24454b897af 100644 --- a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.h +++ b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.h @@ -84,7 +84,7 @@ public: case CANCELLED: return "CANCELLED"; } - throw Exception("Unknown EntryState: " + DB::toString(value), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown EntryState: {}", DB::toString(value)); } static EntryState::Value fromString(String in) @@ -100,7 +100,7 @@ public: else if (in == "REMOVE_UUID_PIN") return REMOVE_UUID_PIN; else if (in == "DONE") return DONE; else if (in == "CANCELLED") return CANCELLED; - else throw Exception("Unknown state: " + in, ErrorCodes::LOGICAL_ERROR); + else throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown state: {}", in); } }; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index 1c667b1c867..f43d7705d00 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -90,7 +90,7 @@ void ReplicatedMergeTreeCleanupThread::clearOldLogs() Coordination::Stat stat; if (!zookeeper->exists(storage.zookeeper_path + "/log", &stat)) - throw Exception(storage.zookeeper_path + "/log doesn't exist", ErrorCodes::NOT_FOUND_NODE); + throw Exception(ErrorCodes::NOT_FOUND_NODE, "{}/log doesn't exist", storage.zookeeper_path); int children_count = stat.numChildren; @@ -293,7 +293,7 @@ void ReplicatedMergeTreeCleanupThread::markLostReplicas(const std::unordered_map } if (candidate_lost_replicas.size() == replicas_count) - throw Exception("All replicas are stale: we won't mark any replica as lost", ErrorCodes::ALL_REPLICAS_LOST); + throw Exception(ErrorCodes::ALL_REPLICAS_LOST, "All replicas are stale: we won't mark any replica as lost"); std::vector futures; for (size_t i = 0; i < candidate_lost_replicas.size(); ++i) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h index 84115cb2b8a..a8c72f3234c 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeLogEntry.h @@ -63,7 +63,7 @@ struct ReplicatedMergeTreeLogEntryData case ReplicatedMergeTreeLogEntryData::SYNC_PINNED_PART_UUIDS: return "SYNC_PINNED_PART_UUIDS"; case ReplicatedMergeTreeLogEntryData::CLONE_PART_FROM_SHARD: return "CLONE_PART_FROM_SHARD"; default: - throw Exception("Unknown log entry type: " + DB::toString(type), 
ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown log entry type: {}", DB::toString(type)); } } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index 6f1d8dd93e7..d653f5e3286 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -347,7 +347,7 @@ CheckResult ReplicatedMergeTreePartCheckThread::checkPart(const String & part_na } if (local_part_header.getColumnsHash() != zk_part_header.getColumnsHash()) - throw Exception("Columns of local part " + part_name + " are different from ZooKeeper", ErrorCodes::TABLE_DIFFERS_TOO_MUCH); + throw Exception(ErrorCodes::TABLE_DIFFERS_TOO_MUCH, "Columns of local part {} are different from ZooKeeper", part_name); zk_part_header.getChecksums().checkEqual(local_part_header.getChecksums(), true); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index bd75d76109a..e47dddb9795 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -432,7 +432,7 @@ void ReplicatedMergeTreeQueue::removeCoveredPartsFromMutations(const String & pa else if (remove_part) status.parts_to_do.remove(part_name); else - throw Exception("Called remove part from mutations, but nothing removed", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Called remove part from mutations, but nothing removed"); if (status.parts_to_do.size() == 0) some_mutations_are_probably_done = true; @@ -581,7 +581,7 @@ int32_t ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper } if (pull_log_blocker.isCancelled()) - throw Exception("Log pulling is cancelled", ErrorCodes::ABORTED); + throw Exception(ErrorCodes::ABORTED, "Log pulling is cancelled"); String index_str = zookeeper->get(fs::path(replica_path) / "log_pointer"); UInt64 index; @@ -637,8 +637,8 @@ int32_t ReplicatedMergeTreeQueue::pullLogsToQueue(zkutil::ZooKeeperPtr zookeeper String last_entry = *last; if (!startsWith(last_entry, "log-")) - throw Exception("Error in zookeeper data: unexpected node " + last_entry + " in " + zookeeper_path + "/log", - ErrorCodes::UNEXPECTED_NODE_IN_ZOOKEEPER); + throw Exception(ErrorCodes::UNEXPECTED_NODE_IN_ZOOKEEPER, "Error in zookeeper data: unexpected node {} in {}/log", + last_entry, zookeeper_path); UInt64 last_entry_index = parse(last_entry.substr(strlen("log-"))); @@ -1576,7 +1576,7 @@ void ReplicatedMergeTreeQueue::CurrentlyExecuting::setActualPartName( std::vector & covered_entries_to_wait) { if (!entry.actual_new_part_name.empty()) - throw Exception("Entry actual part isn't empty yet. This is a bug.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Entry actual part isn't empty yet. 
This is a bug."); entry.actual_new_part_name = actual_part_name; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 164ef4f3239..ba0a4e165a9 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -274,10 +274,10 @@ ReplicatedMergeTreeSinkImpl::~ReplicatedMergeTreeSinkImpl() = defa static void assertSessionIsNotExpired(const zkutil::ZooKeeperPtr & zookeeper) { if (!zookeeper) - throw Exception("No ZooKeeper session.", ErrorCodes::NO_ZOOKEEPER); + throw Exception(ErrorCodes::NO_ZOOKEEPER, "No ZooKeeper session."); if (zookeeper->expired()) - throw Exception("ZooKeeper session has been expired.", ErrorCodes::NO_ZOOKEEPER); + throw Exception(ErrorCodes::NO_ZOOKEEPER, "ZooKeeper session has been expired."); } template @@ -341,7 +341,7 @@ size_t ReplicatedMergeTreeSinkImpl::checkQuorumPrecondition(const auto host = get_results[1]; if (is_active.error == Coordination::Error::ZNONODE || host.error == Coordination::Error::ZNONODE) - throw Exception("Replica is not active right now", ErrorCodes::READONLY); + throw Exception(ErrorCodes::READONLY, "Replica is not active right now"); quorum_info.is_active_node_version = is_active.stat.version; quorum_info.host_node_version = host.stat.version; @@ -395,7 +395,7 @@ void ReplicatedMergeTreeSinkImpl::consume(Chunk chunk) if (const auto * chunk_offsets_ptr = typeid_cast(chunk_info.get())) chunk_offsets = std::make_shared(chunk_offsets_ptr->offsets); else - throw Exception("No chunk info for async inserts", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "No chunk info for async inserts"); } auto part_blocks = storage.writer.splitBlockIntoParts(block, max_parts_per_block, metadata_snapshot, context, chunk_offsets); @@ -861,7 +861,9 @@ std::vector ReplicatedMergeTreeSinkImpl::commitPart( block_id_path.clear(); } else - throw Exception("Conflict block ids and block number lock should not be empty at the same time for async inserts", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Conflict block ids and block number lock should not " + "be empty at the same time for async inserts"); /// Information about the part. 
storage.getCommitPartOps(ops, part, block_id_path); @@ -1000,7 +1002,7 @@ std::vector ReplicatedMergeTreeSinkImpl::commitPart( if (loop_counter == max_iterations) { part->is_duplicate = true; /// Part is duplicate, just remove it from local FS - throw Exception("Too many transaction retries - it may indicate an error", ErrorCodes::DUPLICATE_DATA_PART); + throw Exception(ErrorCodes::DUPLICATE_DATA_PART, "Too many transaction retries - it may indicate an error"); } retries_ctl.requestUnconditionalRetry(); /// we want one more iteration w/o counting it as a try and timeout return; @@ -1021,7 +1023,7 @@ std::vector ReplicatedMergeTreeSinkImpl::commitPart( /// So make it temporary to avoid its resurrection on restart rename_part_to_temporary(); - throw Exception("Another quorum insert has been already started", ErrorCodes::UNSATISFIED_QUORUM_FOR_PREVIOUS_WRITE); + throw Exception(ErrorCodes::UNSATISFIED_QUORUM_FOR_PREVIOUS_WRITE, "Another quorum insert has been already started"); } else { @@ -1125,7 +1127,7 @@ void ReplicatedMergeTreeSinkImpl::waitForQuorum( break; if (!event->tryWait(quorum_timeout_ms)) - throw Exception("Timeout while waiting for quorum", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Timeout while waiting for quorum"); LOG_TRACE(log, "Quorum {} for part {} updated, will check quorum node still exists", quorum_path, part_name); } @@ -1136,14 +1138,14 @@ void ReplicatedMergeTreeSinkImpl::waitForQuorum( String value; if (!zookeeper->tryGet(storage.replica_path + "/is_active", value, &stat) || stat.version != is_active_node_version) - throw Exception("Replica become inactive while waiting for quorum", ErrorCodes::NO_ACTIVE_REPLICAS); + throw Exception(ErrorCodes::NO_ACTIVE_REPLICAS, "Replica become inactive while waiting for quorum"); } catch (...) { /// We do not know whether or not data has been inserted /// - whether other replicas have time to download the part and mark the quorum as done. - throw Exception("Unknown status, client must retry. Reason: " + getCurrentExceptionMessage(false), - ErrorCodes::UNKNOWN_STATUS_OF_INSERT); + throw Exception(ErrorCodes::UNKNOWN_STATUS_OF_INSERT, "Unknown status, client must retry. Reason: {}", + getCurrentExceptionMessage(false)); } LOG_TRACE(log, "Quorum '{}' for part {} satisfied", quorum_path, part_name); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index cb916e393cf..f06ce725daa 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -186,58 +186,50 @@ void ReplicatedMergeTreeTableMetadata::checkImmutableFieldsEquals(const Replicat if (data_format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { if (date_column != from_zk.date_column) - throw Exception("Existing table metadata in ZooKeeper differs in date index column." - " Stored in ZooKeeper: " + from_zk.date_column + ", local: " + date_column, - ErrorCodes::METADATA_MISMATCH); + throw Exception(ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in date index column. " + "Stored in ZooKeeper: {}, local: {}", from_zk.date_column, date_column); } else if (!from_zk.date_column.empty()) { - throw Exception( - "Existing table metadata in ZooKeeper differs in date index column." 
- " Stored in ZooKeeper: " + from_zk.date_column + ", local is custom-partitioned.", - ErrorCodes::METADATA_MISMATCH); + throw Exception(ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in date index column. " + "Stored in ZooKeeper: {}, local is custom-partitioned.", from_zk.date_column); } if (index_granularity != from_zk.index_granularity) - throw Exception("Existing table metadata in ZooKeeper differs in index granularity." - " Stored in ZooKeeper: " + DB::toString(from_zk.index_granularity) + ", local: " + DB::toString(index_granularity), - ErrorCodes::METADATA_MISMATCH); + throw Exception(ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs " + "in index granularity. Stored in ZooKeeper: {}, local: {}", + DB::toString(from_zk.index_granularity), DB::toString(index_granularity)); if (merging_params_mode != from_zk.merging_params_mode) - throw Exception("Existing table metadata in ZooKeeper differs in mode of merge operation." - " Stored in ZooKeeper: " + DB::toString(from_zk.merging_params_mode) + ", local: " - + DB::toString(merging_params_mode), - ErrorCodes::METADATA_MISMATCH); + throw Exception(ErrorCodes::METADATA_MISMATCH, + "Existing table metadata in ZooKeeper differs in mode of merge operation. " + "Stored in ZooKeeper: {}, local: {}", DB::toString(from_zk.merging_params_mode), + DB::toString(merging_params_mode)); if (sign_column != from_zk.sign_column) - throw Exception("Existing table metadata in ZooKeeper differs in sign column." - " Stored in ZooKeeper: " + from_zk.sign_column + ", local: " + sign_column, - ErrorCodes::METADATA_MISMATCH); + throw Exception(ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in sign column. " + "Stored in ZooKeeper: {}, local: {}", from_zk.sign_column, sign_column); /// NOTE: You can make a less strict check of match expressions so that tables do not break from small changes /// in formatAST code. String parsed_zk_primary_key = formattedAST(KeyDescription::parse(from_zk.primary_key, columns, context).expression_list_ast); if (primary_key != parsed_zk_primary_key) - throw Exception("Existing table metadata in ZooKeeper differs in primary key." - " Stored in ZooKeeper: " + from_zk.primary_key + - ", parsed from ZooKeeper: " + parsed_zk_primary_key + - ", local: " + primary_key, - ErrorCodes::METADATA_MISMATCH); + throw Exception(ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in primary key. " + "Stored in ZooKeeper: {}, parsed from ZooKeeper: {}, local: {}", + from_zk.primary_key, parsed_zk_primary_key, primary_key); if (data_format_version != from_zk.data_format_version) - throw Exception("Existing table metadata in ZooKeeper differs in data format version." - " Stored in ZooKeeper: " + DB::toString(from_zk.data_format_version.toUnderType()) + - ", local: " + DB::toString(data_format_version.toUnderType()), - ErrorCodes::METADATA_MISMATCH); + throw Exception(ErrorCodes::METADATA_MISMATCH, + "Existing table metadata in ZooKeeper differs in data format version. " + "Stored in ZooKeeper: {}, local: {}", DB::toString(from_zk.data_format_version.toUnderType()), + DB::toString(data_format_version.toUnderType())); String parsed_zk_partition_key = formattedAST(KeyDescription::parse(from_zk.partition_key, columns, context).expression_list_ast); if (partition_key != parsed_zk_partition_key) - throw Exception( - "Existing table metadata in ZooKeeper differs in partition key expression." 
- " Stored in ZooKeeper: " + from_zk.partition_key + - ", parsed from ZooKeeper: " + parsed_zk_partition_key + - ", local: " + partition_key, - ErrorCodes::METADATA_MISMATCH); + throw Exception(ErrorCodes::METADATA_MISMATCH, + "Existing table metadata in ZooKeeper differs in partition key expression. " + "Stored in ZooKeeper: {}, parsed from ZooKeeper: {}, local: {}", + from_zk.partition_key, parsed_zk_partition_key, partition_key); } void ReplicatedMergeTreeTableMetadata::checkEquals(const ReplicatedMergeTreeTableMetadata & from_zk, const ColumnsDescription & columns, ContextPtr context) const @@ -248,75 +240,57 @@ void ReplicatedMergeTreeTableMetadata::checkEquals(const ReplicatedMergeTreeTabl String parsed_zk_sampling_expression = formattedAST(KeyDescription::parse(from_zk.sampling_expression, columns, context).definition_ast); if (sampling_expression != parsed_zk_sampling_expression) { - throw Exception( - "Existing table metadata in ZooKeeper differs in sample expression." - " Stored in ZooKeeper: " + from_zk.sampling_expression + - ", parsed from ZooKeeper: " + parsed_zk_sampling_expression + - ", local: " + sampling_expression, - ErrorCodes::METADATA_MISMATCH); + throw Exception(ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in sample expression. " + "Stored in ZooKeeper: {}, parsed from ZooKeeper: {}, local: {}", + from_zk.sampling_expression, parsed_zk_sampling_expression, sampling_expression); } String parsed_zk_sorting_key = formattedAST(extractKeyExpressionList(KeyDescription::parse(from_zk.sorting_key, columns, context).definition_ast)); if (sorting_key != parsed_zk_sorting_key) { - throw Exception( - "Existing table metadata in ZooKeeper differs in sorting key expression." - " Stored in ZooKeeper: " + from_zk.sorting_key + - ", parsed from ZooKeeper: " + parsed_zk_sorting_key + - ", local: " + sorting_key, - ErrorCodes::METADATA_MISMATCH); + throw Exception(ErrorCodes::METADATA_MISMATCH, + "Existing table metadata in ZooKeeper differs in sorting key expression. " + "Stored in ZooKeeper: {}, parsed from ZooKeeper: {}, local: {}", + from_zk.sorting_key, parsed_zk_sorting_key, sorting_key); } auto parsed_primary_key = KeyDescription::parse(primary_key, columns, context); String parsed_zk_ttl_table = formattedAST(TTLTableDescription::parse(from_zk.ttl_table, columns, context, parsed_primary_key).definition_ast); if (ttl_table != parsed_zk_ttl_table) { - throw Exception( - "Existing table metadata in ZooKeeper differs in TTL." - " Stored in ZooKeeper: " + from_zk.ttl_table + - ", parsed from ZooKeeper: " + parsed_zk_ttl_table + - ", local: " + ttl_table, - ErrorCodes::METADATA_MISMATCH); + throw Exception(ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in TTL. " + "Stored in ZooKeeper: {}, parsed from ZooKeeper: {}, local: {}", + from_zk.ttl_table, parsed_zk_ttl_table, ttl_table); } String parsed_zk_skip_indices = IndicesDescription::parse(from_zk.skip_indices, columns, context).toString(); if (skip_indices != parsed_zk_skip_indices) { - throw Exception( - "Existing table metadata in ZooKeeper differs in skip indexes." - " Stored in ZooKeeper: " + from_zk.skip_indices + - ", parsed from ZooKeeper: " + parsed_zk_skip_indices + - ", local: " + skip_indices, - ErrorCodes::METADATA_MISMATCH); + throw Exception(ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in skip indexes. 
" + "Stored in ZooKeeper: {}, parsed from ZooKeeper: {}, local: {}", + from_zk.skip_indices, parsed_zk_skip_indices, skip_indices); } String parsed_zk_projections = ProjectionsDescription::parse(from_zk.projections, columns, context).toString(); if (projections != parsed_zk_projections) { - throw Exception( - "Existing table metadata in ZooKeeper differs in projections." - " Stored in ZooKeeper: " + from_zk.projections + - ", parsed from ZooKeeper: " + parsed_zk_projections + - ", local: " + projections, - ErrorCodes::METADATA_MISMATCH); + throw Exception(ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in projections. " + "Stored in ZooKeeper: {}, parsed from ZooKeeper: {}, local: {}", + from_zk.projections, parsed_zk_projections, projections); } String parsed_zk_constraints = ConstraintsDescription::parse(from_zk.constraints).toString(); if (constraints != parsed_zk_constraints) { - throw Exception( - "Existing table metadata in ZooKeeper differs in constraints." - " Stored in ZooKeeper: " + from_zk.constraints + - ", parsed from ZooKeeper: " + parsed_zk_constraints + - ", local: " + constraints, - ErrorCodes::METADATA_MISMATCH); + throw Exception(ErrorCodes::METADATA_MISMATCH, "Existing table metadata in ZooKeeper differs in constraints. " + "Stored in ZooKeeper: {}, parsed from ZooKeeper: {}, local: {}", + from_zk.constraints, parsed_zk_constraints, constraints); } if (from_zk.index_granularity_bytes_found_in_zk && index_granularity_bytes != from_zk.index_granularity_bytes) - throw Exception("Existing table metadata in ZooKeeper differs in index granularity bytes." - " Stored in ZooKeeper: " + DB::toString(from_zk.index_granularity_bytes) + - ", local: " + DB::toString(index_granularity_bytes), - ErrorCodes::METADATA_MISMATCH); + throw Exception(ErrorCodes::METADATA_MISMATCH, + "Existing table metadata in ZooKeeper differs in index granularity bytes. " + "Stored in ZooKeeper: {}, local: {}", from_zk.index_granularity_bytes, index_granularity_bytes); } ReplicatedMergeTreeTableMetadata::Diff diff --git a/src/Storages/MergeTree/checkDataPart.cpp b/src/Storages/MergeTree/checkDataPart.cpp index 6f9f16b6155..2ec83d99eeb 100644 --- a/src/Storages/MergeTree/checkDataPart.cpp +++ b/src/Storages/MergeTree/checkDataPart.cpp @@ -71,9 +71,8 @@ IMergeTreeDataPart::Checksums checkDataPart( } if (columns_txt != columns_list) - throw Exception("Columns doesn't match in part " + data_part_storage.getFullPath() - + ". Expected: " + columns_list.toString() - + ". Found: " + columns_txt.toString(), ErrorCodes::CORRUPTED_DATA); + throw Exception(ErrorCodes::CORRUPTED_DATA, "Columns doesn't match in part {}. Expected: {}. Found: {}", + data_part_storage.getFullPath(), columns_list.toString(), columns_txt.toString()); /// Real checksums based on contents of data. Must correspond to checksums.txt. If not - it means the data is broken. IMergeTreeDataPart::Checksums checksums_data; @@ -144,7 +143,7 @@ IMergeTreeDataPart::Checksums checkDataPart( } else { - throw Exception("Unknown type in part " + data_part_storage.getFullPath(), ErrorCodes::UNKNOWN_PART_TYPE); + throw Exception(ErrorCodes::UNKNOWN_PART_TYPE, "Unknown type in part {}", data_part_storage.getFullPath()); } /// Checksums from the rest files listed in checksums.txt. May be absent. If present, they are subsequently compared with the actual data checksums. 
diff --git a/src/Storages/MergeTree/localBackup.cpp b/src/Storages/MergeTree/localBackup.cpp index 3559eff1f6b..3b05e3df8d3 100644 --- a/src/Storages/MergeTree/localBackup.cpp +++ b/src/Storages/MergeTree/localBackup.cpp @@ -25,7 +25,7 @@ void localBackupImpl( return; if (level >= 1000) - throw DB::Exception("Too deep recursion", DB::ErrorCodes::TOO_DEEP_RECURSION); + throw DB::Exception(DB::ErrorCodes::TOO_DEEP_RECURSION, "Too deep recursion"); disk->createDirectories(destination_path); @@ -93,7 +93,8 @@ void localBackup( { if (disk->exists(destination_path) && !disk->isDirectoryEmpty(destination_path)) { - throw DB::Exception("Directory " + fullPath(disk, destination_path) + " already exists and is not empty.", DB::ErrorCodes::DIRECTORY_ALREADY_EXISTS); + throw DB::Exception(ErrorCodes::DIRECTORY_ALREADY_EXISTS, "Directory {} already exists and is not empty.", + DB::fullPath(disk, destination_path)); } size_t try_no = 0; diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 548b2a70ce7..a21d6ec399d 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -180,8 +180,8 @@ static StoragePtr create(const StorageFactory::Arguments & args) else if (name_part == "VersionedCollapsing") merging_params.mode = MergeTreeData::MergingParams::VersionedCollapsing; else if (!name_part.empty()) - throw Exception( - "Unknown storage " + args.engine_name + getMergeTreeVerboseHelp(is_extended_storage_def), ErrorCodes::UNKNOWN_STORAGE); + throw Exception(ErrorCodes::UNKNOWN_STORAGE, "Unknown storage {}", + args.engine_name + getMergeTreeVerboseHelp(is_extended_storage_def)); /// NOTE Quite complicated. @@ -388,21 +388,20 @@ static StoragePtr create(const StorageFactory::Arguments & args) if (ast_zk_path && ast_zk_path->value.getType() == Field::Types::String) zookeeper_path = ast_zk_path->value.safeGet(); else - throw Exception( - "Path in ZooKeeper must be a string literal" + getMergeTreeVerboseHelp(is_extended_storage_def), - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Path in ZooKeeper must be a string literal{}", + getMergeTreeVerboseHelp(is_extended_storage_def)); ++arg_num; ast_replica_name = engine_args[arg_num]->as(); if (ast_replica_name && ast_replica_name->value.getType() == Field::Types::String) replica_name = ast_replica_name->value.safeGet(); else - throw Exception( - "Replica name must be a string literal" + getMergeTreeVerboseHelp(is_extended_storage_def), ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Replica name must be a string literal{}", + getMergeTreeVerboseHelp(is_extended_storage_def)); if (replica_name.empty()) - throw Exception( - "No replica name in config" + getMergeTreeVerboseHelp(is_extended_storage_def), ErrorCodes::NO_REPLICA_NAME_GIVEN); + throw Exception(ErrorCodes::NO_REPLICA_NAME_GIVEN, "No replica name in config{}", + getMergeTreeVerboseHelp(is_extended_storage_def)); ++arg_num; expand_macro(ast_zk_path, ast_replica_name); @@ -437,7 +436,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) arg_cnt += 2; } else - throw Exception("Expected two string literal arguments: zookeeper_path and replica_name", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected two string literal arguments: zookeeper_path and replica_name"); } /// This merging param maybe used as part of sorting key @@ -446,9 +445,8 @@ static StoragePtr create(const 
StorageFactory::Arguments & args) if (merging_params.mode == MergeTreeData::MergingParams::Collapsing) { if (!tryGetIdentifierNameInto(engine_args[arg_cnt - 1], merging_params.sign_column)) - throw Exception( - "Sign column name must be an unquoted string" + getMergeTreeVerboseHelp(is_extended_storage_def), - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Sign column name must be an unquoted string{}", + getMergeTreeVerboseHelp(is_extended_storage_def)); --arg_cnt; } else if (merging_params.mode == MergeTreeData::MergingParams::Replacing) @@ -457,9 +455,8 @@ static StoragePtr create(const StorageFactory::Arguments & args) if (arg_cnt && !engine_args[arg_cnt - 1]->as()) { if (!tryGetIdentifierNameInto(engine_args[arg_cnt - 1], merging_params.version_column)) - throw Exception( - "Version column name must be an unquoted string" + getMergeTreeVerboseHelp(is_extended_storage_def), - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Version column name must be an unquoted string{}", + getMergeTreeVerboseHelp(is_extended_storage_def)); --arg_cnt; } } @@ -475,19 +472,18 @@ static StoragePtr create(const StorageFactory::Arguments & args) else if (merging_params.mode == MergeTreeData::MergingParams::Graphite) { String graphite_config_name; - String error_msg - = "Last parameter of GraphiteMergeTree must be the name (in single quotes) of the element in configuration file with the Graphite options"; - error_msg += getMergeTreeVerboseHelp(is_extended_storage_def); + constexpr auto format_str = "Last parameter of GraphiteMergeTree must be the name (in single quotes) of the element in configuration file with the Graphite options{}"; + String error_msg = getMergeTreeVerboseHelp(is_extended_storage_def); if (const auto * ast = engine_args[arg_cnt - 1]->as()) { if (ast->value.getType() != Field::Types::String) - throw Exception(error_msg, ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, format_str, error_msg); graphite_config_name = ast->value.get(); } else - throw Exception(error_msg, ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, format_str, error_msg); --arg_cnt; setGraphitePatternsFromConfig(args.getContext(), graphite_config_name, merging_params.graphite_params); @@ -495,16 +491,14 @@ static StoragePtr create(const StorageFactory::Arguments & args) else if (merging_params.mode == MergeTreeData::MergingParams::VersionedCollapsing) { if (!tryGetIdentifierNameInto(engine_args[arg_cnt - 1], merging_params.version_column)) - throw Exception( - "Version column name must be an unquoted string" + getMergeTreeVerboseHelp(is_extended_storage_def), - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Version column name must be an unquoted string{}", + getMergeTreeVerboseHelp(is_extended_storage_def)); --arg_cnt; if (!tryGetIdentifierNameInto(engine_args[arg_cnt - 1], merging_params.sign_column)) - throw Exception( - "Sign column name must be an unquoted string" + getMergeTreeVerboseHelp(is_extended_storage_def), - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Sign column name must be an unquoted string{}", + getMergeTreeVerboseHelp(is_extended_storage_def)); --arg_cnt; /// Version collapsing is the only engine which add additional column to @@ -544,10 +538,9 @@ static StoragePtr create(const StorageFactory::Arguments & args) args.storage_def->set(args.storage_def->order_by, args.storage_def->primary_key->clone()); if (!args.storage_def->order_by) - throw 
Exception( - "You must provide an ORDER BY or PRIMARY KEY expression in the table definition. " - "If you don't want this table to be sorted, use ORDER BY/PRIMARY KEY tuple()", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "You must provide an ORDER BY or PRIMARY KEY expression in the table definition. " + "If you don't want this table to be sorted, use ORDER BY/PRIMARY KEY tuple()"); /// Get sorting key from engine arguments. /// @@ -625,9 +618,8 @@ static StoragePtr create(const StorageFactory::Arguments & args) /// Syntax: *MergeTree(..., date, [sample_key], primary_key, index_granularity, ...) /// Get date: if (!tryGetIdentifierNameInto(engine_args[arg_num], date_column_name)) - throw Exception( - "Date column name must be an unquoted string" + getMergeTreeVerboseHelp(is_extended_storage_def), - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Date column name must be an unquoted string{}", + getMergeTreeVerboseHelp(is_extended_storage_def)); auto partition_by_ast = makeASTFunction("toYYYYMM", std::make_shared(date_column_name)); @@ -677,13 +669,12 @@ static StoragePtr create(const StorageFactory::Arguments & args) } } else - throw Exception( - "Index granularity must be a positive integer" + getMergeTreeVerboseHelp(is_extended_storage_def), - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Index granularity must be a positive integer{}", + getMergeTreeVerboseHelp(is_extended_storage_def)); ++arg_num; if (args.storage_def->ttl_table && !args.attach) - throw Exception("Table TTL is not allowed for MergeTree in old syntax", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table TTL is not allowed for MergeTree in old syntax"); } DataTypes data_types = metadata.partition_key.data_types; @@ -696,7 +687,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) } if (arg_num != arg_cnt) - throw Exception("Wrong number of engine arguments.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Wrong number of engine arguments."); if (replicated) { diff --git a/src/Storages/MutationCommands.cpp b/src/Storages/MutationCommands.cpp index ffc2cfc3086..0c9e9223929 100644 --- a/src/Storages/MutationCommands.cpp +++ b/src/Storages/MutationCommands.cpp @@ -46,8 +46,9 @@ std::optional MutationCommand::parse(ASTAlterCommand * command, const auto & assignment = assignment_ast->as(); auto insertion = res.column_to_update_expression.emplace(assignment.column_name, assignment.expression()); if (!insertion.second) - throw Exception("Multiple assignments in the single statement to column " + backQuote(assignment.column_name), - ErrorCodes::MULTIPLE_ASSIGNMENTS_TO_COLUMN); + throw Exception(ErrorCodes::MULTIPLE_ASSIGNMENTS_TO_COLUMN, + "Multiple assignments in the single statement to column {}", + backQuote(assignment.column_name)); } return res; } @@ -188,7 +189,7 @@ void MutationCommands::readText(ReadBuffer & in) auto * command_ast = child->as(); auto command = MutationCommand::parse(command_ast, true); if (!command) - throw Exception("Unknown mutation command type: " + DB::toString(command_ast->type), ErrorCodes::UNKNOWN_MUTATION_COMMAND); + throw Exception(ErrorCodes::UNKNOWN_MUTATION_COMMAND, "Unknown mutation command type: {}", DB::toString(command_ast->type)); push_back(std::move(*command)); } } diff --git a/src/Storages/NATS/StorageNATS.cpp b/src/Storages/NATS/StorageNATS.cpp index cb5dff7d082..f1724b8c14c 100644 --- a/src/Storages/NATS/StorageNATS.cpp +++ 
b/src/Storages/NATS/StorageNATS.cpp @@ -291,7 +291,7 @@ void StorageNATS::read( size_t /* num_streams */) { if (!consumers_ready) - throw Exception("NATS consumers setup not finished. Connection might be lost", ErrorCodes::CANNOT_CONNECT_NATS); + throw Exception(ErrorCodes::CANNOT_CONNECT_NATS, "NATS consumers setup not finished. Connection might be lost"); if (num_created_consumers == 0) return; @@ -364,8 +364,9 @@ SinkToStoragePtr StorageNATS::write(const ASTPtr &, const StorageMetadataPtr & m if (subjects.size() > 1) { throw Exception( - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "This NATS engine reads from multiple subjects. You must specify `stream_like_engine_insert_queue` to choose the subject to write to"); + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "This NATS engine reads from multiple subjects. " + "You must specify `stream_like_engine_insert_queue` to choose the subject to write to"); } else { @@ -616,7 +617,7 @@ bool StorageNATS::streamToViews() auto table_id = getStorageID(); auto table = DatabaseCatalog::instance().getTable(table_id, getContext()); if (!table) - throw Exception("Engine table " + table_id.getNameForLogs() + " doesn't exist.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Engine table {} doesn't exist.", table_id.getNameForLogs()); // Create an INSERT query for streaming data auto insert = std::make_shared(); @@ -717,14 +718,13 @@ void registerStorageNATS(StorageFactory & factory) nats_settings->loadFromQuery(*args.storage_def); if (!nats_settings->nats_url.changed && !nats_settings->nats_server_list.changed) - throw Exception( - "You must specify either `nats_url` or `nats_server_list` settings", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "You must specify either `nats_url` or `nats_server_list` settings"); if (!nats_settings->nats_format.changed) - throw Exception("You must specify `nats_format` setting", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "You must specify `nats_format` setting"); if (!nats_settings->nats_subjects.changed) - throw Exception("You must specify `nats_subjects` setting", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "You must specify `nats_subjects` setting"); return std::make_shared(args.table_id, args.getContext(), args.columns, std::move(nats_settings), args.attach); }; diff --git a/src/Storages/PartitionCommands.cpp b/src/Storages/PartitionCommands.cpp index 9a54f6cc04b..92aea597ab3 100644 --- a/src/Storages/PartitionCommands.cpp +++ b/src/Storages/PartitionCommands.cpp @@ -67,7 +67,7 @@ std::optional PartitionCommand::parse(const ASTAlterCommand * res.move_destination_type = PartitionCommand::MoveDestinationType::SHARD; break; case DataDestinationType::DELETE: - throw Exception("ALTER with this destination type is not handled. This is a bug.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "ALTER with this destination type is not handled. 
This is a bug."); } if (res.move_destination_type != PartitionCommand::MoveDestinationType::TABLE) res.move_destination_name = command_ast->move_destination_name; @@ -163,7 +163,7 @@ std::string PartitionCommand::typeToString() const case PartitionCommand::Type::REPLACE_PARTITION: return "REPLACE PARTITION"; default: - throw Exception("Uninitialized partition command", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Uninitialized partition command"); } } diff --git a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp index f0c5807f89c..574b5d76bbe 100644 --- a/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp +++ b/src/Storages/PostgreSQL/MaterializedPostgreSQLConsumer.cpp @@ -106,11 +106,13 @@ void MaterializedPostgreSQLConsumer::assertCorrectInsertion(StorageData::Buffer || column_idx >= buffer.description.types.size() || column_idx >= buffer.columns.size()) throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Attempt to insert into buffer at position: {}, but block columns size is {}, types size: {}, columns size: {}, buffer structure: {}", - column_idx, - buffer.description.sample_block.columns(), buffer.description.types.size(), buffer.columns.size(), - buffer.description.sample_block.dumpStructure()); + ErrorCodes::LOGICAL_ERROR, + "Attempt to insert into buffer at position: " + "{}, but block columns size is {}, types size: {}, columns size: {}, buffer structure: {}", + column_idx, + buffer.description.sample_block.columns(), + buffer.description.types.size(), buffer.columns.size(), + buffer.description.sample_block.dumpStructure()); } diff --git a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp index b81e029acff..2f290c93591 100644 --- a/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp +++ b/src/Storages/PostgreSQL/PostgreSQLReplicationHandler.cpp @@ -229,7 +229,8 @@ void PostgreSQLReplicationHandler::startSynchronization(bool throw_on_error) { if (user_provided_snapshot.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Using a user-defined replication slot must be provided with a snapshot from EXPORT SNAPSHOT when the slot is created." + "Using a user-defined replication slot must " + "be provided with a snapshot from EXPORT SNAPSHOT when the slot is created." 
"Pass it to `materialized_postgresql_snapshot` setting"); snapshot_name = user_provided_snapshot; } diff --git a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp index 6d12960824a..a30dd6deb77 100644 --- a/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp +++ b/src/Storages/PostgreSQL/StorageMaterializedPostgreSQL.cpp @@ -552,7 +552,7 @@ void registerStorageMaterializedPostgreSQL(StorageFactory & factory) args.storage_def->set(args.storage_def->order_by, args.storage_def->primary_key->clone()); if (!args.storage_def->order_by) - throw Exception("Storage MaterializedPostgreSQL needs order by key or primary key", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Storage MaterializedPostgreSQL needs order by key or primary key"); if (args.storage_def->primary_key) metadata.primary_key = KeyDescription::getKeyFromAST(args.storage_def->primary_key->ptr(), metadata.columns, args.getContext()); diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index 969577fdf3f..3d93894eaff 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -91,13 +91,13 @@ ProjectionDescription::getProjectionFromAST(const ASTPtr & definition_ast, const const auto * projection_definition = definition_ast->as(); if (!projection_definition) - throw Exception("Cannot create projection from non ASTProjectionDeclaration AST", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Cannot create projection from non ASTProjectionDeclaration AST"); if (projection_definition->name.empty()) - throw Exception("Projection must have name in definition.", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Projection must have name in definition."); if (!projection_definition->query) - throw Exception("QUERY is required for projection", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "QUERY is required for projection"); ProjectionDescription result; result.definition_ast = projection_definition->clone(); @@ -123,8 +123,7 @@ ProjectionDescription::getProjectionFromAST(const ASTPtr & definition_ast, const if (select.hasAggregation()) { if (query.orderBy()) - throw Exception( - "When aggregation is used in projection, ORDER BY cannot be specified", ErrorCodes::ILLEGAL_PROJECTION); + throw Exception(ErrorCodes::ILLEGAL_PROJECTION, "When aggregation is used in projection, ORDER BY cannot be specified"); result.type = ProjectionDescription::Type::Aggregate; if (const auto & group_expression_list = query_select.groupBy()) @@ -242,7 +241,7 @@ ProjectionDescription ProjectionDescription::getMinMaxCountProjection( result.sample_block_for_keys.insert({nullptr, key.type, key.name}); auto it = partition_column_name_to_value_index.find(key.name); if (it == partition_column_name_to_value_index.end()) - throw Exception("minmax_count projection can only have keys about partition columns. It's a bug", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "minmax_count projection can only have keys about partition columns. It's a bug"); result.partition_value_indices.push_back(it->second); } } @@ -295,7 +294,7 @@ Block ProjectionDescription::calculate(const Block & block, ContextPtr context) Block ret; executor.pull(ret); if (executor.pull(ret)) - throw Exception("Projection cannot increase the number of rows in a block. 
It's a bug", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Projection cannot increase the number of rows in a block. It's a bug"); return ret; } @@ -354,8 +353,8 @@ void ProjectionsDescription::add(ProjectionDescription && projection, const Stri { if (if_not_exists) return; - throw Exception( - "Cannot add projection " + projection.name + ": projection with this name already exists", ErrorCodes::ILLEGAL_PROJECTION); + throw Exception(ErrorCodes::ILLEGAL_PROJECTION, "Cannot add projection {}: projection with this name already exists", + projection.name); } auto insert_it = projections.cend(); diff --git a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp index 04decb91f7d..c23ef063145 100644 --- a/src/Storages/RabbitMQ/StorageRabbitMQ.cpp +++ b/src/Storages/RabbitMQ/StorageRabbitMQ.cpp @@ -216,7 +216,7 @@ AMQP::ExchangeType StorageRabbitMQ::defineExchangeType(String exchange_type_) else if (exchange_type_ == ExchangeType::TOPIC) type = AMQP::ExchangeType::topic; else if (exchange_type_ == ExchangeType::HASH) type = AMQP::ExchangeType::consistent_hash; else if (exchange_type_ == ExchangeType::HEADERS) type = AMQP::ExchangeType::headers; - else throw Exception("Invalid exchange type", ErrorCodes::BAD_ARGUMENTS); + else throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid exchange type"); } else { @@ -404,8 +404,9 @@ void StorageRabbitMQ::initExchange(AMQP::TcpChannel & rabbit_channel) /// This error can be a result of attempt to declare exchange if it was already declared but /// 1) with different exchange type. /// 2) with different exchange settings. - throw Exception("Unable to declare exchange. Make sure specified exchange is not already declared. Error: " - + std::string(message), ErrorCodes::CANNOT_DECLARE_RABBITMQ_EXCHANGE); + throw Exception(ErrorCodes::CANNOT_DECLARE_RABBITMQ_EXCHANGE, + "Unable to declare exchange. Make sure specified exchange is not already declared. Error: {}", + std::string(message)); }); rabbit_channel.declareExchange(bridge_exchange, AMQP::fanout, AMQP::durable | AMQP::autodelete) @@ -413,7 +414,8 @@ void StorageRabbitMQ::initExchange(AMQP::TcpChannel & rabbit_channel) { /// This error is not supposed to happen as this exchange name is always unique to type and its settings. throw Exception( - ErrorCodes::CANNOT_DECLARE_RABBITMQ_EXCHANGE, "Unable to declare bridge exchange ({}). Reason: {}", bridge_exchange, std::string(message)); + ErrorCodes::CANNOT_DECLARE_RABBITMQ_EXCHANGE, + "Unable to declare bridge exchange ({}). Reason: {}", bridge_exchange, std::string(message)); }); if (!hash_exchange) @@ -548,10 +550,10 @@ void StorageRabbitMQ::bindQueue(size_t queue_id, AMQP::TcpChannel & rabbit_chann * max_block_size parameter. Solution: client should specify a different queue_base parameter or manually delete previously * declared queues via any of the various cli tools. */ - throw Exception("Failed to declare queue. Probably queue settings are conflicting: max_block_size, deadletter_exchange. Attempt \ - specifying differently those settings or use a different queue_base or manually delete previously declared queues, \ - which were declared with the same names. ERROR reason: " - + std::string(message), ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Failed to declare queue. Probably queue settings are conflicting: " + "max_block_size, deadletter_exchange. 
Attempt specifying differently those settings " + "or use a different queue_base or manually delete previously declared queues, " + "which were declared with the same names. ERROR reason: {}", std::string(message)); }); AMQP::Table queue_settings; @@ -650,7 +652,7 @@ void StorageRabbitMQ::unbindExchange() }) .onError([&](const char * message) { - throw Exception("Unable to remove exchange. Reason: " + std::string(message), ErrorCodes::CANNOT_REMOVE_RABBITMQ_EXCHANGE); + throw Exception(ErrorCodes::CANNOT_REMOVE_RABBITMQ_EXCHANGE, "Unable to remove exchange. Reason: {}", std::string(message)); }); connection->getHandler().startBlockingLoop(); @@ -676,7 +678,7 @@ void StorageRabbitMQ::read( size_t /* num_streams */) { if (!rabbit_is_ready) - throw Exception("RabbitMQ setup not finished. Connection might be lost", ErrorCodes::CANNOT_CONNECT_RABBITMQ); + throw Exception(ErrorCodes::CANNOT_CONNECT_RABBITMQ, "RabbitMQ setup not finished. Connection might be lost"); if (num_created_consumers == 0) { @@ -686,7 +688,8 @@ void StorageRabbitMQ::read( } if (!local_context->getSettingsRef().stream_like_engine_allow_direct_select) - throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "Direct select is not allowed. To enable use setting `stream_like_engine_allow_direct_select`"); + throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, + "Direct select is not allowed. To enable use setting `stream_like_engine_allow_direct_select`"); if (mv_attached) throw Exception(ErrorCodes::QUERY_NOT_ALLOWED, "Cannot read from StorageRabbitMQ with attached materialized views"); @@ -1056,7 +1059,7 @@ bool StorageRabbitMQ::streamToViews() auto table_id = getStorageID(); auto table = DatabaseCatalog::instance().getTable(table_id, getContext()); if (!table) - throw Exception("Engine table " + table_id.getNameForLogs() + " doesn't exist.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Engine table {} doesn't exist.", table_id.getNameForLogs()); // Create an INSERT query for streaming data auto insert = std::make_shared(); @@ -1194,11 +1197,11 @@ void registerStorageRabbitMQ(StorageFactory & factory) if (!rabbitmq_settings->rabbitmq_host_port.changed && !rabbitmq_settings->rabbitmq_address.changed) - throw Exception("You must specify either `rabbitmq_host_port` or `rabbitmq_address` settings", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "You must specify either `rabbitmq_host_port` or `rabbitmq_address` settings"); if (!rabbitmq_settings->rabbitmq_format.changed) - throw Exception("You must specify `rabbitmq_format` setting", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "You must specify `rabbitmq_format` setting"); return std::make_shared(args.table_id, args.getContext(), args.columns, std::move(rabbitmq_settings), args.attach); }; diff --git a/src/Storages/ReadInOrderOptimizer.cpp b/src/Storages/ReadInOrderOptimizer.cpp index 1d0c8e62d20..e0ef967d491 100644 --- a/src/Storages/ReadInOrderOptimizer.cpp +++ b/src/Storages/ReadInOrderOptimizer.cpp @@ -183,7 +183,7 @@ ReadInOrderOptimizer::ReadInOrderOptimizer( , query(query_) { if (elements_actions.size() != required_sort_description.size()) - throw Exception("Sizes of sort description and actions are mismatched", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Sizes of sort description and actions are mismatched"); /// Do not analyze joined columns. 
/// They may have aliases and come to description as is. diff --git a/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp b/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp index b1b158a2aa5..47d036c943d 100644 --- a/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp +++ b/src/Storages/RocksDB/EmbeddedRocksDBSink.cpp @@ -51,12 +51,12 @@ void EmbeddedRocksDBSink::consume(Chunk chunk) } status = batch.Put(wb_key.str(), wb_value.str()); if (!status.ok()) - throw Exception("RocksDB write error: " + status.ToString(), ErrorCodes::ROCKSDB_ERROR); + throw Exception(ErrorCodes::ROCKSDB_ERROR, "RocksDB write error: {}", status.ToString()); } status = storage.rocksdb_ptr->Write(rocksdb::WriteOptions(), &batch); if (!status.ok()) - throw Exception("RocksDB write error: " + status.ToString(), ErrorCodes::ROCKSDB_ERROR); + throw Exception(ErrorCodes::ROCKSDB_ERROR, "RocksDB write error: {}", status.ToString()); } } diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp index 4e871300861..86c0dffa60d 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.cpp @@ -33,7 +33,6 @@ #include #include -#include #include @@ -141,8 +140,8 @@ public: if (!iterator->status().ok()) { - throw Exception("Engine " + getName() + " got error while seeking key value data: " + iterator->status().ToString(), - ErrorCodes::ROCKSDB_ERROR); + throw Exception(ErrorCodes::ROCKSDB_ERROR, "Engine {} got error while seeking key value data: {}", + getName(), iterator->status().ToString()); } Block block = sample_block.cloneWithColumns(std::move(columns)); return Chunk(block.getColumns(), block.rows()); @@ -262,12 +261,12 @@ void StorageEmbeddedRocksDB::mutate(const MutationCommands & commands, ContextPt column_it->type->getDefaultSerialization()->serializeBinary(*column, i, wb_key, {}); auto status = batch.Delete(wb_key.str()); if (!status.ok()) - throw Exception("RocksDB write error: " + status.ToString(), ErrorCodes::ROCKSDB_ERROR); + throw Exception(ErrorCodes::ROCKSDB_ERROR, "RocksDB write error: {}", status.ToString()); } auto status = rocksdb_ptr->Write(rocksdb::WriteOptions(), &batch); if (!status.ok()) - throw Exception("RocksDB write error: " + status.ToString(), ErrorCodes::ROCKSDB_ERROR); + throw Exception(ErrorCodes::ROCKSDB_ERROR, "RocksDB write error: {}", status.ToString()); } return; @@ -459,8 +458,10 @@ static StoragePtr create(const StorageFactory::Arguments & args) auto engine_args = args.engine_args; if (engine_args.size() > 3) { - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Engine {} requires at most 3 parameters. ({} given). Correct usage: EmbeddedRocksDB([ttl, rocksdb_dir, read_only])", - args.engine_name, engine_args.size()); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Engine {} requires at most 3 parameters. " + "({} given). 
Correct usage: EmbeddedRocksDB([ttl, rocksdb_dir, read_only])", + args.engine_name, engine_args.size()); } Int32 ttl{0}; @@ -478,20 +479,20 @@ static StoragePtr create(const StorageFactory::Arguments & args) metadata.setConstraints(args.constraints); if (!args.storage_def->primary_key) - throw Exception("StorageEmbeddedRocksDB must require one column in primary key", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "StorageEmbeddedRocksDB must require one column in primary key"); metadata.primary_key = KeyDescription::getKeyFromAST(args.storage_def->primary_key->ptr(), metadata.columns, args.getContext()); auto primary_key_names = metadata.getColumnsRequiredForPrimaryKey(); if (primary_key_names.size() != 1) { - throw Exception("StorageEmbeddedRocksDB must require one column in primary key", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "StorageEmbeddedRocksDB must require one column in primary key"); } return std::make_shared(args.table_id, args.relative_data_path, metadata, args.attach, args.getContext(), primary_key_names[0], ttl, std::move(rocksdb_dir), read_only); } std::shared_ptr StorageEmbeddedRocksDB::getRocksDBStatistics() const { - std::shared_lock lock(rocksdb_ptr_mx); + std::shared_lock lock(rocksdb_ptr_mx); if (!rocksdb_ptr) return nullptr; return rocksdb_ptr->GetOptions().statistics; @@ -499,7 +500,7 @@ std::shared_ptr StorageEmbeddedRocksDB::getRocksDBStatistic std::vector StorageEmbeddedRocksDB::multiGet(const std::vector & slices_keys, std::vector & values) const { - std::shared_lock lock(rocksdb_ptr_mx); + std::shared_lock lock(rocksdb_ptr_mx); if (!rocksdb_ptr) return {}; return rocksdb_ptr->MultiGet(rocksdb::ReadOptions(), slices_keys, &values); diff --git a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h index 02938fb5f69..7f6fc49fb18 100644 --- a/src/Storages/RocksDB/StorageEmbeddedRocksDB.h +++ b/src/Storages/RocksDB/StorageEmbeddedRocksDB.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include #include #include @@ -86,7 +86,7 @@ private: const String primary_key; using RocksDBPtr = std::unique_ptr; RocksDBPtr rocksdb_ptr; - mutable std::shared_mutex rocksdb_ptr_mx; + mutable SharedMutex rocksdb_ptr_mx; String rocksdb_dir; Int32 ttl; bool read_only; diff --git a/src/Storages/SelectQueryDescription.cpp b/src/Storages/SelectQueryDescription.cpp index 2cc8f769cf1..37615a4187a 100644 --- a/src/Storages/SelectQueryDescription.cpp +++ b/src/Storages/SelectQueryDescription.cpp @@ -64,8 +64,7 @@ StorageID extractDependentTableFromSelectQuery(ASTSelectQuery & query, ContextPt "StorageMaterializedView cannot be created from table functions ({})", serializeAST(*subquery)); if (ast_select->list_of_selects->children.size() != 1) - throw Exception("UNION is not supported for MATERIALIZED VIEW", - ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW); + throw Exception(ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW, "UNION is not supported for MATERIALIZED VIEW"); auto & inner_query = ast_select->list_of_selects->children.at(0); @@ -79,7 +78,7 @@ StorageID extractDependentTableFromSelectQuery(ASTSelectQuery & query, ContextPt void checkAllowedQueries(const ASTSelectQuery & query) { if (query.prewhere() || query.final() || query.sampleSize()) - throw Exception("MATERIALIZED VIEW cannot have PREWHERE, SAMPLE or FINAL.", DB::ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW); + throw Exception(DB::ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW, 
"MATERIALIZED VIEW cannot have PREWHERE, SAMPLE or FINAL."); ASTPtr subquery = extractTableExpression(query, 0); if (!subquery) @@ -88,7 +87,7 @@ void checkAllowedQueries(const ASTSelectQuery & query) if (const auto * ast_select = subquery->as()) { if (ast_select->list_of_selects->children.size() != 1) - throw Exception("UNION is not supported for MATERIALIZED VIEW", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW); + throw Exception(ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW, "UNION is not supported for MATERIALIZED VIEW"); const auto & inner_query = ast_select->list_of_selects->children.at(0); @@ -119,7 +118,7 @@ SelectQueryDescription SelectQueryDescription::getSelectQueryFromASTForMatView(c ASTPtr new_inner_query; if (!isSingleSelect(select, new_inner_query)) - throw Exception("UNION is not supported for MATERIALIZED VIEW", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW); + throw Exception(ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW, "UNION is not supported for MATERIALIZED VIEW"); auto & select_query = new_inner_query->as(); checkAllowedQueries(select_query); diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index cfdf2a630f9..4d3e46a36f5 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -111,7 +111,7 @@ StoragePtr StorageBuffer::getDestinationTable() const auto destination = DatabaseCatalog::instance().tryGetTable(destination_id, getContext()); if (destination.get() == this) - throw Exception("Destination table is myself. Will lead to infinite loop.", ErrorCodes::INFINITE_LOOP); + throw Exception(ErrorCodes::INFINITE_LOOP, "Destination table is myself. Will lead to infinite loop."); return destination; } @@ -518,7 +518,7 @@ static void appendBlock(Poco::Logger * log, const Block & from, Block & to) } /// But if there is still nothing, abort if (!col_to) - throw Exception("No column to rollback", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "No column to rollback"); if (col_to->size() != old_rows) col_to = col_to->cut(0, old_rows); } @@ -563,7 +563,7 @@ public: { destination = DatabaseCatalog::instance().tryGetTable(storage.destination_id, storage.getContext()); if (destination.get() == &storage) - throw Exception("Destination table is myself. Write will cause infinite loop.", ErrorCodes::INFINITE_LOOP); + throw Exception(ErrorCodes::INFINITE_LOOP, "Destination table is myself. 
Write will cause infinite loop."); } size_t bytes = block.bytes(); @@ -720,13 +720,13 @@ bool StorageBuffer::optimize( ContextPtr /*context*/) { if (partition) - throw Exception("Partition cannot be specified when optimizing table of type Buffer", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Partition cannot be specified when optimizing table of type Buffer"); if (final) - throw Exception("FINAL cannot be specified when optimizing table of type Buffer", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "FINAL cannot be specified when optimizing table of type Buffer"); if (deduplicate) - throw Exception("DEDUPLICATE cannot be specified when optimizing table of type Buffer", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "DEDUPLICATE cannot be specified when optimizing table of type Buffer"); flushAllBuffers(false); return true; @@ -1081,9 +1081,10 @@ void registerStorageBuffer(StorageFactory & factory) ASTs & engine_args = args.engine_args; if (engine_args.size() < 9 || engine_args.size() > 12) - throw Exception("Storage Buffer requires from 9 to 12 parameters: " - " destination_database, destination_table, num_buckets, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes[, flush_time, flush_rows, flush_bytes].", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Storage Buffer requires from 9 to 12 parameters: " + " destination_database, destination_table, num_buckets, min_time, max_time, min_rows, " + "max_rows, min_bytes, max_bytes[, flush_time, flush_rows, flush_bytes]."); // Table and database name arguments accept expressions, evaluate them. engine_args[0] = evaluateConstantExpressionForDatabaseName(engine_args[0], args.getLocalContext()); diff --git a/src/Storages/StorageDictionary.cpp b/src/Storages/StorageDictionary.cpp index a76c4dffb5b..0eb3eb6b94e 100644 --- a/src/Storages/StorageDictionary.cpp +++ b/src/Storages/StorageDictionary.cpp @@ -342,8 +342,7 @@ void registerStorageDictionary(StorageFactory & factory) /// Create dictionary storage that is view of underlying dictionary if (args.engine_args.size() != 1) - throw Exception("Storage Dictionary requires single parameter: name of dictionary", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Storage Dictionary requires single parameter: name of dictionary"); args.engine_args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(args.engine_args[0], local_context); String dictionary_name = checkAndGetLiteralArgument(args.engine_args[0], "dictionary_name"); diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 0b48bb70286..94e52059daa 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -394,8 +394,10 @@ StorageDistributed::StorageDistributed( size_t num_local_shards = getCluster()->getLocalShardCount(); if (num_local_shards && (remote_database.empty() || remote_database == id_.database_name) && remote_table == id_.table_name) - throw Exception("Distributed table " + id_.table_name + " looks at itself", ErrorCodes::INFINITE_LOOP); + throw Exception(ErrorCodes::INFINITE_LOOP, "Distributed table {} looks at itself", id_.table_name); } + + initializeFromDisk(); } @@ -480,7 +482,7 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage( /// NOTE: distributed_group_by_no_merge=1 does not respect 
distributed_push_down_limit /// (since in this case queries processed separately and the initiator is just a proxy in this case). if (to_stage != QueryProcessingStage::Complete) - throw Exception("Queries with distributed_group_by_no_merge=1 should be processed to Complete stage", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Queries with distributed_group_by_no_merge=1 should be processed to Complete stage"); return QueryProcessingStage::Complete; } } @@ -766,7 +768,7 @@ void StorageDistributed::read( /// This is a bug, it is possible only when there is no shards to query, and this is handled earlier. if (!query_plan.isInitialized()) - throw Exception("Pipeline is not initialized", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Pipeline is not initialized"); if (local_context->getSettingsRef().allow_experimental_analyzer) { @@ -794,8 +796,8 @@ SinkToStoragePtr StorageDistributed::write(const ASTPtr &, const StorageMetadata /// Ban an attempt to make async insert into the table belonging to DatabaseMemory if (!storage_policy && !owned_cluster && !settings.insert_distributed_sync && !settings.insert_shard_id) { - throw Exception("Storage " + getName() + " must have own data directory to enable asynchronous inserts", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Storage {} must have own data directory to enable asynchronous inserts", + getName()); } auto shard_num = cluster->getLocalShardCount() + cluster->getRemoteShardCount(); @@ -803,14 +805,13 @@ SinkToStoragePtr StorageDistributed::write(const ASTPtr &, const StorageMetadata /// If sharding key is not specified, then you can only write to a shard containing only one shard if (!settings.insert_shard_id && !settings.insert_distributed_one_random_shard && !has_sharding_key && shard_num >= 2) { - throw Exception( - "Method write is not supported by storage " + getName() + " with more than one shard and no sharding key provided", - ErrorCodes::STORAGE_REQUIRES_PARAMETER); + throw Exception(ErrorCodes::STORAGE_REQUIRES_PARAMETER, + "Method write is not supported by storage {} with more than one shard and no sharding key provided", getName()); } if (settings.insert_shard_id && settings.insert_shard_id > shard_num) { - throw Exception("Shard id should be range from 1 to shard number", ErrorCodes::INVALID_SHARD_ID); + throw Exception(ErrorCodes::INVALID_SHARD_ID, "Shard id should be range from 1 to shard number"); } /// Force sync insertion if it is remote() table function @@ -917,8 +918,8 @@ std::optional StorageDistributed::distributedWriteBetweenDistribu auto timeouts = ConnectionTimeouts::getTCPTimeoutsWithFailover(settings); auto connections = shard_info.pool->getMany(timeouts, &settings, PoolMode::GET_ONE); if (connections.empty() || connections.front().isNull()) - throw Exception( - "Expected exactly one connection for shard " + toString(shard_info.shard_num), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected exactly one connection for shard {}", + shard_info.shard_num); /// INSERT SELECT query returns empty block auto remote_query_executor @@ -1006,7 +1007,7 @@ std::optional StorageDistributed::distributedWrite(const ASTInser { const Settings & settings = local_context->getSettingsRef(); if (settings.max_distributed_depth && local_context->getClientInfo().distributed_depth >= settings.max_distributed_depth) - throw Exception("Maximum distributed depth exceeded", ErrorCodes::TOO_LARGE_DISTRIBUTED_DEPTH); + throw 
Exception(ErrorCodes::TOO_LARGE_DISTRIBUTED_DEPTH, "Maximum distributed depth exceeded"); auto & select = query.select->as(); @@ -1041,7 +1042,8 @@ std::optional StorageDistributed::distributedWrite(const ASTInser if (local_context->getClientInfo().distributed_depth == 0) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parallel distributed INSERT SELECT is not possible. "\ - "Reason: distributed reading is supported only from Distributed engine or *Cluster table functions, but got {} storage", src_storage->getName()); + "Reason: distributed reading is supported only from Distributed engine " + "or *Cluster table functions, but got {} storage", src_storage->getName()); } return {}; @@ -1084,8 +1086,7 @@ void StorageDistributed::alter(const AlterCommands & params, ContextPtr local_co setInMemoryMetadata(new_metadata); } - -void StorageDistributed::startup() +void StorageDistributed::initializeFromDisk() { if (!storage_policy) return; @@ -1134,6 +1135,7 @@ void StorageDistributed::shutdown() cluster_nodes_data.clear(); LOG_DEBUG(log, "Background threads for async INSERT joined"); } + void StorageDistributed::drop() { // Some INSERT in-between shutdown() and drop() can call @@ -1288,20 +1290,14 @@ ClusterPtr StorageDistributed::getOptimizedCluster( } UInt64 force = settings.force_optimize_skip_unused_shards; - if (force) + if (force == FORCE_OPTIMIZE_SKIP_UNUSED_SHARDS_ALWAYS || (force == FORCE_OPTIMIZE_SKIP_UNUSED_SHARDS_HAS_SHARDING_KEY && has_sharding_key)) { - WriteBufferFromOwnString exception_message; if (!has_sharding_key) - exception_message << "No sharding key"; + throw Exception(ErrorCodes::UNABLE_TO_SKIP_UNUSED_SHARDS, "No sharding key"); else if (!sharding_key_is_usable) - exception_message << "Sharding key is not deterministic"; + throw Exception(ErrorCodes::UNABLE_TO_SKIP_UNUSED_SHARDS, "Sharding key is not deterministic"); else - exception_message << "Sharding key " << sharding_key_column_name << " is not used"; - - if (force == FORCE_OPTIMIZE_SKIP_UNUSED_SHARDS_ALWAYS) - throw Exception(exception_message.str(), ErrorCodes::UNABLE_TO_SKIP_UNUSED_SHARDS); - if (force == FORCE_OPTIMIZE_SKIP_UNUSED_SHARDS_HAS_SHARDING_KEY && has_sharding_key) - throw Exception(exception_message.str(), ErrorCodes::UNABLE_TO_SKIP_UNUSED_SHARDS); + throw Exception(ErrorCodes::UNABLE_TO_SKIP_UNUSED_SHARDS, "Sharding key {} is not used", sharding_key_column_name); } return {}; @@ -1330,7 +1326,7 @@ IColumn::Selector StorageDistributed::createSelector(const ClusterPtr cluster, c #undef CREATE_FOR_TYPE - throw Exception{"Sharding key expression does not evaluate to an integer type", ErrorCodes::TYPE_MISMATCH}; + throw Exception(ErrorCodes::TYPE_MISMATCH, "Sharding key expression does not evaluate to an integer type"); } /// Returns a new cluster with fewer shards if constant folding for `sharding_key_expr` is possible @@ -1389,7 +1385,7 @@ ClusterPtr StorageDistributed::skipUnusedShards( for (const auto & block : *blocks) { if (!block.has(sharding_key_column_name)) - throw Exception("sharding_key_expr should evaluate as a single row", ErrorCodes::TOO_MANY_ROWS); + throw Exception(ErrorCodes::TOO_MANY_ROWS, "sharding_key_expr should evaluate as a single row"); const ColumnWithTypeAndName & result = block.getByName(sharding_key_column_name); const auto selector = createSelector(cluster, result); @@ -1563,14 +1559,11 @@ void registerStorageDistributed(StorageFactory & factory) ASTs & engine_args = args.engine_args; if (engine_args.size() < 3 || engine_args.size() > 5) - throw Exception( - "Storage Distributed 
requires from 3 to 5 parameters - " - "name of configuration section with list of remote servers, " - "name of remote database, " - "name of remote table, " - "sharding key expression (optional), " - "policy to store data in (optional).", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Storage Distributed requires from 3 " + "to 5 parameters - name of configuration section with list " + "of remote servers, name of remote database, name " + "of remote table, sharding key expression (optional), policy to store data in (optional)."); String cluster_name = getClusterNameAndMakeLiteral(engine_args[0]); @@ -1598,13 +1591,13 @@ void registerStorageDistributed(StorageFactory & factory) const Block & block = sharding_expr->getSampleBlock(); if (block.columns() != 1) - throw Exception("Sharding expression must return exactly one column", ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS); + throw Exception(ErrorCodes::INCORRECT_NUMBER_OF_COLUMNS, "Sharding expression must return exactly one column"); auto type = block.getByPosition(0).type; if (!type->isValueRepresentedByInteger()) - throw Exception("Sharding expression has type " + type->getName() + - ", but should be one of integer type", ErrorCodes::TYPE_MISMATCH); + throw Exception(ErrorCodes::TYPE_MISMATCH, "Sharding expression has type {}, but should be one of integer type", + type->getName()); } /// TODO: move some arguments from the arguments to the SETTINGS. diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index 55b0535d5e6..66fd7b77889 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -133,7 +133,7 @@ public: /// the structure of the sub-table is not checked void alter(const AlterCommands & params, ContextPtr context, AlterLockHolder & table_lock_holder) override; - void startup() override; + void initializeFromDisk(); void shutdown() override; void flush() override; void drop() override; diff --git a/src/Storages/StorageExternalDistributed.cpp b/src/Storages/StorageExternalDistributed.cpp index a40ae12b2d1..9eeb619b899 100644 --- a/src/Storages/StorageExternalDistributed.cpp +++ b/src/Storages/StorageExternalDistributed.cpp @@ -233,7 +233,9 @@ void registerStorageExternalDistributed(StorageFactory & factory) { ASTs & engine_args = args.engine_args; if (engine_args.size() < 2) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Engine ExternalDistributed must have at least 2 arguments: engine_name, named_collection and/or description"); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Engine ExternalDistributed must have at least 2 arguments: " + "engine_name, named_collection and/or description"); auto engine_name = checkAndGetLiteralArgument(engine_args[0], "engine_name"); StorageExternalDistributed::ExternalStorageEngine table_engine; @@ -245,8 +247,9 @@ void registerStorageExternalDistributed(StorageFactory & factory) table_engine = StorageExternalDistributed::ExternalStorageEngine::PostgreSQL; else throw Exception(ErrorCodes::BAD_ARGUMENTS, - "External storage engine {} is not supported for StorageExternalDistributed. Supported engines are: MySQL, PostgreSQL, URL", - engine_name); + "External storage engine {} is not supported for StorageExternalDistributed. 
" + "Supported engines are: MySQL, PostgreSQL, URL", + engine_name); ASTs inner_engine_args(engine_args.begin() + 1, engine_args.end()); String cluster_description; @@ -308,10 +311,9 @@ void registerStorageExternalDistributed(StorageFactory & factory) else { if (engine_args.size() != 6) - throw Exception( - "Storage ExternalDistributed requires 5 parameters: " - "ExternalDistributed('engine_name', 'cluster_description', 'database', 'table', 'user', 'password').", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Storage ExternalDistributed requires 5 parameters: " + "ExternalDistributed('engine_name', 'cluster_description', 'database', 'table', 'user', 'password')."); cluster_description = checkAndGetLiteralArgument(engine_args[1], "cluster_description"); configuration.database = checkAndGetLiteralArgument(engine_args[2], "database"); diff --git a/src/Storages/StorageFactory.cpp b/src/Storages/StorageFactory.cpp index c2f6fb1608d..ff141bf108f 100644 --- a/src/Storages/StorageFactory.cpp +++ b/src/Storages/StorageFactory.cpp @@ -27,7 +27,7 @@ static void checkAllTypesAreAllowedInTable(const NamesAndTypesList & names_and_t { for (const auto & elem : names_and_types) if (elem.type->cannotBeStoredInTables()) - throw Exception("Data type " + elem.type->getName() + " cannot be used in tables", ErrorCodes::DATA_TYPE_CANNOT_BE_USED_IN_TABLES); + throw Exception(ErrorCodes::DATA_TYPE_CANNOT_BE_USED_IN_TABLES, "Data type {} cannot be used in tables", elem.type->getName()); } @@ -35,7 +35,7 @@ ContextMutablePtr StorageFactory::Arguments::getContext() const { auto ptr = context.lock(); if (!ptr) - throw Exception("Context has expired", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Context has expired"); return ptr; } @@ -43,7 +43,7 @@ ContextMutablePtr StorageFactory::Arguments::getLocalContext() const { auto ptr = local_context.lock(); if (!ptr) - throw Exception("Context has expired", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Context has expired"); return ptr; } @@ -51,8 +51,7 @@ ContextMutablePtr StorageFactory::Arguments::getLocalContext() const void StorageFactory::registerStorage(const std::string & name, CreatorFn creator_fn, StorageFeatures features) { if (!storages.emplace(name, Creator{std::move(creator_fn), features}).second) - throw Exception("TableFunctionFactory: the table function name '" + name + "' is not unique", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "TableFunctionFactory: the table function name '{}' is not unique", name); } @@ -74,21 +73,21 @@ StoragePtr StorageFactory::get( if (query.is_ordinary_view) { if (query.storage) - throw Exception("Specifying ENGINE is not allowed for a View", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Specifying ENGINE is not allowed for a View"); name = "View"; } else if (query.is_live_view) { if (query.storage) - throw Exception("Specifying ENGINE is not allowed for a LiveView", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Specifying ENGINE is not allowed for a LiveView"); name = "LiveView"; } else if (query.is_dictionary) { if (query.storage) - throw Exception("Specifying ENGINE is not allowed for a Dictionary", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Specifying ENGINE is not allowed for a Dictionary"); name = "Dictionary"; } @@ -109,16 +108,15 @@ StoragePtr StorageFactory::get( else { 
if (!query.storage) - throw Exception("Incorrect CREATE query: storage required", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Incorrect CREATE query: storage required"); if (!storage_def->engine) - throw Exception("Incorrect CREATE query: ENGINE required", ErrorCodes::ENGINE_REQUIRED); + throw Exception(ErrorCodes::ENGINE_REQUIRED, "Incorrect CREATE query: ENGINE required"); const ASTFunction & engine_def = *storage_def->engine; if (engine_def.parameters) - throw Exception( - "Engine definition cannot take the form of a parametric function", ErrorCodes::FUNCTION_CANNOT_HAVE_PARAMETERS); + throw Exception(ErrorCodes::FUNCTION_CANNOT_HAVE_PARAMETERS, "Engine definition cannot take the form of a parametric function"); if (engine_def.arguments) has_engine_args = true; @@ -127,27 +125,25 @@ StoragePtr StorageFactory::get( if (name == "View") { - throw Exception( - "Direct creation of tables with ENGINE View is not supported, use CREATE VIEW statement", - ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Direct creation of tables with ENGINE View is not supported, use CREATE VIEW statement"); } else if (name == "MaterializedView") { - throw Exception( - "Direct creation of tables with ENGINE MaterializedView is not supported, use CREATE MATERIALIZED VIEW statement", - ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, + "Direct creation of tables with ENGINE MaterializedView " + "is not supported, use CREATE MATERIALIZED VIEW statement"); } else if (name == "LiveView") { - throw Exception( - "Direct creation of tables with ENGINE LiveView is not supported, use CREATE LIVE VIEW statement", - ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, + "Direct creation of tables with ENGINE LiveView " + "is not supported, use CREATE LIVE VIEW statement"); } else if (name == "WindowView") { - throw Exception( - "Direct creation of tables with ENGINE WindowView is not supported, use CREATE WINDOW VIEW statement", - ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, + "Direct creation of tables with ENGINE WindowView " + "is not supported, use CREATE WINDOW VIEW statement"); } auto it = storages.find(name); @@ -155,17 +151,16 @@ StoragePtr StorageFactory::get( { auto hints = getHints(name); if (!hints.empty()) - throw Exception("Unknown table engine " + name + ". Maybe you meant: " + toString(hints), ErrorCodes::UNKNOWN_STORAGE); + throw Exception(ErrorCodes::UNKNOWN_STORAGE, "Unknown table engine {}. Maybe you meant: {}", name, toString(hints)); else - throw Exception("Unknown table engine " + name, ErrorCodes::UNKNOWN_STORAGE); + throw Exception(ErrorCodes::UNKNOWN_STORAGE, "Unknown table engine {}", name); } auto check_feature = [&](String feature_description, FeatureMatcherFn feature_matcher_fn) { if (!feature_matcher_fn(it->second.features)) { - String msg = "Engine " + name + " doesn't support " + feature_description + ". " - "Currently only the following engines have support for the feature: ["; + String msg; auto supporting_engines = getAllRegisteredNamesByFeatureMatcherFn(feature_matcher_fn); for (size_t index = 0; index < supporting_engines.size(); ++index) { @@ -173,8 +168,9 @@ StoragePtr StorageFactory::get( msg += ", "; msg += supporting_engines[index]; } - msg += "]"; - throw Exception(msg, ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Engine {} doesn't support {}. 
" + "Currently only the following engines have support for the feature: [{}]", + name, feature_description, msg); } }; diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 419929dbef3..e2a2f84bc72 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -49,6 +49,7 @@ #include #include #include +#include namespace ProfileEvents @@ -186,7 +187,7 @@ void checkCreationIsAllowed( { auto table_path_stat = fs::status(table_path); if (fs::exists(table_path_stat) && fs::is_directory(table_path_stat)) - throw Exception("File must not be a directory", ErrorCodes::INCORRECT_FILE_NAME); + throw Exception(ErrorCodes::INCORRECT_FILE_NAME, "File must not be a directory"); } } @@ -365,8 +366,7 @@ ColumnsDescription StorageFile::getTableStructureFromFile( if (format == "Distributed") { if (paths.empty()) - throw Exception( - "Cannot get table structure from file, because no files match specified name", ErrorCodes::INCORRECT_FILE_NAME); + throw Exception(ErrorCodes::INCORRECT_FILE_NAME, "Cannot get table structure from file, because no files match specified name"); auto source = StorageDistributedDirectoryMonitor::createSourceFromFile(paths[0]); return ColumnsDescription(source->getOutputs().front().getHeader().getNamesAndTypesList()); @@ -375,9 +375,8 @@ ColumnsDescription StorageFile::getTableStructureFromFile( if (paths.empty() && !FormatFactory::instance().checkIfFormatHasExternalSchemaReader(format)) throw Exception( ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, because there are no files with provided path. You must specify " - "table structure manually", - format); + "Cannot extract table structure from {} format file, because there are no files with provided path. 
" + "You must specify table structure manually", format); std::optional columns_from_cache; if (context->getSettingsRef().schema_inference_use_cache_for_file) @@ -418,9 +417,9 @@ StorageFile::StorageFile(int table_fd_, CommonArguments args) total_bytes_to_read = buf.st_size; if (args.getContext()->getApplicationType() == Context::ApplicationType::SERVER) - throw Exception("Using file descriptor as source of storage isn't allowed for server daemons", ErrorCodes::DATABASE_ACCESS_DENIED); + throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "Using file descriptor as source of storage isn't allowed for server daemons"); if (args.format_name == "Distributed") - throw Exception("Distributed format is allowed only with explicit file path", ErrorCodes::INCORRECT_FILE_NAME); + throw Exception(ErrorCodes::INCORRECT_FILE_NAME, "Distributed format is allowed only with explicit file path"); is_db_table = false; use_table_fd = true; @@ -446,9 +445,9 @@ StorageFile::StorageFile(const std::string & relative_table_dir_path, CommonArgu : StorageFile(args) { if (relative_table_dir_path.empty()) - throw Exception("Storage " + getName() + " requires data path", ErrorCodes::INCORRECT_FILE_NAME); + throw Exception(ErrorCodes::INCORRECT_FILE_NAME, "Storage {} requires data path", getName()); if (args.format_name == "Distributed") - throw Exception("Distributed format is allowed only with explicit file path", ErrorCodes::INCORRECT_FILE_NAME); + throw Exception(ErrorCodes::INCORRECT_FILE_NAME, "Distributed format is allowed only with explicit file path"); String table_dir_path = fs::path(base_path) / relative_table_dir_path / ""; fs::create_directories(table_dir_path); @@ -486,7 +485,7 @@ void StorageFile::setStorageMetadata(CommonArguments args) { columns = getTableStructureFromFile(format_name, paths, compression_method, format_settings, args.getContext()); if (!args.columns.empty() && args.columns != columns) - throw Exception("Table structure and file structure are different", ErrorCodes::INCOMPATIBLE_COLUMNS); + throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS, "Table structure and file structure are different"); } storage_metadata.setColumns(columns); } @@ -571,7 +570,7 @@ public: { shared_lock = std::shared_lock(storage->rwlock, getLockTimeout(context)); if (!shared_lock) - throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Lock timeout exceeded"); } } @@ -715,7 +714,7 @@ Pipe StorageFile::read( if (context->getSettingsRef().engine_file_empty_if_not_exists) return Pipe(std::make_shared(storage_snapshot->getSampleBlockForColumns(column_names))); else - throw Exception("File " + paths[0] + " doesn't exist", ErrorCodes::FILE_DOESNT_EXIST); + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "File {} doesn't exist", paths[0]); } } @@ -854,7 +853,7 @@ public: , lock(std::move(lock_)) { if (!lock) - throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Lock timeout exceeded"); initialize(); } @@ -1024,7 +1023,7 @@ SinkToStoragePtr StorageFile::write( ContextPtr context) { if (format_name == "Distributed") - throw Exception("Method write is not implemented for Distributed format", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method write is not implemented for Distributed format"); int flags = 0; @@ -1061,7 +1060,9 @@ SinkToStoragePtr StorageFile::write( if (!paths.empty()) { if (is_path_with_globs) - throw Exception("Table '" + 
getStorageID().getNameForLogs() + "' is in readonly mode because of globs in filepath", ErrorCodes::DATABASE_ACCESS_DENIED); + throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, + "Table '{}' is in readonly mode because of globs in filepath", + getStorageID().getNameForLogs()); path = paths.back(); fs::create_directories(fs::path(path).parent_path()); @@ -1119,17 +1120,18 @@ bool StorageFile::storesDataOnDisk() const Strings StorageFile::getDataPaths() const { if (paths.empty()) - throw Exception("Table '" + getStorageID().getNameForLogs() + "' is in readonly mode", ErrorCodes::DATABASE_ACCESS_DENIED); + throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "Table '{}' is in readonly mode", getStorageID().getNameForLogs()); return paths; } void StorageFile::rename(const String & new_path_to_table_data, const StorageID & new_table_id) { if (!is_db_table) - throw Exception("Can't rename table " + getStorageID().getNameForLogs() + " bounded to user-defined file (or FD)", ErrorCodes::DATABASE_ACCESS_DENIED); + throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, + "Can't rename table {} bounded to user-defined file (or FD)", getStorageID().getNameForLogs()); if (paths.size() != 1) - throw Exception("Can't rename table " + getStorageID().getNameForLogs() + " in readonly mode", ErrorCodes::DATABASE_ACCESS_DENIED); + throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "Can't rename table {} in readonly mode", getStorageID().getNameForLogs()); std::string path_new = getTablePath(base_path + new_path_to_table_data, format_name); if (path_new == paths[0]) @@ -1149,7 +1151,7 @@ void StorageFile::truncate( TableExclusiveLockHolder &) { if (is_path_with_globs) - throw Exception("Can't truncate table '" + getStorageID().getNameForLogs() + "' in readonly mode", ErrorCodes::DATABASE_ACCESS_DENIED); + throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "Can't truncate table '{}' in readonly mode", getStorageID().getNameForLogs()); if (use_table_fd) { @@ -1197,9 +1199,9 @@ void registerStorageFile(StorageFactory & factory) ASTs & engine_args_ast = factory_args.engine_args; if (!(engine_args_ast.size() >= 1 && engine_args_ast.size() <= 3)) // NOLINT - throw Exception( - "Storage File requires from 1 to 3 arguments: name of used format, source and compression_method.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Storage File requires from 1 to 3 arguments: " + "name of used format, source and compression_method."); engine_args_ast[0] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args_ast[0], factory_args.getLocalContext()); storage_args.format_name = checkAndGetLiteralArgument(engine_args_ast[0], "format_name"); @@ -1251,8 +1253,8 @@ void registerStorageFile(StorageFactory & factory) else if (*opt_name == "stderr") source_fd = STDERR_FILENO; else - throw Exception( - "Unknown identifier '" + *opt_name + "' in second arg of File storage constructor", ErrorCodes::UNKNOWN_IDENTIFIER); + throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown identifier '{}' in second arg of File storage constructor", + *opt_name); } else if (const auto * literal = engine_args_ast[1]->as()) { @@ -1264,7 +1266,7 @@ void registerStorageFile(StorageFactory & factory) else if (type == Field::Types::String) source_path = literal->value.get(); else - throw Exception("Second argument must be path or file descriptor", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second argument must be path or file descriptor"); } if 
(engine_args_ast.size() == 3) diff --git a/src/Storages/StorageGenerateRandom.cpp b/src/Storages/StorageGenerateRandom.cpp index c00e82598b2..1b5a453ec21 100644 --- a/src/Storages/StorageGenerateRandom.cpp +++ b/src/Storages/StorageGenerateRandom.cpp @@ -374,7 +374,7 @@ ColumnPtr fillColumnWithRandomData( } default: - throw Exception("The 'GenerateRandom' is not implemented for type " + type->getName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "The 'GenerateRandom' is not implemented for type {}", type->getName()); } } @@ -462,9 +462,9 @@ void registerStorageGenerateRandom(StorageFactory & factory) ASTs & engine_args = args.engine_args; if (engine_args.size() > 3) - throw Exception("Storage GenerateRandom requires at most three arguments: " - "random_seed, max_string_length, max_array_length.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Storage GenerateRandom requires at most three arguments: " + "random_seed, max_string_length, max_array_length."); std::optional random_seed; UInt64 max_string_length = 10; diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index a80f21834db..f6550c6cd5d 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -80,7 +80,7 @@ void StorageInMemoryMetadata::setComment(const String & comment_) void StorageInMemoryMetadata::setColumns(ColumnsDescription columns_) { if (columns_.getAllPhysical().empty()) - throw Exception("Empty list of columns passed", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED); + throw Exception(ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED, "Empty list of columns passed"); columns = std::move(columns_); } @@ -552,9 +552,8 @@ void StorageInMemoryMetadata::check(const NamesAndTypesList & provided_columns, const auto & provided_columns_map = getColumnsMap(provided_columns); if (column_names.empty()) - throw Exception( - "Empty list of columns queried. There are columns: " + listOfColumns(available_columns), - ErrorCodes::EMPTY_LIST_OF_COLUMNS_QUERIED); + throw Exception(ErrorCodes::EMPTY_LIST_OF_COLUMNS_QUERIED, "Empty list of columns queried. 
There are columns: {}", + listOfColumns(available_columns)); UniqueStrings unique_names; @@ -606,7 +605,7 @@ void StorageInMemoryMetadata::check(const Block & block, bool need_all) const for (const auto & column : block) { if (names_in_block.contains(column.name)) - throw Exception("Duplicate column " + column.name + " in block", ErrorCodes::DUPLICATE_COLUMN); + throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Duplicate column {} in block", column.name); names_in_block.insert(column.name); @@ -635,7 +634,7 @@ void StorageInMemoryMetadata::check(const Block & block, bool need_all) const for (const auto & available_column : available_columns) { if (!names_in_block.contains(available_column.name)) - throw Exception("Expected column " + available_column.name, ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); + throw Exception(ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK, "Expected column {}", available_column.name); } } } diff --git a/src/Storages/StorageInput.cpp b/src/Storages/StorageInput.cpp index 18e8442c1b5..0ad3afb2e8a 100644 --- a/src/Storages/StorageInput.cpp +++ b/src/Storages/StorageInput.cpp @@ -70,7 +70,7 @@ Pipe StorageInput::read( } if (pipe.empty()) - throw Exception("Input stream is not initialized, input() must be used only in INSERT SELECT query", ErrorCodes::INVALID_USAGE_OF_INPUT); + throw Exception(ErrorCodes::INVALID_USAGE_OF_INPUT, "Input stream is not initialized, input() must be used only in INSERT SELECT query"); return std::move(pipe); } diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index 320f05e038f..d6cc5199331 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -63,7 +63,7 @@ StorageJoin::StorageJoin( auto metadata_snapshot = getInMemoryMetadataPtr(); for (const auto & key : key_names) if (!metadata_snapshot->getColumns().hasPhysical(key)) - throw Exception{"Key column (" + key + ") does not exist in table declaration.", ErrorCodes::NO_SUCH_COLUMN_IN_TABLE}; + throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "Key column ({}) does not exist in table declaration.", key); table_join = std::make_shared(limits, use_nulls, kind, strictness, key_names); join = std::make_shared(table_join, getRightSampleBlock(), overwrite); @@ -101,7 +101,7 @@ void StorageJoin::checkMutationIsPossible(const MutationCommands & commands, con { for (const auto & command : commands) if (command.type != MutationCommand::DELETE) - throw Exception("Table engine Join supports only DELETE mutations", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Table engine Join supports only DELETE mutations"); } void StorageJoin::mutate(const MutationCommands & commands, ContextPtr context, bool /*force_wait*/) @@ -175,7 +175,8 @@ HashJoinPtr StorageJoin::getJoinLocked(std::shared_ptr analyzed_join, getStorageID().getNameForLogs()); if (analyzed_join->getClauses().size() != 1) - throw Exception(ErrorCodes::INCOMPATIBLE_TYPE_OF_JOIN, "JOIN keys should match to the Join engine keys [{}]", fmt::join(getKeyNames(), ", ")); + throw Exception(ErrorCodes::INCOMPATIBLE_TYPE_OF_JOIN, "JOIN keys should match to the Join engine keys [{}]", + fmt::join(getKeyNames(), ", ")); const auto & join_on = analyzed_join->getOnlyClause(); if (join_on.on_filter_condition_left || join_on.on_filter_condition_right) @@ -319,16 +320,16 @@ void registerStorageJoin(StorageFactory & factory) persistent = setting.value.get(); } else - throw Exception("Unknown setting " + setting.name + " for storage " + args.engine_name, ErrorCodes::BAD_ARGUMENTS); + throw 
Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown setting {} for storage {}", setting.name, args.engine_name); } } DiskPtr disk = args.getContext()->getDisk(disk_name); if (engine_args.size() < 3) - throw Exception( - "Storage Join requires at least 3 parameters: Join(ANY|ALL|SEMI|ANTI, LEFT|INNER|RIGHT, keys...).", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Storage Join requires at least 3 parameters: " + "Join(ANY|ALL|SEMI|ANTI, LEFT|INNER|RIGHT, keys...)."); JoinStrictness strictness = JoinStrictness::Unspecified; JoinKind kind = JoinKind::Comma; @@ -353,8 +354,7 @@ void registerStorageJoin(StorageFactory & factory) } if (strictness == JoinStrictness::Unspecified) - throw Exception("First parameter of storage Join must be ANY or ALL or SEMI or ANTI (without quotes).", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "First parameter of storage Join must be ANY or ALL or SEMI or ANTI (without quotes)."); if (auto opt_kind_id = tryGetIdentifierName(engine_args[1])) { @@ -375,8 +375,7 @@ void registerStorageJoin(StorageFactory & factory) } if (kind == JoinKind::Comma) - throw Exception("Second parameter of storage Join must be LEFT or INNER or RIGHT or FULL (without quotes).", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second parameter of storage Join must be LEFT or INNER or RIGHT or FULL (without quotes)."); Names key_names; key_names.reserve(engine_args.size() - 2); @@ -384,7 +383,7 @@ void registerStorageJoin(StorageFactory & factory) { auto opt_key = tryGetIdentifierName(engine_args[i]); if (!opt_key) - throw Exception("Parameter №" + toString(i + 1) + " of storage Join don't look like column name.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Parameter №{} of storage Join don't look like column name.", i + 1); key_names.push_back(*opt_key); } @@ -477,7 +476,7 @@ protected: Chunk chunk; if (!joinDispatch(join->kind, join->strictness, join->data->maps.front(), [&](auto kind, auto strictness, auto & map) { chunk = createChunk(map); })) - throw Exception("Logical error: unknown JOIN strictness", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: unknown JOIN strictness"); return chunk; } @@ -512,8 +511,8 @@ private: #undef M default: - throw Exception("Unsupported JOIN keys in StorageJoin. Type: " + toString(static_cast(join->data->type)), - ErrorCodes::UNSUPPORTED_JOIN_KEYS); + throw Exception(ErrorCodes::UNSUPPORTED_JOIN_KEYS, "Unsupported JOIN keys in StorageJoin. 
Type: {}", + static_cast(join->data->type)); } if (!rows_added) @@ -587,7 +586,7 @@ private: fillAll(columns, column_indices, it, key_pos, rows_added); } else - throw Exception("This JOIN is not implemented yet", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "This JOIN is not implemented yet"); if (rows_added >= max_block_size) { diff --git a/src/Storages/StorageKeeperMap.cpp b/src/Storages/StorageKeeperMap.cpp index bd255a952dc..923e807c5cd 100644 --- a/src/Storages/StorageKeeperMap.cpp +++ b/src/Storages/StorageKeeperMap.cpp @@ -264,9 +264,9 @@ StorageKeeperMap::StorageKeeperMap( metadata_string = out.str(); if (root_path.empty()) - throw Exception("root_path should not be empty", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "root_path should not be empty"); if (!root_path.starts_with('/')) - throw Exception("root_path should start with '/'", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "root_path should start with '/'"); auto config_keys_limit = context_->getConfigRef().getUInt64("keeper_map_keys_limit", 0); if (config_keys_limit != 0 && (keys_limit == 0 || keys_limit > config_keys_limit)) @@ -397,7 +397,9 @@ StorageKeeperMap::StorageKeeperMap( return; } - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot create metadata for table, because it is removed concurrently or because of wrong root_path ({})", root_path); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Cannot create metadata for table, because it is removed concurrently or because " + "of wrong root_path ({})", root_path); } @@ -763,12 +765,12 @@ StoragePtr create(const StorageFactory::Arguments & args) metadata.setConstraints(args.constraints); if (!args.storage_def->primary_key) - throw Exception("StorageKeeperMap requires one column in primary key", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "StorageKeeperMap requires one column in primary key"); metadata.primary_key = KeyDescription::getKeyFromAST(args.storage_def->primary_key->ptr(), metadata.columns, args.getContext()); auto primary_key_names = metadata.getColumnsRequiredForPrimaryKey(); if (primary_key_names.size() != 1) - throw Exception("StorageKeeperMap requires one column in primary key", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "StorageKeeperMap requires one column in primary key"); return std::make_shared( args.getContext(), args.table_id, metadata, args.query.attach, primary_key_names[0], root_path, keys_limit); diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index b8920647244..7d445c0d7ec 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -213,7 +213,7 @@ void LogSource::readData(const NameAndTypePair & name_and_type, ColumnPtr & colu const auto & data_file_it = storage.data_files_by_names.find(data_file_name); if (data_file_it == storage.data_files_by_names.end()) - throw Exception("Logical error: no information about file " + data_file_name + " in StorageLog", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: no information about file {} in StorageLog", data_file_name); const auto & data_file = *data_file_it->second; size_t offset = stream_for_prefix ? 
0 : offsets[data_file.index]; @@ -274,7 +274,7 @@ public: , lock(std::move(lock_)) { if (!lock) - throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Lock timeout exceeded"); /// Ensure that marks are loaded because we're going to update them. storage.loadMarks(lock); @@ -417,8 +417,7 @@ ISerialization::OutputStreamGetter LogSink::createStreamGetter(const NameAndType String data_file_name = ISerialization::getFileNameForStream(name_and_type, path); auto it = streams.find(data_file_name); if (it == streams.end()) - throw Exception("Logical error: stream was not created when writing data in LogSink", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: stream was not created when writing data in LogSink"); Stream & stream = it->second; if (stream.written) @@ -443,7 +442,7 @@ void LogSink::writeData(const NameAndTypePair & name_and_type, const IColumn & c { const auto & data_file_it = storage.data_files_by_names.find(data_file_name); if (data_file_it == storage.data_files_by_names.end()) - throw Exception("Logical error: no information about file " + data_file_name + " in StorageLog", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: no information about file {} in StorageLog", data_file_name); const auto & data_file = *data_file_it->second; const auto & columns = metadata_snapshot->getColumns(); @@ -553,7 +552,7 @@ StorageLog::StorageLog( setInMemoryMetadata(storage_metadata); if (relative_path_.empty()) - throw Exception("Storage " + getName() + " requires data path", ErrorCodes::INCORRECT_FILE_NAME); + throw Exception(ErrorCodes::INCORRECT_FILE_NAME, "Storage {} requires data path", getName()); /// Enumerate data files. for (const auto & column : storage_metadata.getColumns().getAllPhysical()) @@ -592,8 +591,8 @@ StorageLog::StorageLog( void StorageLog::addDataFiles(const NameAndTypePair & column) { if (data_files_by_names.contains(column.name)) - throw Exception("Duplicate column with name " + column.name + " in constructor of StorageLog.", - ErrorCodes::DUPLICATE_COLUMN); + throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Duplicate column with name {} in constructor of StorageLog.", + column.name); ISerialization::StreamCallback stream_callback = [&] (const ISerialization::SubstreamPath & substream_path) { @@ -624,7 +623,7 @@ void StorageLog::loadMarks(std::chrono::seconds lock_timeout) /// a data race between two threads trying to load marks simultaneously. WriteLock lock{rwlock, lock_timeout}; if (!lock) - throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Lock timeout exceeded"); loadMarks(lock); } @@ -639,7 +638,7 @@ void StorageLog::loadMarks(const WriteLock & lock /* already locked exclusively { size_t file_size = disk->getFileSize(marks_file_path); if (file_size % (num_data_files * sizeof(Mark)) != 0) - throw Exception("Size of marks file is inconsistent", ErrorCodes::SIZES_OF_MARKS_FILES_ARE_INCONSISTENT); + throw Exception(ErrorCodes::SIZES_OF_MARKS_FILES_ARE_INCONSISTENT, "Size of marks file is inconsistent"); num_marks = file_size / (num_data_files * sizeof(Mark)); @@ -677,7 +676,7 @@ void StorageLog::saveMarks(const WriteLock & /* already locked for writing */) for (const auto & data_file : data_files) { if (data_file.marks.size() != num_marks) - throw Exception("Wrong number of marks generated from block. 
Makes no sense.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Wrong number of marks generated from block. Makes no sense."); } size_t start = num_marks_saved; @@ -756,7 +755,7 @@ void StorageLog::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr { WriteLock lock{rwlock, getLockTimeout(local_context)}; if (!lock) - throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Lock timeout exceeded"); disk->clearDirectory(table_path); @@ -791,7 +790,7 @@ Pipe StorageLog::read( ReadLock lock{rwlock, lock_timeout}; if (!lock) - throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Lock timeout exceeded"); if (!num_data_files || !file_checker.getFileSize(data_files[INDEX_WITH_REAL_ROW_COUNT].path)) return Pipe(std::make_shared(storage_snapshot->getSampleBlockForColumns(column_names))); @@ -855,7 +854,7 @@ SinkToStoragePtr StorageLog::write(const ASTPtr & /*query*/, const StorageMetada { WriteLock lock{rwlock, getLockTimeout(local_context)}; if (!lock) - throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Lock timeout exceeded"); return std::make_shared(*this, metadata_snapshot, std::move(lock)); } @@ -864,7 +863,7 @@ CheckResults StorageLog::checkData(const ASTPtr & /* query */, ContextPtr local_ { ReadLock lock{rwlock, getLockTimeout(local_context)}; if (!lock) - throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Lock timeout exceeded"); return file_checker.check(); } @@ -874,7 +873,7 @@ IStorage::ColumnSizeByName StorageLog::getColumnSizes() const { ReadLock lock{rwlock, std::chrono::seconds(DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC)}; if (!lock) - throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Lock timeout exceeded"); ColumnSizeByName column_sizes; @@ -932,7 +931,7 @@ void StorageLog::backupData(BackupEntriesCollector & backup_entries_collector, c ReadLock lock{rwlock, lock_timeout}; if (!lock) - throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Lock timeout exceeded"); if (!num_data_files || !file_checker.getFileSize(data_files[INDEX_WITH_REAL_ROW_COUNT].path)) return; @@ -1009,7 +1008,7 @@ void StorageLog::restoreDataImpl(const BackupPtr & backup, const String & data_p { WriteLock lock{rwlock, lock_timeout}; if (!lock) - throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Lock timeout exceeded"); /// Load the marks if not loaded yet. We have to do that now because we're going to update these marks. 
loadMarks(lock); @@ -1045,7 +1044,7 @@ void StorageLog::restoreDataImpl(const BackupPtr & backup, const String & data_p size_t file_size = backup->getFileSize(file_path_in_backup); if (file_size % (num_data_files * sizeof(Mark)) != 0) - throw Exception("Size of marks file is inconsistent", ErrorCodes::SIZES_OF_MARKS_FILES_ARE_INCONSISTENT); + throw Exception(ErrorCodes::SIZES_OF_MARKS_FILES_ARE_INCONSISTENT, "Size of marks file is inconsistent"); num_extra_marks = file_size / (num_data_files * sizeof(Mark)); @@ -1102,9 +1101,8 @@ void registerStorageLog(StorageFactory & factory) auto create_fn = [](const StorageFactory::Arguments & args) { if (!args.engine_args.empty()) - throw Exception( - "Engine " + args.engine_name + " doesn't support any arguments (" + toString(args.engine_args.size()) + " given)", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Engine {} doesn't support any arguments ({} given)", + args.engine_name, args.engine_args.size()); String disk_name = getDiskName(*args.storage_def); DiskPtr disk = args.getContext()->getDisk(disk_name); diff --git a/src/Storages/StorageMaterializedMySQL.h b/src/Storages/StorageMaterializedMySQL.h index cbb59e508e8..9896265b576 100644 --- a/src/Storages/StorageMaterializedMySQL.h +++ b/src/Storages/StorageMaterializedMySQL.h @@ -44,7 +44,7 @@ public: private: [[noreturn]] static void throwNotAllowed() { - throw Exception("This method is not allowed for MaterializedMySQL", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "This method is not allowed for MaterializedMySQL"); } StoragePtr nested_storage; diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index fefa83fd6fe..d424344e7bf 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -70,17 +70,17 @@ StorageMaterializedView::StorageMaterializedView( storage_metadata.setColumns(columns_); if (!query.select) - throw Exception("SELECT query is not specified for " + getName(), ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "SELECT query is not specified for {}", getName()); /// If the destination table is not set, use inner table has_inner_table = query.to_table_id.empty(); if (has_inner_table && !query.storage) - throw Exception( - "You must specify where to save results of a MaterializedView query: either ENGINE or an existing table in a TO clause", - ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, + "You must specify where to save results of a MaterializedView query: " + "either ENGINE or an existing table in a TO clause"); if (query.select->list_of_selects->children.size() != 1) - throw Exception("UNION is not supported for MATERIALIZED VIEW", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW); + throw Exception(ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW, "UNION is not supported for MATERIALIZED VIEW"); auto select = SelectQueryDescription::getSelectQueryFromASTForMatView(query.select->clone(), local_context); storage_metadata.setSelectQuery(select); @@ -230,9 +230,8 @@ void StorageMaterializedView::truncate(const ASTPtr &, const StorageMetadataPtr void StorageMaterializedView::checkStatementCanBeForwarded() const { if (!has_inner_table) - throw Exception( - "MATERIALIZED VIEW targets existing table " + target_table_id.getNameForLogs() + ". 
" - + "Execute the statement directly on it.", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "MATERIALIZED VIEW targets existing table {}. " + "Execute the statement directly on it.", target_table_id.getNameForLogs()); } bool StorageMaterializedView::optimize( diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index dd3a1e75151..f4013b6d2c2 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -449,7 +449,7 @@ void ReadFromMerge::initializePipeline(QueryPipelineBuilder & pipeline, const Bu /// If sampling requested, then check that table supports it. if (query_info.query->as()->sampleSize() && !storage->supportsSampling()) - throw Exception("Illegal SAMPLE: table doesn't support sampling", ErrorCodes::SAMPLING_NOT_SUPPORTED); + throw Exception(ErrorCodes::SAMPLING_NOT_SUPPORTED, "Illegal SAMPLE: table doesn't support sampling"); Aliases aliases; auto storage_metadata_snapshot = storage->getInMemoryMetadataPtr(); @@ -739,7 +739,7 @@ StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables( continue; if (query && query->as()->prewhere() && !storage->supportsPrewhere()) - throw Exception("Storage " + storage->getName() + " doesn't support PREWHERE.", ErrorCodes::ILLEGAL_PREWHERE); + throw Exception(ErrorCodes::ILLEGAL_PREWHERE, "Storage {} doesn't support PREWHERE.", storage->getName()); if (storage.get() != this) { @@ -931,11 +931,11 @@ std::tuple StorageMerge::evaluateDatabaseName(cons if (const auto * func = node->as(); func && func->name == "REGEXP") { if (func->arguments->children.size() != 1) - throw Exception("REGEXP in Merge ENGINE takes only one argument", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "REGEXP in Merge ENGINE takes only one argument"); auto * literal = func->arguments->children[0]->as(); if (!literal || literal->value.getType() != Field::Types::Which::String || literal->value.safeGet().empty()) - throw Exception("Argument for REGEXP in Merge ENGINE should be a non empty String Literal", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Argument for REGEXP in Merge ENGINE should be a non empty String Literal"); return {true, func->arguments->children[0]}; } @@ -956,9 +956,9 @@ void registerStorageMerge(StorageFactory & factory) ASTs & engine_args = args.engine_args; if (engine_args.size() != 2) - throw Exception("Storage Merge requires exactly 2 parameters" - " - name of source database and regexp for table names.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Storage Merge requires exactly 2 parameters - name " + "of source database and regexp for table names."); auto [is_regexp, database_ast] = StorageMerge::evaluateDatabaseName(engine_args[0], args.getLocalContext()); diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 177b7cc1c97..b5631c6445f 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -107,7 +107,11 @@ StorageMergeTree::StorageMergeTree( loadDataParts(has_force_restore_data_flag); if (!attach && !getDataPartsForInternalUsage().empty()) - throw Exception("Data directory for table already containing data parts - probably it was unclean DROP table or manual intervention. 
You must either clear directory by hand or use ATTACH TABLE instead of CREATE TABLE if you need to use that parts.", ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, + "Data directory for table already containing data parts - probably " + "it was unclean DROP table or manual intervention. " + "You must either clear directory by hand or use ATTACH TABLE instead " + "of CREATE TABLE if you need to use that parts."); increment.set(getMaxBlockNumber()); @@ -321,7 +325,7 @@ void StorageMergeTree::alter( /// We cannot place this check into settings sanityCheck because it depends on format_version. /// sanityCheck must work event without storage. if (new_storage_settings->non_replicated_deduplication_window != 0 && format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) - throw Exception("Deduplication for non-replicated MergeTree in old syntax is not supported", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Deduplication for non-replicated MergeTree in old syntax is not supported"); deduplication_log->setDeduplicationWindowSize(new_storage_settings->non_replicated_deduplication_window); } @@ -375,9 +379,9 @@ CurrentlyMergingPartsTagger::CurrentlyMergingPartsTagger( if (!reserved_space) { if (is_mutation) - throw Exception("Not enough space for mutating part '" + future_part->parts[0]->name + "'", ErrorCodes::NOT_ENOUGH_SPACE); + throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Not enough space for mutating part '{}'", future_part->parts[0]->name); else - throw Exception("Not enough space for merging parts", ErrorCodes::NOT_ENOUGH_SPACE); + throw Exception(ErrorCodes::NOT_ENOUGH_SPACE, "Not enough space for merging parts"); } future_part->updatePath(storage, reserved_space.get()); @@ -385,7 +389,7 @@ CurrentlyMergingPartsTagger::CurrentlyMergingPartsTagger( for (const auto & part : future_part->parts) { if (storage.currently_merging_mutating_parts.contains(part)) - throw Exception("Tagging already tagged part " + part->name + ". This is a bug.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Tagging already tagged part {}. 
This is a bug.", part->name); } storage.currently_merging_mutating_parts.insert(future_part->parts.begin(), future_part->parts.end()); } @@ -714,7 +718,7 @@ void StorageMergeTree::loadDeduplicationLog() { auto settings = getSettings(); if (settings->non_replicated_deduplication_window != 0 && format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) - throw Exception("Deduplication for non-replicated MergeTree in old syntax is not supported", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Deduplication for non-replicated MergeTree in old syntax is not supported"); auto disk = getDisks()[0]; std::string path = fs::path(relative_data_path) / "deduplication_logs"; @@ -1422,7 +1426,7 @@ ActionLock StorageMergeTree::stopMergesAndWait() if (std::cv_status::timeout == currently_processing_in_background_condition.wait_for( lock, std::chrono::seconds(DBMS_DEFAULT_LOCK_ACQUIRE_TIMEOUT_SEC))) { - throw Exception("Timeout while waiting for already running merges", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Timeout while waiting for already running merges"); } } @@ -1546,7 +1550,9 @@ void StorageMergeTree::renameAndCommitEmptyParts(MutableDataPartsVector & new_pa DataPartsVector covered_parts_by_one_part = renameTempPartAndReplace(part, transaction); if (covered_parts_by_one_part.size() > 1) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} expected to cover not more then 1 part. {} covered parts have been found. This is a bug.", + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Part {} expected to cover not more then 1 part. " + "{} covered parts have been found. This is a bug.", part->name, covered_parts_by_one_part.size()); std::move(covered_parts_by_one_part.begin(), covered_parts_by_one_part.end(), std::back_inserter(covered_parts)); @@ -1787,9 +1793,9 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con for (const DataPartPtr & src_part : src_parts) { if (!canReplacePartition(src_part)) - throw Exception( - "Cannot replace partition '" + partition_id + "' because part '" + src_part->name + "' has inconsistent granularity with table", - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Cannot replace partition '{}' because part '{}' has inconsistent granularity with table", + partition_id, src_part->name); /// This will generate unique name in scope of current server process. Int64 temp_index = insert_increment.get(); @@ -1856,13 +1862,16 @@ void StorageMergeTree::movePartitionToTable(const StoragePtr & dest_table, const auto dest_table_storage = std::dynamic_pointer_cast(dest_table); if (!dest_table_storage) - throw Exception("Table " + getStorageID().getNameForLogs() + " supports movePartitionToTable only for MergeTree family of table engines." - " Got " + dest_table->getName(), ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Table {} supports movePartitionToTable only for MergeTree family of table engines. Got {}", + getStorageID().getNameForLogs(), dest_table->getName()); if (dest_table_storage->getStoragePolicy() != this->getStoragePolicy()) - throw Exception("Destination table " + dest_table_storage->getStorageID().getNameForLogs() + - " should have the same storage policy of source table " + getStorageID().getNameForLogs() + ". 
" + - getStorageID().getNameForLogs() + ": " + this->getStoragePolicy()->getName() + ", " + - dest_table_storage->getStorageID().getNameForLogs() + ": " + dest_table_storage->getStoragePolicy()->getName(), ErrorCodes::UNKNOWN_POLICY); + throw Exception(ErrorCodes::UNKNOWN_POLICY, + "Destination table {} should have the same storage policy of source table {}. {}: {}, {}: {}", + dest_table_storage->getStorageID().getNameForLogs(), + getStorageID().getNameForLogs(), getStorageID().getNameForLogs(), + this->getStoragePolicy()->getName(), dest_table_storage->getStorageID().getNameForLogs(), + dest_table_storage->getStoragePolicy()->getName()); auto dest_metadata_snapshot = dest_table->getInMemoryMetadataPtr(); auto metadata_snapshot = getInMemoryMetadataPtr(); @@ -1880,9 +1889,9 @@ void StorageMergeTree::movePartitionToTable(const StoragePtr & dest_table, const for (const DataPartPtr & src_part : src_parts) { if (!dest_table_storage->canReplacePartition(src_part)) - throw Exception( - "Cannot move partition '" + partition_id + "' because part '" + src_part->name + "' has inconsistent granularity with table", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Cannot move partition '{}' because part '{}' has inconsistent granularity with table", + partition_id, src_part->name); /// This will generate unique name in scope of current server process. Int64 temp_index = insert_increment.get(); diff --git a/src/Storages/StorageMongoDB.cpp b/src/Storages/StorageMongoDB.cpp index 17d3a7cf970..25a303620d6 100644 --- a/src/Storages/StorageMongoDB.cpp +++ b/src/Storages/StorageMongoDB.cpp @@ -78,7 +78,7 @@ void StorageMongoDB::connectIfNotConnected() { Poco::MongoDB::Database poco_db(auth_db); if (!poco_db.authenticate(*connection, username, password, Poco::MongoDB::Database::AUTH_SCRAM_SHA1)) - throw Exception("Cannot authenticate in MongoDB, incorrect user or password", ErrorCodes::MONGODB_CANNOT_AUTHENTICATE); + throw Exception(ErrorCodes::MONGODB_CANNOT_AUTHENTICATE, "Cannot authenticate in MongoDB, incorrect user or password"); } authenticated = true; @@ -200,9 +200,9 @@ StorageMongoDB::Configuration StorageMongoDB::getConfiguration(ASTs engine_args, else { if (engine_args.size() < 5 || engine_args.size() > 6) - throw Exception( - "Storage MongoDB requires from 5 to 6 parameters: MongoDB('host:port', database, collection, 'user', 'password' [, 'options']).", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Storage MongoDB requires from 5 to 6 parameters: " + "MongoDB('host:port', database, collection, 'user', 'password' [, 'options'])."); for (auto & engine_arg : engine_args) engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, context); diff --git a/src/Storages/StorageMongoDBSocketFactory.cpp b/src/Storages/StorageMongoDBSocketFactory.cpp index 7308c4b3ce7..6dfcbd7e00e 100644 --- a/src/Storages/StorageMongoDBSocketFactory.cpp +++ b/src/Storages/StorageMongoDBSocketFactory.cpp @@ -46,7 +46,7 @@ Poco::Net::StreamSocket StorageMongoDBSocketFactory::createSecureSocket(const st return socket; #else - throw Exception("SSL is not enabled at build time.", ErrorCodes::FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME); + throw Exception(ErrorCodes::FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME, "SSL is not enabled at build time."); #endif } diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index 20eb59c7262..ee647043407 100644 --- a/src/Storages/StorageMySQL.cpp +++ 
b/src/Storages/StorageMySQL.cpp @@ -264,9 +264,9 @@ StorageMySQLConfiguration StorageMySQL::getConfiguration(ASTs engine_args, Conte else { if (engine_args.size() < 5 || engine_args.size() > 7) - throw Exception( - "Storage MySQL requires 5-7 parameters: MySQL('host:port' (or 'addresses_pattern'), database, table, 'user', 'password'[, replace_query, 'on_duplicate_clause']).", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Storage MySQL requires 5-7 parameters: " + "MySQL('host:port' (or 'addresses_pattern'), database, table, " + "'user', 'password'[, replace_query, 'on_duplicate_clause'])."); for (auto & engine_arg : engine_args) engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, context_); @@ -305,7 +305,7 @@ void registerStorageMySQL(StorageFactory & factory) mysql_settings.loadFromQuery(*args.storage_def); if (!mysql_settings.connection_pool_size) - throw Exception("connection_pool_size cannot be zero.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "connection_pool_size cannot be zero."); mysqlxx::PoolWithFailover pool = createMySQLPoolWithFailover(configuration, mysql_settings); diff --git a/src/Storages/StorageNull.cpp b/src/Storages/StorageNull.cpp index 638ebf5109b..aa462e1a40c 100644 --- a/src/Storages/StorageNull.cpp +++ b/src/Storages/StorageNull.cpp @@ -25,9 +25,8 @@ void registerStorageNull(StorageFactory & factory) factory.registerStorage("Null", [](const StorageFactory::Arguments & args) { if (!args.engine_args.empty()) - throw Exception( - "Engine " + args.engine_name + " doesn't support any arguments (" + toString(args.engine_args.size()) + " given)", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Engine {} doesn't support any arguments ({} given)", + args.engine_name, args.engine_args.size()); return std::make_shared(args.table_id, args.columns, args.constraints, args.comment); }, diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index c87527ee740..28a80607fa2 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -217,7 +217,7 @@ zkutil::ZooKeeperPtr StorageReplicatedMergeTree::getZooKeeper() const { auto res = tryGetZooKeeper(); if (!res) - throw Exception("Cannot get ZooKeeper", ErrorCodes::NO_ZOOKEEPER); + throw Exception(ErrorCodes::NO_ZOOKEEPER, "Cannot get ZooKeeper"); return res; } @@ -405,7 +405,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( if (!attach) { dropIfEmpty(); - throw Exception("Can't create replicated table without ZooKeeper", ErrorCodes::NO_ZOOKEEPER); + throw Exception(ErrorCodes::NO_ZOOKEEPER, "Can't create replicated table without ZooKeeper"); } has_metadata_in_zookeeper = std::nullopt; @@ -431,10 +431,10 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( has_metadata_in_zookeeper = true; if (!getDataPartsForInternalUsage().empty()) - throw Exception("Data directory for table already contains data parts" - " - probably it was unclean DROP table or manual intervention." - " You must either clear directory by hand or use ATTACH TABLE" - " instead of CREATE TABLE if you need to use that parts.", ErrorCodes::INCORRECT_DATA); + throw Exception(ErrorCodes::INCORRECT_DATA, + "Data directory for table already contains data parts - probably it was unclean DROP table " + "or manual intervention. 
You must either clear directory by hand " + "or use ATTACH TABLE instead of CREATE TABLE if you need to use that parts."); try { @@ -458,7 +458,7 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( catch (Coordination::Exception & e) { if (!is_first_replica && e.code == Coordination::Error::ZNONODE) - throw Exception("Table " + zookeeper_path + " was suddenly removed.", ErrorCodes::ALL_REPLICAS_LOST); + throw Exception(ErrorCodes::ALL_REPLICAS_LOST, "Table {} was suddenly removed.", zookeeper_path); else throw; } @@ -582,8 +582,9 @@ void StorageReplicatedMergeTree::waitMutationToFinishOnReplicas( } if (partial_shutdown_called) - throw Exception("Mutation is not finished because table shutdown was called. It will be done after table restart.", - ErrorCodes::UNFINISHED); + throw Exception(ErrorCodes::UNFINISHED, + "Mutation is not finished because table shutdown was called. " + "It will be done after table restart."); /// Replica inactive, don't check mutation status if (!inactive_replicas.empty() && inactive_replicas.contains(replica)) @@ -799,9 +800,9 @@ bool StorageReplicatedMergeTree::createTableIfNotExists(const StorageMetadataPtr } /// Do not use LOGICAL_ERROR code, because it may happen if user has specified wrong zookeeper_path - throw Exception("Cannot create table, because it is created concurrently every time " - "or because of wrong zookeeper_path " - "or because of logical error", ErrorCodes::REPLICA_ALREADY_EXISTS); + throw Exception(ErrorCodes::REPLICA_ALREADY_EXISTS, + "Cannot create table, because it is created concurrently every time or because " + "of wrong zookeeper_path or because of logical error"); } void StorageReplicatedMergeTree::createReplica(const StorageMetadataPtr & metadata_snapshot) @@ -904,7 +905,7 @@ void StorageReplicatedMergeTree::drop() /// If probably there is metadata in ZooKeeper, we don't allow to drop the table. 
if (!zookeeper) - throw Exception("Can't drop readonly replicated table (need to drop data in ZooKeeper as well)", ErrorCodes::TABLE_IS_READ_ONLY); + throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Can't drop readonly replicated table (need to drop data in ZooKeeper as well)"); dropReplica(zookeeper, zookeeper_path, replica_name, log, getSettings()); } @@ -926,7 +927,7 @@ void StorageReplicatedMergeTree::dropReplica(zkutil::ZooKeeperPtr zookeeper, con Poco::Logger * logger, MergeTreeSettingsPtr table_settings) { if (zookeeper->expired()) - throw Exception("Table was not dropped because ZooKeeper session has expired.", ErrorCodes::TABLE_WAS_NOT_DROPPED); + throw Exception(ErrorCodes::TABLE_WAS_NOT_DROPPED, "Table was not dropped because ZooKeeper session has expired."); auto remote_replica_path = zookeeper_path + "/replicas/" + replica; @@ -1286,9 +1287,11 @@ void StorageReplicatedMergeTree::checkParts(bool skip_sanity_checks) LOG_DEBUG(log, sanity_report_debug_fmt, fmt::join(uncovered_unexpected_parts, ", "), fmt::join(parts_to_fetch, ", "), fmt::join(covered_unexpected_parts, ", "), fmt::join(expected_parts, ", ")); throw Exception(ErrorCodes::TOO_MANY_UNEXPECTED_DATA_PARTS, sanity_report_fmt, getStorageID().getNameForLogs(), - formatReadableQuantity(uncovered_unexpected_parts_rows), formatReadableQuantity(total_rows_on_filesystem), + formatReadableQuantity(uncovered_unexpected_parts_rows), + formatReadableQuantity(total_rows_on_filesystem), uncovered_unexpected_parts.size(), uncovered_unexpected_parts_rows, unexpected_parts_nonnew, unexpected_parts_nonnew_rows, - parts_to_fetch.size(), parts_to_fetch_blocks, covered_unexpected_parts.size(), unexpected_parts_rows - uncovered_unexpected_parts_rows); + parts_to_fetch.size(), parts_to_fetch_blocks, covered_unexpected_parts.size(), + unexpected_parts_rows - uncovered_unexpected_parts_rows); } if (unexpected_parts_nonnew_rows > 0 || uncovered_unexpected_parts_rows > 0) @@ -1665,7 +1668,7 @@ bool StorageReplicatedMergeTree::executeFetch(LogEntry & entry, bool need_to_che if (entry.quorum) { if (entry.type != LogEntry::GET_PART) - throw Exception("Logical error: log entry with quorum but type is not GET_PART", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: log entry with quorum but type is not GET_PART"); LOG_DEBUG(log, "No active replica has part {} which needs to be written with quorum. Will try to mark that quorum as failed.", entry.new_part_name); @@ -1728,8 +1731,7 @@ bool StorageReplicatedMergeTree::executeFetch(LogEntry & entry, bool need_to_che auto part_info = MergeTreePartInfo::fromPartName(entry.new_part_name, format_version); if (part_info.min_block != part_info.max_block) - throw Exception("Logical error: log entry with quorum for part covering more than one block number", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: log entry with quorum for part covering more than one block number"); ops.emplace_back(zkutil::makeCreateRequest( fs::path(zookeeper_path) / "quorum" / "failed_parts" / entry.new_part_name, @@ -2227,7 +2229,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) { auto part_desc = part_name_to_desc[final_part_name]; if (!part_desc) - throw Exception("There is no final part " + final_part_name + ". This is a bug", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no final part {}. 
This is a bug", final_part_name); final_parts.emplace_back(part_desc); @@ -2238,8 +2240,8 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) if (!prev.found_new_part_info.isDisjoint(curr.found_new_part_info)) { - throw Exception("Intersected final parts detected: " + prev.found_new_part_name - + " and " + curr.found_new_part_name + ". It should be investigated.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Intersected final parts detected: {} and {}. It should be investigated.", + prev.found_new_part_name, curr.found_new_part_name); } } } @@ -2254,7 +2256,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) if (part_desc->src_table_part) { if (part_desc->checksum_hex != part_desc->src_table_part->checksums.getTotalChecksumHex()) - throw Exception("Checksums of " + part_desc->src_table_part->name + " is suddenly changed", ErrorCodes::UNFINISHED); + throw Exception(ErrorCodes::UNFINISHED, "Checksums of {} is suddenly changed", part_desc->src_table_part->name); auto [res_part, temporary_part_lock] = cloneAndLoadDataPartOnSameDisk( part_desc->src_table_part, TMP_PREFIX + "clone_", part_desc->new_part_info, metadata_snapshot, NO_TRANSACTION_PTR, &part_desc->hardlinked_files, false, {}); @@ -2271,7 +2273,9 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) String interserver_scheme = getContext()->getInterserverScheme(); if (interserver_scheme != address.scheme) - throw Exception("Interserver schemas are different '" + interserver_scheme + "' != '" + address.scheme + "', can't fetch part from " + address.host, ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Interserver schemas are different '{}' != '{}', can't fetch part from {}", + interserver_scheme, address.scheme, address.host); part_desc->res_part = fetcher.fetchSelectedPart( metadata_snapshot, getContext(), part_desc->found_new_part_name, source_replica_path, @@ -2283,7 +2287,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) ProfileEvents::increment(ProfileEvents::ReplicatedPartFetches); } else - throw Exception("There is no receipt to produce part " + part_desc->new_part_name + ". This is bug", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no receipt to produce part {}. 
This is bug", part_desc->new_part_name); }; /// Download or clone parts @@ -2370,7 +2374,8 @@ void StorageReplicatedMergeTree::executeClonePartFromShard(const LogEntry & entr } if (replica.empty()) - throw Exception(ErrorCodes::NO_REPLICA_HAS_PART, "Not found active replica on shard {} to clone part {}", entry.source_shard, entry.new_part_name); + throw Exception(ErrorCodes::NO_REPLICA_HAS_PART, "Not found active replica on shard {} to clone part {}", + entry.source_shard, entry.new_part_name); LOG_INFO(log, "Will clone part from shard {} and replica {}", entry.source_shard, replica); @@ -2387,9 +2392,8 @@ void StorageReplicatedMergeTree::executeClonePartFromShard(const LogEntry & entr auto get_part = [&, address, timeouts, credentials, interserver_scheme]() { if (interserver_scheme != address.scheme) - throw Exception("Interserver schemes are different: '" + interserver_scheme - + "' != '" + address.scheme + "', can't fetch part from " + address.host, - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Interserver schemes are different: '{}' != '{}', can't fetch part from {}", + interserver_scheme, address.scheme, address.host); return fetcher.fetchSelectedPart( metadata_snapshot, getContext(), entry.new_part_name, source_replica_path, @@ -2452,16 +2456,15 @@ void StorageReplicatedMergeTree::cloneReplica(const String & source_replica, Coo } else if (rc == Coordination::Error::ZNODEEXISTS) { - throw Exception( - "Can not clone replica, because the " + source_replica + " updated to new ClickHouse version", - ErrorCodes::REPLICA_STATUS_CHANGED); + throw Exception(ErrorCodes::REPLICA_STATUS_CHANGED, + "Can not clone replica, because the {} updated to new ClickHouse version", source_replica); } else if (responses[1]->error == Coordination::Error::ZBADVERSION) { /// If is_lost node version changed than source replica also lost, /// so we cannot clone from it. - throw Exception( - "Can not clone replica, because the " + source_replica + " became lost", ErrorCodes::REPLICA_STATUS_CHANGED); + throw Exception(ErrorCodes::REPLICA_STATUS_CHANGED, "Can not clone replica, because the {} became lost", + source_replica); } else if (responses.back()->error == Coordination::Error::ZBADVERSION) { @@ -3759,6 +3762,9 @@ String StorageReplicatedMergeTree::findReplicaHavingCoveringPart( */ void StorageReplicatedMergeTree::updateQuorum(const String & part_name, bool is_parallel) { + if (is_parallel && format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Parallel quorum inserts are not compatible with the deprecated syntax of *MergeTree engines"); + auto zookeeper = getZooKeeper(); /// Information on which replicas a part has been added, if the quorum has not yet been reached. 
@@ -3918,7 +3924,7 @@ void StorageReplicatedMergeTree::cleanLastPartNode(const String & partition_id) bool StorageReplicatedMergeTree::partIsInsertingWithParallelQuorum(const MergeTreePartInfo & part_info) const { auto zookeeper = getZooKeeper(); - return zookeeper->exists(fs::path(zookeeper_path) / "quorum" / "parallel" / part_info.getPartNameAndCheckFormat(format_version)); + return zookeeper->exists(fs::path(zookeeper_path) / "quorum" / "parallel" / part_info.getPartNameV1()); } @@ -4084,9 +4090,8 @@ bool StorageReplicatedMergeTree::fetchPart( get_part = [&, address, timeouts, credentials, interserver_scheme]() { if (interserver_scheme != address.scheme) - throw Exception("Interserver schemes are different: '" + interserver_scheme - + "' != '" + address.scheme + "', can't fetch part from " + address.host, - ErrorCodes::INTERSERVER_SCHEME_DOESNT_MATCH); + throw Exception(ErrorCodes::INTERSERVER_SCHEME_DOESNT_MATCH, "Interserver schemes are different: " + "'{}' != '{}', can't fetch part from {}", interserver_scheme, address.scheme, address.host); return fetcher.fetchSelectedPart( metadata_snapshot, @@ -4260,9 +4265,8 @@ MutableDataPartStoragePtr StorageReplicatedMergeTree::fetchExistsPart( get_part = [&, address, timeouts, interserver_scheme, credentials]() { if (interserver_scheme != address.scheme) - throw Exception("Interserver schemes are different: '" + interserver_scheme - + "' != '" + address.scheme + "', can't fetch part from " + address.host, - ErrorCodes::INTERSERVER_SCHEME_DOESNT_MATCH); + throw Exception(ErrorCodes::INTERSERVER_SCHEME_DOESNT_MATCH, "Interserver schemes are different: " + "'{}' != '{}', can't fetch part from {}", interserver_scheme, address.scheme, address.host); return fetcher.fetchSelectedPart( metadata_snapshot, getContext(), part_name, source_replica_path, @@ -4277,7 +4281,7 @@ MutableDataPartStoragePtr StorageReplicatedMergeTree::fetchExistsPart( part = get_part(); if (part->getDataPartStorage().getDiskName() != replaced_disk->getName()) - throw Exception("Part " + part->name + " fetched on wrong disk " + part->getDataPartStorage().getDiskName(), ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} fetched on wrong disk {}", part->name, part->getDataPartStorage().getDiskName()); auto replaced_path = fs::path(replaced_part_path); part->getDataPartStorage().rename(replaced_path.parent_path(), replaced_path.filename(), nullptr, true, false); @@ -4735,7 +4739,7 @@ std::optional StorageReplicatedMergeTree::distributedWrite(const const Settings & settings = local_context->getSettingsRef(); if (settings.max_distributed_depth && local_context->getClientInfo().distributed_depth >= settings.max_distributed_depth) - throw Exception("Maximum distributed depth exceeded", ErrorCodes::TOO_LARGE_DISTRIBUTED_DEPTH); + throw Exception(ErrorCodes::TOO_LARGE_DISTRIBUTED_DEPTH, "Maximum distributed depth exceeded"); auto & select = query.select->as(); @@ -5216,14 +5220,17 @@ void StorageReplicatedMergeTree::alter( else if (rc == Coordination::Error::ZBADVERSION) { if (results[0]->error != Coordination::Error::ZOK) - throw Exception("Metadata on replica is not up to date with common metadata in Zookeeper. It means that this replica still not applied some of previous alters." - " Probably too many alters executing concurrently (highly not recommended). You can retry this error", - ErrorCodes::CANNOT_ASSIGN_ALTER); + throw Exception(ErrorCodes::CANNOT_ASSIGN_ALTER, + "Metadata on replica is not up to date with common metadata in Zookeeper. 
" + "It means that this replica still not applied some of previous alters." + " Probably too many alters executing concurrently (highly not recommended). " + "You can retry this error"); /// Cannot retry automatically, because some zookeeper ops were lost on the first attempt. Will retry on DDLWorker-level. if (query_context->getZooKeeperMetadataTransaction()) - throw Exception("Cannot execute alter, because mutations version was suddenly changed due to concurrent alter", - ErrorCodes::CANNOT_ASSIGN_ALTER); + throw Exception(ErrorCodes::CANNOT_ASSIGN_ALTER, + "Cannot execute alter, because mutations version was suddenly changed due " + "to concurrent alter"); continue; } @@ -5380,7 +5387,7 @@ void StorageReplicatedMergeTree::dropPartNoWaitNoThrow(const String & part_name) { assertNotReadonly(); if (!is_leader) - throw Exception("DROP PART cannot be done on this replica because it is not a leader", ErrorCodes::NOT_A_LEADER); + throw Exception(ErrorCodes::NOT_A_LEADER, "DROP PART cannot be done on this replica because it is not a leader"); zkutil::ZooKeeperPtr zookeeper = getZooKeeperAndAssertNotReadonly(); LogEntry entry; @@ -5392,7 +5399,7 @@ void StorageReplicatedMergeTree::dropPart(const String & part_name, bool detach, { assertNotReadonly(); if (!is_leader) - throw Exception("DROP PART cannot be done on this replica because it is not a leader", ErrorCodes::NOT_A_LEADER); + throw Exception(ErrorCodes::NOT_A_LEADER, "DROP PART cannot be done on this replica because it is not a leader"); zkutil::ZooKeeperPtr zookeeper = getZooKeeperAndAssertNotReadonly(); LogEntry entry; @@ -5421,7 +5428,7 @@ void StorageReplicatedMergeTree::dropPartition(const ASTPtr & partition, bool de { assertNotReadonly(); if (!is_leader) - throw Exception("DROP PARTITION cannot be done on this replica because it is not a leader", ErrorCodes::NOT_A_LEADER); + throw Exception(ErrorCodes::NOT_A_LEADER, "DROP PARTITION cannot be done on this replica because it is not a leader"); zkutil::ZooKeeperPtr zookeeper = getZooKeeperAndAssertNotReadonly(); @@ -5450,7 +5457,7 @@ void StorageReplicatedMergeTree::truncate( assertNotReadonly(); if (!is_leader) - throw Exception("TRUNCATE cannot be done on this replica because it is not a leader", ErrorCodes::NOT_A_LEADER); + throw Exception(ErrorCodes::NOT_A_LEADER, "TRUNCATE cannot be done on this replica because it is not a leader"); waitForOutdatedPartsToBeLoaded(); zkutil::ZooKeeperPtr zookeeper = getZooKeeperAndAssertNotReadonly(); @@ -5524,18 +5531,19 @@ void StorageReplicatedMergeTree::checkTableCanBeRenamed(const StorageID & new_na return; } - throw Exception( - "Cannot rename Replicated table, because zookeeper_path contains implicit 'database' or 'table' macro. " - "We cannot rename path in ZooKeeper, so path may become inconsistent with table name. If you really want to rename table, " - "you should edit metadata file first and restart server or reattach the table.", - ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Cannot rename Replicated table, because zookeeper_path contains implicit 'database' " + "or 'table' macro. We cannot rename path " + "in ZooKeeper, so path may become inconsistent with table name. 
" + "If you really want to rename table, you should edit metadata file first and restart server or reattach the table."); } assert(renaming_restrictions == RenamingRestrictions::ALLOW_PRESERVING_UUID); if (!new_name.hasUUID() && getStorageID().hasUUID()) - throw Exception("Cannot move Replicated table to Ordinary database, because zookeeper_path contains implicit 'uuid' macro. " - "If you really want to rename table, " - "you should edit metadata file first and restart server or reattach the table.", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Cannot move Replicated table to Ordinary database, because zookeeper_path contains implicit " + "'uuid' macro. If you really want to rename table, you should edit metadata file first " + "and restart server or reattach the table."); } void StorageReplicatedMergeTree::rename(const String & new_path_to_table_data, const StorageID & new_table_id) @@ -5823,7 +5831,7 @@ bool StorageReplicatedMergeTree::tryWaitForReplicaToProcessLogEntry( } else { - throw Exception("Logical error: unexpected name of log node: " + entry.znode_name, ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: unexpected name of log node: {}", entry.znode_name); } /** Second - find the corresponding entry in the queue of the specified replica. @@ -6080,7 +6088,7 @@ void StorageReplicatedMergeTree::fetchPartition( String auxiliary_zookeeper_name = zkutil::extractZooKeeperName(expand_from); String from = zkutil::extractZooKeeperPath(expand_from, /* check_starts_with_slash */ true); if (from.empty()) - throw Exception("ZooKeeper path should not be empty", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "ZooKeeper path should not be empty"); zkutil::ZooKeeperPtr zookeeper; if (auxiliary_zookeeper_name != default_zookeeper_name) @@ -6102,7 +6110,7 @@ void StorageReplicatedMergeTree::fetchPartition( * Unreliable (there is a race condition) - such a part may appear a little later. */ if (checkIfDetachedPartExists(part_name)) - throw Exception(ErrorCodes::DUPLICATE_DATA_PART, "Detached part " + part_name + " already exists."); + throw Exception(ErrorCodes::DUPLICATE_DATA_PART, "Detached part {} already exists.", part_name); LOG_INFO(log, "Will fetch part {} from shard {} (zookeeper '{}')", part_name, from_, auxiliary_zookeeper_name); try @@ -6129,7 +6137,7 @@ void StorageReplicatedMergeTree::fetchPartition( * Unreliable (there is a race condition) - such a partition may appear a little later. */ if (checkIfDetachedPartitionExists(partition_id)) - throw Exception("Detached partition " + partition_id + " already exists.", ErrorCodes::PARTITION_ALREADY_EXISTS); + throw Exception(ErrorCodes::PARTITION_ALREADY_EXISTS, "Detached partition {} already exists.", partition_id); zkutil::Strings replicas; zkutil::Strings active_replicas; @@ -6147,7 +6155,7 @@ void StorageReplicatedMergeTree::fetchPartition( active_replicas.push_back(replica); if (active_replicas.empty()) - throw Exception("No active replicas for shard " + from, ErrorCodes::NO_ACTIVE_REPLICAS); + throw Exception(ErrorCodes::NO_ACTIVE_REPLICAS, "No active replicas for shard {}", from); /** You must select the best (most relevant) replica. * This is a replica with the maximum `log_pointer`, then with the minimum `queue` size. 
@@ -6180,7 +6188,7 @@ void StorageReplicatedMergeTree::fetchPartition( } if (best_replica.empty()) - throw Exception("Logical error: cannot choose best replica.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: cannot choose best replica."); LOG_INFO(log, "Found {} replicas, {} of them are active. Selected {} to fetch from.", replicas.size(), active_replicas.size(), best_replica); @@ -6201,7 +6209,7 @@ void StorageReplicatedMergeTree::fetchPartition( LOG_INFO(log, "Some of parts ({}) are missing. Will try to fetch covering parts.", missing_parts.size()); if (try_no >= query_context->getSettings().max_fetch_partition_retries_count) - throw Exception("Too many retries to fetch parts from " + best_replica_path, ErrorCodes::TOO_MANY_RETRIES_TO_FETCH_PARTS); + throw Exception(ErrorCodes::TOO_MANY_RETRIES_TO_FETCH_PARTS, "Too many retries to fetch parts from {}", best_replica_path); Strings parts = zookeeper->getChildren(fs::path(best_replica_path) / "parts"); ActiveDataPartSet active_parts_set(format_version, parts); @@ -6222,7 +6230,7 @@ void StorageReplicatedMergeTree::fetchPartition( parts_to_fetch = std::move(parts_to_fetch_partition); if (parts_to_fetch.empty()) - throw Exception("Partition " + partition_id + " on " + best_replica_path + " doesn't exist", ErrorCodes::PARTITION_DOESNT_EXIST); + throw Exception(ErrorCodes::PARTITION_DOESNT_EXIST, "Partition {} on {} doesn't exist", partition_id, best_replica_path); } else { @@ -6369,8 +6377,9 @@ void StorageReplicatedMergeTree::mutate(const MutationCommands & commands, Conte { /// Cannot retry automatically, because some zookeeper ops were lost on the first attempt. Will retry on DDLWorker-level. if (query_context->getZooKeeperMetadataTransaction()) - throw Exception("Cannot execute alter, because mutations version was suddenly changed due to concurrent alter", - ErrorCodes::CANNOT_ASSIGN_ALTER); + throw Exception(ErrorCodes::CANNOT_ASSIGN_ALTER, + "Cannot execute alter, because mutations version was suddenly changed due " + "to concurrent alter"); LOG_TRACE(log, "Version conflict when trying to create a mutation node, retrying..."); continue; } @@ -6906,9 +6915,9 @@ void StorageReplicatedMergeTree::replacePartitionFrom( /// Save deduplication block ids with special prefix replace_partition if (!canReplacePartition(src_part)) - throw Exception( - "Cannot replace partition '" + partition_id + "' because part '" + src_part->name + "' has inconsistent granularity with table", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, + "Cannot replace partition '{}' because part '{}" + "' has inconsistent granularity with table", partition_id, src_part->name); String hash_hex = src_part->checksums.getTotalChecksumHex(); @@ -7011,9 +7020,9 @@ void StorageReplicatedMergeTree::replacePartitionFrom( { /// Cannot retry automatically, because some zookeeper ops were lost on the first attempt. Will retry on DDLWorker-level. 
 if (query_context->getZooKeeperMetadataTransaction())
-                    throw Exception(
-                        "Cannot execute alter, because alter partition version was suddenly changed due to concurrent alter",
-                        ErrorCodes::CANNOT_ASSIGN_ALTER);
+                    throw Exception(ErrorCodes::CANNOT_ASSIGN_ALTER,
+                                    "Cannot execute alter, because alter partition version was suddenly changed due "
+                                    "to concurrent alter");
                 continue;
             }
             else
@@ -7070,13 +7079,16 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta
     auto dest_table_storage = std::dynamic_pointer_cast<StorageReplicatedMergeTree>(dest_table);
     if (!dest_table_storage)
-        throw Exception("Table " + getStorageID().getNameForLogs() + " supports movePartitionToTable only for ReplicatedMergeTree family of table engines." " Got " + dest_table->getName(), ErrorCodes::NOT_IMPLEMENTED);
+        throw Exception(ErrorCodes::NOT_IMPLEMENTED,
+                        "Table {} supports movePartitionToTable only for ReplicatedMergeTree family of table engines. "
+                        "Got {}", getStorageID().getNameForLogs(), dest_table->getName());
     if (dest_table_storage->getStoragePolicy() != this->getStoragePolicy())
-        throw Exception("Destination table " + dest_table_storage->getStorageID().getNameForLogs() +
-                        " should have the same storage policy of source table " + getStorageID().getNameForLogs() + ". " +
-                        getStorageID().getNameForLogs() + ": " + this->getStoragePolicy()->getName() + ", " +
-                        getStorageID().getNameForLogs() + ": " + dest_table_storage->getStoragePolicy()->getName(), ErrorCodes::UNKNOWN_POLICY);
+        throw Exception(ErrorCodes::UNKNOWN_POLICY,
+                        "Destination table {} should have the same storage policy of source table {}. {}: {}, {}: {}",
+                        dest_table_storage->getStorageID().getNameForLogs(),
+                        getStorageID().getNameForLogs(), getStorageID().getNameForLogs(),
+                        this->getStoragePolicy()->getName(), getStorageID().getNameForLogs(),
+                        dest_table_storage->getStoragePolicy()->getName());
 
     auto dest_metadata_snapshot = dest_table->getInMemoryMetadataPtr();
     auto metadata_snapshot = getInMemoryMetadataPtr();
@@ -7142,9 +7154,9 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta
     for (const auto & src_part : src_all_parts)
     {
         if (!dest_table_storage->canReplacePartition(src_part))
-            throw Exception(
-                "Cannot move partition '" + partition_id + "' because part '" + src_part->name + "' has inconsistent granularity with table",
-                ErrorCodes::LOGICAL_ERROR);
+            throw Exception(ErrorCodes::LOGICAL_ERROR,
+                            "Cannot move partition '{}' because part '{}"
+                            "' has inconsistent granularity with table", partition_id, src_part->name);
 
         String hash_hex = src_part->checksums.getTotalChecksumHex();
         String block_id_path;
@@ -7300,7 +7312,8 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta
         return;
     }
 
-    throw Exception(ErrorCodes::CANNOT_ASSIGN_ALTER, "Cannot assign ALTER PARTITION, because another ALTER PARTITION query was concurrently executed");
+    throw Exception(ErrorCodes::CANNOT_ASSIGN_ALTER,
+                    "Cannot assign ALTER PARTITION, because another ALTER PARTITION query was concurrently executed");
 }
 
 void StorageReplicatedMergeTree::movePartitionToShard(
@@ -7309,10 +7322,10 @@ void StorageReplicatedMergeTree::movePartitionToShard(
     /// This is a lightweight operation that only optimistically checks if it could succeed and queues tasks.
if (!move_part) - throw Exception("MOVE PARTITION TO SHARD is not supported, use MOVE PART instead", ErrorCodes::NOT_IMPLEMENTED); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "MOVE PARTITION TO SHARD is not supported, use MOVE PART instead"); if (zkutil::normalizeZooKeeperPath(zookeeper_path, /* check_starts_with_slash */ true) == zkutil::normalizeZooKeeperPath(to, /* check_starts_with_slash */ true)) - throw Exception("Source and destination are the same", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Source and destination are the same"); auto zookeeper = getZooKeeperAndAssertNotReadonly(); @@ -7334,7 +7347,9 @@ void StorageReplicatedMergeTree::movePartitionToShard( { if (partIsLastQuorumPart(part->info)) { - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Part {} is last inserted part with quorum in partition. Would not be able to drop", part_name); + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Part {} is last inserted part with quorum in partition. Would not be able to drop", + part_name); } /// canMergeSinglePart is overlapping with dropPart, let's try to use the same code. @@ -7362,7 +7377,7 @@ void StorageReplicatedMergeTree::movePartitionToShard( } if (src_pins.part_uuids.contains(part->uuid) || dst_pins.part_uuids.contains(part->uuid)) - throw Exception(ErrorCodes::PART_IS_TEMPORARILY_LOCKED, "Part {} has it's uuid ({}) already pinned.", part_name, toString(part->uuid)); + throw Exception(ErrorCodes::PART_IS_TEMPORARILY_LOCKED, "Part {} has it's uuid ({}) already pinned.", part_name, part->uuid); src_pins.part_uuids.insert(part->uuid); dst_pins.part_uuids.insert(part->uuid); @@ -7530,7 +7545,7 @@ bool StorageReplicatedMergeTree::waitForShrinkingQueueSize(size_t queue_size, UI return false; if (partial_shutdown_called) - throw Exception("Shutdown is called for table", ErrorCodes::ABORTED); + throw Exception(ErrorCodes::ABORTED, "Shutdown is called for table"); } return true; @@ -7729,9 +7744,9 @@ void StorageReplicatedMergeTree::dropAllPartsInPartitions( { /// Cannot retry automatically, because some zookeeper ops were lost on the first attempt. Will retry on DDLWorker-level. if (query_context->getZooKeeperMetadataTransaction()) - throw Exception( - "Cannot execute alter, because alter partition version was suddenly changed due to concurrent alter", - ErrorCodes::CANNOT_ASSIGN_ALTER); + throw Exception(ErrorCodes::CANNOT_ASSIGN_ALTER, + "Cannot execute alter, because alter partition version was suddenly changed due " + "to concurrent alter"); continue; } else @@ -8603,7 +8618,9 @@ bool StorageReplicatedMergeTree::createEmptyPartInsteadOfLost(zkutil::ZooKeeperP /// source replica will replace lost part with empty part and we /// will fetch this empty part instead of our source parts. This /// will make replicas consistent, but some data will be lost. 
- throw Exception(ErrorCodes::INCORRECT_DATA, "Tried to create empty part {}, but it replaces existing parts {}.", lost_part_name, fmt::join(part_names, ", ")); + throw Exception(ErrorCodes::INCORRECT_DATA, + "Tried to create empty part {}, but it replaces existing parts {}.", + lost_part_name, fmt::join(part_names, ", ")); } lockSharedData(*new_data_part, false, {}); @@ -8650,7 +8667,9 @@ bool StorageReplicatedMergeTree::createEmptyPartInsteadOfLost(zkutil::ZooKeeperP } else { - throw Exception(ErrorCodes::DUPLICATE_DATA_PART, "Part {} already exists on replica {} on path {}", lost_part_name, replica, current_part_path); + throw Exception(ErrorCodes::DUPLICATE_DATA_PART, + "Part {} already exists on replica {} on path {}", + lost_part_name, replica, current_part_path); } } @@ -8724,7 +8743,8 @@ void StorageReplicatedMergeTree::createZeroCopyLockNode( } else if (error == Coordination::Error::ZNONODE && mode != zkutil::CreateMode::Persistent) { - throw Exception(ErrorCodes::NOT_FOUND_NODE, "Cannot create ephemeral zero copy lock {} because part was unlocked from zookeeper", zookeeper_node); + throw Exception(ErrorCodes::NOT_FOUND_NODE, + "Cannot create ephemeral zero copy lock {} because part was unlocked from zookeeper", zookeeper_node); } } else @@ -8750,7 +8770,8 @@ void StorageReplicatedMergeTree::createZeroCopyLockNode( else if (error == Coordination::Error::ZNONODE && mode != zkutil::CreateMode::Persistent) { /// Ephemeral locks used during fetches so if parent node was removed we cannot do anything - throw Exception(ErrorCodes::NOT_FOUND_NODE, "Cannot create ephemeral zero copy lock {} because part was unlocked from zookeeper", zookeeper_node); + throw Exception(ErrorCodes::NOT_FOUND_NODE, + "Cannot create ephemeral zero copy lock {} because part was unlocked from zookeeper", zookeeper_node); } } } @@ -8766,7 +8787,9 @@ void StorageReplicatedMergeTree::createZeroCopyLockNode( if (!created) { String mode_str = mode == zkutil::CreateMode::Persistent ? 
"persistent" : "ephemeral"; - throw Exception(ErrorCodes::NOT_FOUND_NODE, "Cannot create {} zero copy lock {} because part was unlocked from zookeeper", mode_str, zookeeper_node); + throw Exception(ErrorCodes::NOT_FOUND_NODE, + "Cannot create {} zero copy lock {} because part was unlocked from zookeeper", + mode_str, zookeeper_node); } } diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 736aa0d8a5c..bfc7e8f7f64 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -156,7 +156,7 @@ public: , list_objects_scheduler(threadPoolCallbackRunner(list_objects_pool, "ListObjects")) { if (globbed_uri.bucket.find_first_of("*?{") != globbed_uri.bucket.npos) - throw Exception("Expression can not have wildcards inside bucket name", ErrorCodes::UNEXPECTED_EXPRESSION); + throw Exception(ErrorCodes::UNEXPECTED_EXPRESSION, "Expression can not have wildcards inside bucket name"); const String key_prefix = globbed_uri.key.substr(0, globbed_uri.key.find_first_of("*?{")); @@ -579,15 +579,6 @@ StorageS3Source::StorageS3Source( reader_future = createReaderAsync(); } - -void StorageS3Source::onCancel() -{ - std::lock_guard lock(reader_mutex); - if (reader) - reader->cancel(); -} - - StorageS3Source::ReaderHolder StorageS3Source::createReader() { auto [current_key, info] = (*file_iterator)(); @@ -708,8 +699,12 @@ Chunk StorageS3Source::generate() { while (true) { - if (!reader || isCancelled()) + if (isCancelled() || !reader) + { + if (reader) + reader->cancel(); break; + } Chunk chunk; if (reader->pull(chunk)) @@ -741,21 +736,19 @@ Chunk StorageS3Source::generate() return chunk; } - { - std::lock_guard lock(reader_mutex); - assert(reader_future.valid()); - reader = reader_future.get(); + assert(reader_future.valid()); + reader = reader_future.get(); - if (!reader) - break; + if (!reader) + break; - /// Even if task is finished the thread may be not freed in pool. - /// So wait until it will be freed before scheduling a new task. - create_reader_pool.wait(); - reader_future = createReaderAsync(); - } + /// Even if task is finished the thread may be not freed in pool. + /// So wait until it will be freed before scheduling a new task. + create_reader_pool.wait(); + reader_future = createReaderAsync(); } + return {}; } @@ -1146,7 +1139,8 @@ SinkToStoragePtr StorageS3::write(const ASTPtr & query, const StorageMetadataPtr else { if (is_key_with_globs) - throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "S3 key '{}' contains globs, so the table is in readonly mode", s3_configuration.uri.key); + throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, + "S3 key '{}' contains globs, so the table is in readonly mode", s3_configuration.uri.key); bool truncate_in_insert = local_context->getSettingsRef().s3_truncate_on_insert; @@ -1167,11 +1161,12 @@ SinkToStoragePtr StorageS3::write(const ASTPtr & query, const StorageMetadataPtr } else throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Object in bucket {} with key {} already exists. If you want to overwrite it, enable setting s3_truncate_on_insert, if you " - "want to create a new file on each insert, enable setting s3_create_new_file_on_insert", - s3_configuration.uri.bucket, - keys.back()); + ErrorCodes::BAD_ARGUMENTS, + "Object in bucket {} with key {} already exists. 
" + "If you want to overwrite it, enable setting s3_truncate_on_insert, if you " + "want to create a new file on each insert, enable setting s3_create_new_file_on_insert", + s3_configuration.uri.bucket, + keys.back()); } return std::make_shared( @@ -1191,7 +1186,8 @@ void StorageS3::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, updateS3Configuration(local_context, s3_configuration); if (is_key_with_globs) - throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, "S3 key '{}' contains globs, so the table is in readonly mode", s3_configuration.uri.key); + throw Exception(ErrorCodes::DATABASE_ACCESS_DENIED, + "S3 key '{}' contains globs, so the table is in readonly mode", s3_configuration.uri.key); Aws::S3::Model::Delete delkeys; @@ -1211,7 +1207,7 @@ void StorageS3::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, if (!response.IsSuccess()) { const auto & err = response.GetError(); - throw Exception(std::to_string(static_cast(err.GetErrorType())) + ": " + err.GetMessage(), ErrorCodes::S3_ERROR); + throw Exception(ErrorCodes::S3_ERROR, "{}: {}", std::to_string(static_cast(err.GetErrorType())), err.GetMessage()); } } @@ -1301,9 +1297,9 @@ StorageS3Configuration StorageS3::getConfiguration(ASTs & engine_args, ContextPt /// S3('url', 'aws_access_key_id', 'aws_secret_access_key', 'format', 'compression') if (engine_args.empty() || engine_args.size() > 5) - throw Exception( - "Storage S3 requires 1 to 5 arguments: url, [access_key_id, secret_access_key], name of used format and [compression_method].", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Storage S3 requires 1 to 5 arguments: " + "url, [access_key_id, secret_access_key], name of used format and [compression_method]."); auto * header_it = StorageURL::collectHeaders(engine_args, configuration.headers, local_context); if (header_it != engine_args.end()) @@ -1391,9 +1387,8 @@ ColumnsDescription StorageS3::getTableStructureFromDataImpl( if (first) throw Exception( ErrorCodes::CANNOT_EXTRACT_TABLE_STRUCTURE, - "Cannot extract table structure from {} format file, because there are no files with provided path in S3. You must specify " - "table structure manually", - format); + "Cannot extract table structure from {} format file, because there are no files with provided path " + "in S3. You must specify table structure manually", format); return nullptr; } diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 02fcb7d624c..157507d81b3 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -145,8 +145,6 @@ public: Chunk generate() override; - void onCancel() override; - private: String name; String bucket; @@ -209,8 +207,6 @@ private: ReaderHolder reader; - /// onCancel and generate can be called concurrently - std::mutex reader_mutex; std::vector requested_virtual_columns; std::shared_ptr file_iterator; size_t download_thread_num = 1; diff --git a/src/Storages/StorageS3Settings.cpp b/src/Storages/StorageS3Settings.cpp index be83d60131a..fc505f67966 100644 --- a/src/Storages/StorageS3Settings.cpp +++ b/src/Storages/StorageS3Settings.cpp @@ -141,9 +141,10 @@ void S3Settings::RequestSettings::PartUploadSettings::validate() size_t maybe_overflow; if (common::mulOverflow(max_upload_part_size, upload_part_size_multiply_factor, maybe_overflow)) throw Exception( - ErrorCodes::INVALID_SETTING_VALUE, - "Setting upload_part_size_multiply_factor is too big ({}). 
Multiplication to max_upload_part_size ({}) will cause integer overflow", - ReadableSize(max_part_number), ReadableSize(max_part_number_limit)); + ErrorCodes::INVALID_SETTING_VALUE, + "Setting upload_part_size_multiply_factor is too big ({}). " + "Multiplication to max_upload_part_size ({}) will cause integer overflow", + ReadableSize(max_part_number), ReadableSize(max_part_number_limit)); std::unordered_set storage_class_names {"STANDARD", "INTELLIGENT_TIERING"}; if (!storage_class_name.empty() && !storage_class_names.contains(storage_class_name)) diff --git a/src/Storages/StorageSQLite.cpp b/src/Storages/StorageSQLite.cpp index 92f954ebb9d..706bc31122c 100644 --- a/src/Storages/StorageSQLite.cpp +++ b/src/Storages/StorageSQLite.cpp @@ -161,8 +161,7 @@ void registerStorageSQLite(StorageFactory & factory) ASTs & engine_args = args.engine_args; if (engine_args.size() != 2) - throw Exception("SQLite database requires 2 arguments: database path, table name", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "SQLite database requires 2 arguments: database path, table name"); for (auto & engine_arg : engine_args) engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, args.getLocalContext()); diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index 48f8adfece2..b715a8e059b 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -129,7 +129,7 @@ StorageSetOrJoinBase::StorageSetOrJoinBase( if (relative_path_.empty()) - throw Exception("Join and Set storages require data path", ErrorCodes::INCORRECT_FILE_NAME); + throw Exception(ErrorCodes::INCORRECT_FILE_NAME, "Join and Set storages require data path"); path = relative_path_; } @@ -242,9 +242,8 @@ void registerStorageSet(StorageFactory & factory) factory.registerStorage("Set", [](const StorageFactory::Arguments & args) { if (!args.engine_args.empty()) - throw Exception( - "Engine " + args.engine_name + " doesn't support any arguments (" + toString(args.engine_args.size()) + " given)", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Engine {} doesn't support any arguments ({} given)", + args.engine_name, args.engine_args.size()); bool has_settings = args.storage_def->settings; SetSettings set_settings; diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index 62823f6dabd..be5045b884f 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -175,7 +175,7 @@ public: *data_out_compressed, CompressionCodecFactory::instance().getDefaultCodec(), storage.max_compress_block_size)) { if (!lock) - throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Lock timeout exceeded"); /// Ensure that indices are loaded because we're going to update them. storage.loadIndices(lock); @@ -283,7 +283,7 @@ StorageStripeLog::StorageStripeLog( setInMemoryMetadata(storage_metadata); if (relative_path_.empty()) - throw Exception("Storage " + getName() + " requires data path", ErrorCodes::INCORRECT_FILE_NAME); + throw Exception(ErrorCodes::INCORRECT_FILE_NAME, "Storage {} requires data path", getName()); /// Ensure the file checker is initialized. 
if (file_checker.empty()) @@ -358,7 +358,7 @@ Pipe StorageStripeLog::read( ReadLock lock{rwlock, lock_timeout}; if (!lock) - throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Lock timeout exceeded"); size_t data_file_size = file_checker.getFileSize(data_file_path); if (!data_file_size) @@ -396,7 +396,7 @@ SinkToStoragePtr StorageStripeLog::write(const ASTPtr & /*query*/, const Storage { WriteLock lock{rwlock, getLockTimeout(local_context)}; if (!lock) - throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Lock timeout exceeded"); return std::make_shared(*this, metadata_snapshot, std::move(lock)); } @@ -406,7 +406,7 @@ CheckResults StorageStripeLog::checkData(const ASTPtr & /* query */, ContextPtr { ReadLock lock{rwlock, getLockTimeout(local_context)}; if (!lock) - throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Lock timeout exceeded"); return file_checker.check(); } @@ -435,7 +435,7 @@ void StorageStripeLog::loadIndices(std::chrono::seconds lock_timeout) /// a data race between two threads trying to load indices simultaneously. WriteLock lock{rwlock, lock_timeout}; if (!lock) - throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Lock timeout exceeded"); loadIndices(lock); } @@ -532,7 +532,7 @@ void StorageStripeLog::backupData(BackupEntriesCollector & backup_entries_collec ReadLock lock{rwlock, lock_timeout}; if (!lock) - throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Lock timeout exceeded"); if (!file_checker.getFileSize(data_file_path)) return; @@ -603,7 +603,7 @@ void StorageStripeLog::restoreDataImpl(const BackupPtr & backup, const String & { WriteLock lock{rwlock, lock_timeout}; if (!lock) - throw Exception("Lock timeout exceeded", ErrorCodes::TIMEOUT_EXCEEDED); + throw Exception(ErrorCodes::TIMEOUT_EXCEEDED, "Lock timeout exceeded"); /// Load the indices if not loaded yet. We have to do that now because we're going to update these indices. 
loadIndices(lock); @@ -675,9 +675,8 @@ void registerStorageStripeLog(StorageFactory & factory) factory.registerStorage("StripeLog", [](const StorageFactory::Arguments & args) { if (!args.engine_args.empty()) - throw Exception( - "Engine " + args.engine_name + " doesn't support any arguments (" + toString(args.engine_args.size()) + " given)", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Engine {} doesn't support any arguments ({} given)", + args.engine_name, args.engine_args.size()); String disk_name = getDiskName(*args.storage_def); DiskPtr disk = args.getContext()->getDisk(disk_name); diff --git a/src/Storages/StorageTableFunction.h b/src/Storages/StorageTableFunction.h index b105e50a54f..9ba7497fbf2 100644 --- a/src/Storages/StorageTableFunction.h +++ b/src/Storages/StorageTableFunction.h @@ -140,7 +140,7 @@ public: auto actual_structure = storage->getInMemoryMetadataPtr()->getSampleBlock(); if (!blocksHaveEqualStructure(actual_structure, cached_structure) && add_conversion) { - throw Exception("Source storage and table function have different structure", ErrorCodes::INCOMPATIBLE_COLUMNS); + throw Exception(ErrorCodes::INCOMPATIBLE_COLUMNS, "Source storage and table function have different structure"); } return storage->write(query, metadata_snapshot, context); } diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 7559d9c720e..9bcbc9e6f45 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -157,13 +157,6 @@ namespace }; using URIInfoPtr = std::shared_ptr; - void onCancel() override - { - std::lock_guard lock(reader_mutex); - if (reader) - reader->cancel(); - } - static void setCredentials(Poco::Net::HTTPBasicCredentials & credentials, const Poco::URI & request_uri) { const auto & user_info = request_uri.getUserInfo(); @@ -241,6 +234,13 @@ namespace { while (true) { + if (isCancelled()) + { + if (reader) + reader->cancel(); + break; + } + if (!reader) { auto current_uri_pos = uri_info->next_uri_to_read.fetch_add(1); @@ -249,18 +249,17 @@ namespace auto current_uri = uri_info->uri_list_to_read[current_uri_pos]; - std::lock_guard lock(reader_mutex); initialize(current_uri); } Chunk chunk; - std::lock_guard lock(reader_mutex); if (reader->pull(chunk)) return chunk; pipeline->reset(); reader.reset(); } + return {}; } static std::unique_ptr getFirstAvailableURLReadBuffer( @@ -443,9 +442,6 @@ namespace std::unique_ptr read_buf; std::unique_ptr pipeline; std::unique_ptr reader; - /// onCancell and generate can be called concurrently and both of them - /// have R/W access to reader pointer. 
- std::mutex reader_mutex; Poco::Net::HTTPBasicCredentials credentials; }; diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 9a75f8277fd..1a7050b4dff 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -113,7 +113,7 @@ StorageView::StorageView( storage_metadata.setComment(comment); if (!query.select) - throw Exception("SELECT query is not specified for " + getName(), ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "SELECT query is not specified for {}", getName()); SelectQueryDescription description; description.inner_query = query.select->ptr(); @@ -138,7 +138,7 @@ void StorageView::read( if (query_info.view_query) { if (!query_info.view_query->as()) - throw Exception("Unexpected optimized VIEW query", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected optimized VIEW query"); current_inner_query = query_info.view_query->clone(); } @@ -193,12 +193,12 @@ void StorageView::read( static ASTTableExpression * getFirstTableExpression(ASTSelectQuery & select_query) { if (!select_query.tables() || select_query.tables()->children.empty()) - throw Exception("Logical error: no table expression in view select AST", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: no table expression in view select AST"); auto * select_element = select_query.tables()->children[0]->as(); if (!select_element->table_expression) - throw Exception("Logical error: incorrect table expression", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: incorrect table expression"); return select_element->table_expression->as(); } @@ -229,7 +229,7 @@ void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_ } if (!table_expression->database_and_table_name) - throw Exception("Logical error: incorrect table expression", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: incorrect table expression"); } DatabaseAndTableWithAlias db_table(table_expression->database_and_table_name); @@ -292,7 +292,7 @@ ASTPtr StorageView::restoreViewName(ASTSelectQuery & select_query, const ASTPtr ASTTableExpression * table_expression = getFirstTableExpression(select_query); if (!table_expression->subquery) - throw Exception("Logical error: incorrect table expression", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: incorrect table expression"); ASTPtr subquery = table_expression->subquery; table_expression->subquery = {}; @@ -309,7 +309,7 @@ void registerStorageView(StorageFactory & factory) factory.registerStorage("View", [](const StorageFactory::Arguments & args) { if (args.query.storage) - throw Exception("Specifying ENGINE is not allowed for a View", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Specifying ENGINE is not allowed for a View"); return std::make_shared(args.table_id, args.query, args.columns, args.comment); }); diff --git a/src/Storages/StorageXDBC.cpp b/src/Storages/StorageXDBC.cpp index 5f57d37278b..7f073c0e2fe 100644 --- a/src/Storages/StorageXDBC.cpp +++ b/src/Storages/StorageXDBC.cpp @@ -165,8 +165,8 @@ namespace ASTs & engine_args = args.engine_args; if (engine_args.size() != 3) - throw Exception("Storage " + name + " requires exactly 3 parameters: " + name + "('DSN', database or schema, table)", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + 
"Storage {} requires exactly 3 parameters: {}('DSN', database or schema, table)", name, name); for (size_t i = 0; i < 3; ++i) engine_args[i] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[i], args.getLocalContext()); diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index f69f9f8ee7f..6ca6a9db046 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -30,6 +30,7 @@ const char * auto_contributors[] { "Aleksandr Shalimov", "Aleksandra (Ася)", "Aleksandrov Vladimir", + "Aleksei Filatov", "Aleksei Levushkin", "Aleksei Semiglazov", "Aleksey", @@ -192,6 +193,7 @@ const char * auto_contributors[] { "Bill", "BiteTheDDDDt", "BlahGeek", + "Bo Lu", "Bogdan", "Bogdan Voronin", "BohuTANG", @@ -256,6 +258,7 @@ const char * auto_contributors[] { "Denis Krivak", "Denis Zhuravlev", "Denny Crane", + "Denys Golotiuk", "Derek Chia", "Derek Perkins", "Diego Nieto (lesandie)", @@ -300,6 +303,7 @@ const char * auto_contributors[] { "Elizaveta Mironyuk", "Elykov Alexandr", "Emmanuel Donin de Rosière", + "Enrique Herreros", "Eric", "Eric Daniel", "Erixonich", @@ -476,6 +480,7 @@ const char * auto_contributors[] { "Kirill Shvakov", "Koblikov Mihail", "KochetovNicolai", + "Konstantin Bogdanov", "Konstantin Grabar", "Konstantin Ilchenko", "Konstantin Lebedev", @@ -571,6 +576,7 @@ const char * auto_contributors[] { "Mc.Spring", "Meena Renganathan", "Meena-Renganathan", + "MeenaRenganathan22", "MeiK", "Memo", "Metehan Çetinkaya", @@ -866,10 +872,12 @@ const char * auto_contributors[] { "VDimir", "VVMak", "Vadim", + "Vadim Akhma", "Vadim Plakhtinskiy", "Vadim Skipin", "Vadim Volodin", "VadimPE", + "Vage Ogannisian", "Val", "Valera Ryaboshapko", "Varinara", @@ -1033,6 +1041,7 @@ const char * auto_contributors[] { "bobrovskij artemij", "booknouse", "bseng", + "candiduslynx", "canenoneko", "caspian", "cekc", @@ -1266,6 +1275,7 @@ const char * auto_contributors[] { "maxim-babenko", "maxkuzn", "maxulan", + "mayamika", "mehanizm", "melin", "memo", @@ -1348,7 +1358,10 @@ const char * auto_contributors[] { "ritaank", "rnbondarenko", "robert", + "robot-ch-test-poll1", + "robot-ch-test-poll4", "robot-clickhouse", + "robot-clickhouse-ci-1", "robot-metrika-test", "rodrigargar", "roman", @@ -1372,7 +1385,9 @@ const char * auto_contributors[] { "shedx", "shuchaome", "shuyang", + "sichenzhao", "simon-says", + "simpleton", "snyk-bot", "songenjie", "sperlingxx", @@ -1380,6 +1395,7 @@ const char * auto_contributors[] { "spongedc", "spume", "spyros87", + "stan", "stavrolia", "stepenhu", "su-houzhen", @@ -1435,6 +1451,7 @@ const char * auto_contributors[] { "wangdh15", "weeds085490", "whysage", + "wineternity", "wuxiaobai24", "wzl", "xPoSx", @@ -1458,6 +1475,7 @@ const char * auto_contributors[] { "yonesko", "youenn lebras", "young scott", + "yuanyimeng", "yuchuansun", "yuefoo", "yulu86", diff --git a/src/Storages/System/StorageSystemFormats.cpp b/src/Storages/System/StorageSystemFormats.cpp index b35fdd3f85e..d8a49880257 100644 --- a/src/Storages/System/StorageSystemFormats.cpp +++ b/src/Storages/System/StorageSystemFormats.cpp @@ -12,6 +12,8 @@ NamesAndTypesList StorageSystemFormats::getNamesAndTypes() {"name", std::make_shared()}, {"is_input", std::make_shared()}, {"is_output", std::make_shared()}, + {"supports_parallel_parsing", std::make_shared()}, + {"supports_parallel_formatting", std::make_shared()}, }; } @@ -23,9 +25,14 @@ void 
StorageSystemFormats::fillData(MutableColumns & res_columns, ContextPtr, co const auto & [format_name, creators] = pair; UInt64 has_input_format(creators.input_creator != nullptr); UInt64 has_output_format(creators.output_creator != nullptr); + UInt64 supports_parallel_parsing(creators.file_segmentation_engine != nullptr); + UInt64 supports_parallel_formatting(creators.supports_parallel_formatting); + res_columns[0]->insert(format_name); res_columns[1]->insert(has_input_format); res_columns[2]->insert(has_output_format); + res_columns[3]->insert(supports_parallel_parsing); + res_columns[4]->insert(supports_parallel_formatting); } } diff --git a/src/Storages/System/StorageSystemMergeTreeMetadataCache.cpp b/src/Storages/System/StorageSystemMergeTreeMetadataCache.cpp index 731cdf94157..3bb92814a2f 100644 --- a/src/Storages/System/StorageSystemMergeTreeMetadataCache.cpp +++ b/src/Storages/System/StorageSystemMergeTreeMetadataCache.cpp @@ -100,8 +100,9 @@ void StorageSystemMergeTreeMetadataCache::fillData(MutableColumns & res_columns, bool precise = false; String key = extractKey(query_info.query, precise); if (key.empty()) - throw Exception( - "SELECT from system.merge_tree_metadata_cache table must contain condition like key = 'key' or key LIKE 'prefix%' in WHERE clause.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "SELECT from system.merge_tree_metadata_cache table must contain condition like key = 'key' " + "or key LIKE 'prefix%' in WHERE clause."); auto cache = context->getMergeTreeMetadataCache(); if (precise) @@ -118,8 +119,9 @@ void StorageSystemMergeTreeMetadataCache::fillData(MutableColumns & res_columns, { String target = extractFixedPrefixFromLikePattern(key, /*requires_perfect_prefix*/ false); if (target.empty()) - throw Exception( - "SELECT from system.merge_tree_metadata_cache table must contain condition like key = 'key' or key LIKE 'prefix%' in WHERE clause.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "SELECT from system.merge_tree_metadata_cache table must contain condition like key = 'key' " + "or key LIKE 'prefix%' in WHERE clause."); Strings keys; Strings values; diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index a0c022f5540..fb7f4f9066a 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -232,7 +232,7 @@ StoragesInfo StoragesInfoStream::next() info.data = dynamic_cast(info.storage.get()); if (!info.data) - throw Exception("Unknown engine " + info.engine, ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown engine {}", info.engine); return info; } diff --git a/src/Storages/System/StorageSystemQueryResultCache.cpp b/src/Storages/System/StorageSystemQueryResultCache.cpp new file mode 100644 index 00000000000..cb6349b6d47 --- /dev/null +++ b/src/Storages/System/StorageSystemQueryResultCache.cpp @@ -0,0 +1,55 @@ +#include "StorageSystemQueryResultCache.h" +#include +#include +#include +#include +#include + + +namespace DB +{ + +NamesAndTypesList StorageSystemQueryResultCache::getNamesAndTypes() +{ + return { + {"query", std::make_shared()}, + {"key_hash", std::make_shared()}, + {"expires_at", std::make_shared()}, + {"stale", std::make_shared()}, + {"shared", std::make_shared()}, + {"result_size", std::make_shared()} + }; +} + +StorageSystemQueryResultCache::StorageSystemQueryResultCache(const StorageID & table_id_) + : IStorageSystemOneBlock(table_id_) +{ +} + 
+void StorageSystemQueryResultCache::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const
+{
+    auto query_result_cache = context->getQueryResultCache();
+
+    if (!query_result_cache)
+        return;
+
+    const String & username = context->getUserName();
+
+    std::lock_guard lock(query_result_cache->mutex);
+
+    for (const auto & [key, result] : query_result_cache->cache)
+    {
+        /// Showing other user's queries is considered a security risk
+        if (key.username.has_value() && key.username != username)
+            continue;
+
+        res_columns[0]->insert(key.queryStringFromAst()); /// approximates the original query string
+        res_columns[1]->insert(key.ast->getTreeHash().first);
+        res_columns[2]->insert(std::chrono::system_clock::to_time_t(key.expires_at));
+        res_columns[3]->insert(key.expires_at < std::chrono::system_clock::now());
+        res_columns[4]->insert(!key.username.has_value());
+        res_columns[5]->insert(result.sizeInBytes());
+    }
+}
+
+}
diff --git a/src/Storages/System/StorageSystemQueryResultCache.h b/src/Storages/System/StorageSystemQueryResultCache.h
new file mode 100644
index 00000000000..4862878a31a
--- /dev/null
+++ b/src/Storages/System/StorageSystemQueryResultCache.h
@@ -0,0 +1,21 @@
+#pragma once
+
+#include <Storages/System/IStorageSystemOneBlock.h>
+
+namespace DB
+{
+
+class StorageSystemQueryResultCache final : public IStorageSystemOneBlock<StorageSystemQueryResultCache>
+{
+public:
+    explicit StorageSystemQueryResultCache(const StorageID & table_id_);
+
+    std::string getName() const override { return "SystemQueryResultCache"; }
+
+    static NamesAndTypesList getNamesAndTypes();
+
+protected:
+    void fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo & query_info) const override;
+};
+
+}
diff --git a/src/Storages/System/StorageSystemStackTrace.cpp b/src/Storages/System/StorageSystemStackTrace.cpp
index df3d8b74e6e..da3d6b98dc5 100644
--- a/src/Storages/System/StorageSystemStackTrace.cpp
+++ b/src/Storages/System/StorageSystemStackTrace.cpp
@@ -151,7 +151,7 @@ namespace
                 continue;   /// Drain delayed notifications.
             }
 
-            throw Exception("Logical error: read wrong number of bytes from pipe", ErrorCodes::LOGICAL_ERROR);
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: read wrong number of bytes from pipe");
         }
     }
diff --git a/src/Storages/System/StorageSystemZooKeeper.cpp b/src/Storages/System/StorageSystemZooKeeper.cpp
index 1212d9da60a..9663c76a5c3 100644
--- a/src/Storages/System/StorageSystemZooKeeper.cpp
+++ b/src/Storages/System/StorageSystemZooKeeper.cpp
@@ -122,22 +122,22 @@ public:
         /// We don't expect a "name" contains a path.
if (name.find('/') != std::string::npos) { - throw Exception("Column `name` should not contain '/'", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Column `name` should not contain '/'"); } if (name.empty()) { - throw Exception("Column `name` should not be empty", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Column `name` should not be empty"); } if (path.empty()) { - throw Exception("Column `path` should not be empty", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Column `path` should not be empty"); } if (path.size() + name.size() > PATH_MAX) { - throw Exception("Sum of `name` length and `path` length should not exceed PATH_MAX", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Sum of `name` length and `path` length should not exceed PATH_MAX"); } std::vector path_vec; @@ -176,7 +176,7 @@ StorageSystemZooKeeper::StorageSystemZooKeeper(const StorageID & table_id_) SinkToStoragePtr StorageSystemZooKeeper::write(const ASTPtr &, const StorageMetadataPtr &, ContextPtr context) { if (!context->getConfigRef().getBool("allow_zookeeper_write", false)) - throw Exception("Prohibit writing to system.zookeeper, unless config `allow_zookeeper_write` as true", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Prohibit writing to system.zookeeper, unless config `allow_zookeeper_write` as true"); Block write_header; write_header.insert(ColumnWithTypeAndName(std::make_shared(), "name")); write_header.insert(ColumnWithTypeAndName(std::make_shared(), "value")); @@ -404,7 +404,10 @@ void StorageSystemZooKeeper::fillData(MutableColumns & res_columns, ContextPtr c zkutil::ZooKeeperPtr zookeeper = context->getZooKeeper(); if (paths.empty()) - throw Exception("SELECT from system.zookeeper table must contain condition like path = 'path' or path IN ('path1','path2'...) or path IN (subquery) in WHERE clause unless `set allow_unrestricted_reads_from_keeper = 'true'`.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "SELECT from system.zookeeper table must contain condition like path = 'path' " + "or path IN ('path1','path2'...) 
or path IN (subquery) " + "in WHERE clause unless `set allow_unrestricted_reads_from_keeper = 'true'`."); std::unordered_set added; while (!paths.empty()) diff --git a/src/Storages/System/attachSystemTables.cpp b/src/Storages/System/attachSystemTables.cpp index e82f7c9bb2b..eeb08d7e3d4 100644 --- a/src/Storages/System/attachSystemTables.cpp +++ b/src/Storages/System/attachSystemTables.cpp @@ -73,6 +73,7 @@ #include #include #include +#include #include #include #include @@ -175,6 +176,7 @@ void attachSystemTablesServer(ContextPtr context, IDatabase & system_database, b attach(context, system_database, "part_moves_between_shards"); attach(context, system_database, "asynchronous_inserts"); attach(context, system_database, "filesystem_cache"); + attach(context, system_database, "query_result_cache"); attach(context, system_database, "remote_data_paths"); attach(context, system_database, "certificates"); attach(context, system_database, "named_collections"); diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index 2971d977099..e1a80800630 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -63,11 +63,9 @@ void checkTTLExpression(const ExpressionActionsPtr & ttl_expression, const Strin { const IFunctionBase & func = *action.node->function_base; if (!func.isDeterministic()) - throw Exception( - "TTL expression cannot contain non-deterministic functions, " - "but contains function " - + func.getName(), - ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "TTL expression cannot contain non-deterministic functions, but contains function {}", + func.getName()); } } @@ -76,9 +74,9 @@ void checkTTLExpression(const ExpressionActionsPtr & ttl_expression, const Strin if (!typeid_cast(result_column.type.get()) && !typeid_cast(result_column.type.get())) { - throw Exception( - "TTL expression result column should have DateTime or Date type, but has " + result_column.type->getName(), - ErrorCodes::BAD_TTL_EXPRESSION); + throw Exception(ErrorCodes::BAD_TTL_EXPRESSION, + "TTL expression result column should have DateTime or Date type, but has {}", + result_column.type->getName()); } } @@ -206,7 +204,7 @@ TTLDescription TTLDescription::getTTLFromAST( const auto & pk_columns = primary_key.column_names; if (ttl_element->group_by_key.size() > pk_columns.size()) - throw Exception("TTL Expression GROUP BY key should be a prefix of primary key", ErrorCodes::BAD_TTL_EXPRESSION); + throw Exception(ErrorCodes::BAD_TTL_EXPRESSION, "TTL Expression GROUP BY key should be a prefix of primary key"); NameSet aggregation_columns_set; NameSet used_primary_key_columns_set; @@ -214,9 +212,7 @@ TTLDescription TTLDescription::getTTLFromAST( for (size_t i = 0; i < ttl_element->group_by_key.size(); ++i) { if (ttl_element->group_by_key[i]->getColumnName() != pk_columns[i]) - throw Exception( - "TTL Expression GROUP BY key should be a prefix of primary key", - ErrorCodes::BAD_TTL_EXPRESSION); + throw Exception(ErrorCodes::BAD_TTL_EXPRESSION, "TTL Expression GROUP BY key should be a prefix of primary key"); used_primary_key_columns_set.insert(pk_columns[i]); } @@ -240,9 +236,7 @@ TTLDescription TTLDescription::getTTLFromAST( } if (aggregation_columns_set.size() != ttl_element->group_by_assignments.size()) - throw Exception( - "Multiple aggregations set for one column in TTL Expression", - ErrorCodes::BAD_TTL_EXPRESSION); + throw Exception(ErrorCodes::BAD_TTL_EXPRESSION, "Multiple aggregations set for one column in TTL Expression"); result.group_by_keys 
= Names(pk_columns.begin(), pk_columns.begin() + ttl_element->group_by_key.size()); @@ -350,7 +344,7 @@ TTLTableDescription TTLTableDescription::getTTLForTableFromAST( if (!ttl.where_expression) { if (have_unconditional_delete_ttl) - throw Exception("More than one DELETE TTL expression without WHERE expression is not allowed", ErrorCodes::BAD_TTL_EXPRESSION); + throw Exception(ErrorCodes::BAD_TTL_EXPRESSION, "More than one DELETE TTL expression without WHERE expression is not allowed"); have_unconditional_delete_ttl = true; result.rows_ttl = ttl; diff --git a/src/Storages/UVLoop.h b/src/Storages/UVLoop.h index 6b24252077e..4945e1b56fa 100644 --- a/src/Storages/UVLoop.h +++ b/src/Storages/UVLoop.h @@ -25,7 +25,7 @@ public: int res = uv_loop_init(loop_ptr.get()); if (res != 0) - throw Exception("UVLoop could not initialize", ErrorCodes::SYSTEM_ERROR); + throw Exception(ErrorCodes::SYSTEM_ERROR, "UVLoop could not initialize"); } ~UVLoop() diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index ffa04bcdd83..4ff00facfdc 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -132,7 +132,7 @@ void rewriteEntityInAst(ASTPtr ast, const String & column_name, const Field & va bool prepareFilterBlockWithQuery(const ASTPtr & query, ContextPtr context, Block block, ASTPtr & expression_ast) { if (block.rows() == 0) - throw Exception("Cannot prepare filter with empty block", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot prepare filter with empty block"); /// Take the first row of the input block to build a constant block auto columns = block.getColumns(); diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index 442a7822e33..8fb2470495f 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -119,8 +119,7 @@ namespace else { if (data.check_duplicate_window && serializeAST(*temp_node) != data.serialized_window_function) - throw Exception( - "WINDOW VIEW only support ONE TIME WINDOW FUNCTION", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW); + throw Exception(ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW, "WINDOW VIEW only support ONE TIME WINDOW FUNCTION"); t->name = "windowID"; } } @@ -263,7 +262,7 @@ namespace if (!interval_unit || (interval_unit->value.getType() != Field::Types::String && interval_unit->value.getType() != Field::Types::UInt64)) - throw Exception("Interval argument must be integer", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Interval argument must be integer"); if (interval_unit->value.getType() == Field::Types::String) num_units = parse(interval_unit->value.safeGet()); @@ -271,7 +270,7 @@ namespace num_units = interval_unit->value.safeGet(); if (num_units <= 0) - throw Exception("Value for Interval argument must be positive.", ErrorCodes::ARGUMENT_OUT_OF_BOUND); + throw Exception(ErrorCodes::ARGUMENT_OUT_OF_BOUND, "Value for Interval argument must be positive."); } UInt32 addTime(UInt32 time_sec, IntervalKind::Kind kind, Int64 num_units, const DateLUTImpl & time_zone) @@ -281,7 +280,7 @@ namespace case IntervalKind::Nanosecond: case IntervalKind::Microsecond: case IntervalKind::Millisecond: - throw Exception("Fractional seconds are not supported by windows yet", ErrorCodes::SYNTAX_ERROR); + throw Exception(ErrorCodes::SYNTAX_ERROR, "Fractional seconds are not supported by windows yet"); #define CASE_WINDOW_KIND(KIND) \ case 
IntervalKind::KIND: { \
            return AddTime<IntervalKind::KIND>::execute(time_sec, num_units, time_zone); \
@@ -367,7 +366,7 @@ static void extractDependentTable(ContextPtr context, ASTPtr & query, String & s
     else if (auto * ast_select = subquery->as<ASTSelectWithUnionQuery>())
     {
         if (ast_select->list_of_selects->children.size() != 1)
-            throw Exception("UNION is not supported for WINDOW VIEW", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW);
+            throw Exception(ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_WINDOW_VIEW, "UNION is not supported for WINDOW VIEW");
 
         auto & inner_select_query = ast_select->list_of_selects->children.at(0);
@@ -872,7 +871,7 @@ UInt32 StorageWindowView::getWindowLowerBound(UInt32 time_sec)
         case IntervalKind::Nanosecond:
         case IntervalKind::Microsecond:
         case IntervalKind::Millisecond:
-            throw Exception("Fractional seconds are not supported by windows yet", ErrorCodes::SYNTAX_ERROR);
+            throw Exception(ErrorCodes::SYNTAX_ERROR, "Fractional seconds are not supported by windows yet");
 #define CASE_WINDOW_KIND(KIND) \
     case IntervalKind::KIND: \
     { \
@@ -905,7 +904,7 @@ UInt32 StorageWindowView::getWindowUpperBound(UInt32 time_sec)
         case IntervalKind::Nanosecond:
         case IntervalKind::Microsecond:
         case IntervalKind::Millisecond:
-            throw Exception("Fractional seconds are not supported by window view yet", ErrorCodes::SYNTAX_ERROR);
+            throw Exception(ErrorCodes::SYNTAX_ERROR, "Fractional seconds are not supported by window view yet");
 #define CASE_WINDOW_KIND(KIND) \
     case IntervalKind::KIND: \
@@ -1168,9 +1167,9 @@ StorageWindowView::StorageWindowView(
     /// If the target table is not set, use inner target table
     has_inner_target_table = query.to_table_id.empty();
     if (has_inner_target_table && !query.storage)
-        throw Exception(
-            "You must specify where to save results of a WindowView query: either ENGINE or an existing table in a TO clause",
-            ErrorCodes::INCORRECT_QUERY);
+        throw Exception(ErrorCodes::INCORRECT_QUERY,
+                        "You must specify where to save results of a WindowView query: "
+                        "either ENGINE or an existing table in a TO clause");
 
     if (query.select->list_of_selects->children.size() != 1)
         throw Exception(
@@ -1254,7 +1253,7 @@ ASTPtr StorageWindowView::initInnerQuery(ASTSelectQuery query, ContextPtr contex
     ReplaceFunctionNowVisitor(func_now_data).visit(mergeable_query);
     is_time_column_func_now = func_now_data.is_time_column_func_now;
     if (!is_proctime && is_time_column_func_now)
-        throw Exception("now() is not supported for Event time processing.", ErrorCodes::INCORRECT_QUERY);
+        throw Exception(ErrorCodes::INCORRECT_QUERY, "now() is not supported for Event time processing.");
 
     if (is_time_column_func_now)
         window_id_name = func_now_data.window_id_name;
@@ -1443,11 +1442,11 @@ void StorageWindowView::writeIntoWindowView(
         });
     }
 
-    std::shared_lock<std::shared_mutex> fire_signal_lock;
+    std::shared_lock<SharedMutex> fire_signal_lock;
     QueryPipelineBuilder builder;
     if (window_view.is_proctime)
     {
-        fire_signal_lock = std::shared_lock(window_view.fire_signal_mutex);
+        fire_signal_lock = std::shared_lock(window_view.fire_signal_mutex);
 
         /// Fill ____timestamp column with current time in case of now() time column.
         if (window_view.is_time_column_func_now)
@@ -1663,9 +1662,9 @@ void registerStorageWindowView(StorageFactory & factory)
     factory.registerStorage("WindowView", [](const StorageFactory::Arguments & args)
     {
         if (!args.attach && !args.getLocalContext()->getSettingsRef().allow_experimental_window_view)
-            throw Exception(
-                "Experimental WINDOW VIEW feature is not enabled (the setting 'allow_experimental_window_view')",
-                ErrorCodes::SUPPORT_IS_DISABLED);
+            throw Exception(ErrorCodes::SUPPORT_IS_DISABLED,
+                            "Experimental WINDOW VIEW feature "
+                            "is not enabled (the setting 'allow_experimental_window_view')");
 
         return std::make_shared<StorageWindowView>(args.table_id, args.getLocalContext(), args.query, args.columns, args.attach);
     });
diff --git a/src/Storages/WindowView/StorageWindowView.h b/src/Storages/WindowView/StorageWindowView.h
index 6da34389e4d..b313e466211 100644
--- a/src/Storages/WindowView/StorageWindowView.h
+++ b/src/Storages/WindowView/StorageWindowView.h
@@ -1,5 +1,6 @@
 #pragma once
 
+#include <Common/SharedMutex.h>
 #include
 #include
 #include
@@ -213,7 +214,7 @@ private:
     /// Mutex for the blocks and ready condition
     std::mutex mutex;
-    std::shared_mutex fire_signal_mutex;
+    SharedMutex fire_signal_mutex;
     mutable std::mutex sample_block_lock; /// Mutex to protect access to sample block
 
     IntervalKind::Kind window_kind;
diff --git a/src/Storages/addColumnsStructureToQueryWithClusterEngine.cpp b/src/Storages/addColumnsStructureToQueryWithClusterEngine.cpp
index 31f49fa5490..75c5bebb901 100644
--- a/src/Storages/addColumnsStructureToQueryWithClusterEngine.cpp
+++ b/src/Storages/addColumnsStructureToQueryWithClusterEngine.cpp
@@ -37,7 +37,8 @@ void addColumnsStructureToQueryWithClusterEngine(ASTPtr & query, const String &
     auto structure_literal = std::make_shared<ASTLiteral>(structure);
 
     if (expression_list->children.size() < 2 || expression_list->children.size() > max_arguments)
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected 2 to {} arguments in {} table functions, got {}", function_name, max_arguments, expression_list->children.size());
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected 2 to {} arguments in {} table functions, got {}",
+                        function_name, max_arguments, expression_list->children.size());
 
     if (expression_list->children.size() == 2 || expression_list->children.size() == max_arguments - 1)
     {
diff --git a/src/Storages/getStructureOfRemoteTable.cpp b/src/Storages/getStructureOfRemoteTable.cpp
index 7bd5e629c39..0721cfaa9c4 100644
--- a/src/Storages/getStructureOfRemoteTable.cpp
+++ b/src/Storages/getStructureOfRemoteTable.cpp
@@ -155,9 +155,8 @@ ColumnsDescription getStructureOfRemoteTable(
         }
     }
 
-    throw NetException(
-        "All attempts to get table structure failed. Log: \n\n" + fail_messages + "\n",
-        ErrorCodes::NO_REMOTE_SHARD_AVAILABLE);
+    throw NetException(ErrorCodes::NO_REMOTE_SHARD_AVAILABLE,
+                       "All attempts to get table structure failed. 
Log: \n\n{}\n", fail_messages); } ColumnsDescriptionByShardNum getExtendedObjectsOfRemoteTables( @@ -220,7 +219,7 @@ ColumnsDescriptionByShardNum getExtendedObjectsOfRemoteTables( } if (columns.empty()) - throw NetException("All attempts to get table structure failed", ErrorCodes::NO_REMOTE_SHARD_AVAILABLE); + throw NetException(ErrorCodes::NO_REMOTE_SHARD_AVAILABLE, "All attempts to get table structure failed"); return columns; } diff --git a/src/Storages/transformQueryForExternalDatabase.cpp b/src/Storages/transformQueryForExternalDatabase.cpp index 51b11680f82..1ff310c3fac 100644 --- a/src/Storages/transformQueryForExternalDatabase.cpp +++ b/src/Storages/transformQueryForExternalDatabase.cpp @@ -116,7 +116,7 @@ bool isCompatible(IAST & node) return false; if (!function->arguments) - throw Exception("Logical error: function->arguments is not set", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Logical error: function->arguments is not set"); String name = function->name; @@ -288,7 +288,7 @@ String transformQueryForExternalDatabase( } else if (strict) { - throw Exception("Query contains non-compatible expressions (and external_table_strict_query=true)", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Query contains non-compatible expressions (and external_table_strict_query=true)"); } else if (const auto * function = original_where->as()) { @@ -309,7 +309,7 @@ String transformQueryForExternalDatabase( } else if (strict && original_where) { - throw Exception("Query contains non-compatible expressions (and external_table_strict_query=true)", ErrorCodes::INCORRECT_QUERY); + throw Exception(ErrorCodes::INCORRECT_QUERY, "Query contains non-compatible expressions (and external_table_strict_query=true)"); } auto * literal_expr = typeid_cast(original_where.get()); diff --git a/src/TableFunctions/ITableFunctionFileLike.cpp b/src/TableFunctions/ITableFunctionFileLike.cpp index d62e44a16cc..8cbffc10e5a 100644 --- a/src/TableFunctions/ITableFunctionFileLike.cpp +++ b/src/TableFunctions/ITableFunctionFileLike.cpp @@ -45,12 +45,12 @@ void ITableFunctionFileLike::parseArguments(const ASTPtr & ast_function, Context ASTs & args_func = ast_function->children; if (args_func.size() != 1) - throw Exception("Table function '" + getName() + "' must have arguments.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Table function '{}' must have arguments.", getName()); ASTs & args = args_func.at(0)->children; if (args.empty()) - throw Exception("Table function '" + getName() + "' requires at least 1 argument", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' requires at least 1 argument", getName()); for (auto & arg : args) arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); @@ -67,8 +67,10 @@ void ITableFunctionFileLike::parseArguments(const ASTPtr & ast_function, Context return; if (args.size() != 3 && args.size() != 4) - throw Exception("Table function '" + getName() + "' requires 1, 2, 3 or 4 arguments: filename, format (default auto), structure (default auto) and compression method (default auto)", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Table function '{}' requires 1, 2, 3 or 4 arguments: " + "filename, format (default auto), structure (default auto) and compression method (default auto)", + getName()); structure = checkAndGetLiteralArgument(args[2], 
"structure"); diff --git a/src/TableFunctions/TableFunctionFactory.cpp b/src/TableFunctions/TableFunctionFactory.cpp index ff001661000..76108f1cdd4 100644 --- a/src/TableFunctions/TableFunctionFactory.cpp +++ b/src/TableFunctions/TableFunctionFactory.cpp @@ -21,13 +21,12 @@ void TableFunctionFactory::registerFunction( const std::string & name, Value value, CaseSensitiveness case_sensitiveness) { if (!table_functions.emplace(name, value).second) - throw Exception("TableFunctionFactory: the table function name '" + name + "' is not unique", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "TableFunctionFactory: the table function name '{}' is not unique", name); if (case_sensitiveness == CaseInsensitive && !case_insensitive_table_functions.emplace(Poco::toLower(name), value).second) - throw Exception("TableFunctionFactory: the case insensitive table function name '" + name + "' is not unique", - ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "TableFunctionFactory: " + "the case insensitive table function name '{}' is not unique", name); KnownTableFunctionNames::instance().add(name, (case_sensitiveness == CaseInsensitive)); } diff --git a/src/TableFunctions/TableFunctionFile.cpp b/src/TableFunctions/TableFunctionFile.cpp index 4ecf29a05bd..9cd71196dcf 100644 --- a/src/TableFunctions/TableFunctionFile.cpp +++ b/src/TableFunctions/TableFunctionFile.cpp @@ -46,11 +46,10 @@ void TableFunctionFile::parseFirstArguments(const ASTPtr & arg, const ContextPtr fd = static_cast( (type == Field::Types::Int64) ? literal->value.get() : literal->value.get()); if (fd < 0) - throw Exception("File descriptor must be non-negative", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "File descriptor must be non-negative"); } else - throw Exception( - "The first argument of table function '" + getName() + "' mush be path or file descriptor", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The first argument of table function '{}' mush be path or file descriptor", getName()); } String TableFunctionFile::getFormatFromFirstArgument() @@ -89,8 +88,7 @@ ColumnsDescription TableFunctionFile::getActualTableStructure(ContextPtr context if (structure == "auto") { if (fd >= 0) - throw Exception( - "Schema inference is not supported for table function '" + getName() + "' with file descriptor", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Schema inference is not supported for table function '{}' with file descriptor", getName()); size_t total_bytes_to_read = 0; Strings paths = StorageFile::getPathsList(filename, context->getUserFilesPath(), context, total_bytes_to_read); return StorageFile::getTableStructureFromFile(format, paths, compression_method, std::nullopt, context); diff --git a/src/TableFunctions/TableFunctionFormat.cpp b/src/TableFunctions/TableFunctionFormat.cpp index b15b350f00b..f2a92b41560 100644 --- a/src/TableFunctions/TableFunctionFormat.cpp +++ b/src/TableFunctions/TableFunctionFormat.cpp @@ -34,12 +34,12 @@ void TableFunctionFormat::parseArguments(const ASTPtr & ast_function, ContextPtr ASTs & args_func = ast_function->children; if (args_func.size() != 1) - throw Exception("Table function '" + getName() + "' must have arguments", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Table function '{}' must have arguments", getName()); ASTs & args = args_func.at(0)->children; if (args.size() != 2) - throw Exception("Table function '" + getName() + "' requires 2 
arguments: format and data", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' requires 2 arguments: format and data", getName()); for (auto & arg : args) arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); diff --git a/src/TableFunctions/TableFunctionGenerateRandom.cpp b/src/TableFunctions/TableFunctionGenerateRandom.cpp index bed3397152b..5f1a13d8857 100644 --- a/src/TableFunctions/TableFunctionGenerateRandom.cpp +++ b/src/TableFunctions/TableFunctionGenerateRandom.cpp @@ -33,7 +33,7 @@ void TableFunctionGenerateRandom::parseArguments(const ASTPtr & ast_function, Co ASTs & args_func = ast_function->children; if (args_func.size() != 1) - throw Exception("Table function '" + getName() + "' must have arguments.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Table function '{}' must have arguments.", getName()); ASTs & args = args_func.at(0)->children; @@ -41,19 +41,18 @@ void TableFunctionGenerateRandom::parseArguments(const ASTPtr & ast_function, Co return; if (args.size() > 4) - throw Exception("Table function '" + getName() + "' requires at most four arguments: " - " structure, [random_seed, max_string_length, max_array_length].", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Table function '{}' requires at most four arguments: " + " structure, [random_seed, max_string_length, max_array_length].", getName()); // All the arguments must be literals. for (const auto & arg : args) { if (!arg->as()) { - throw Exception(fmt::format( + throw Exception(ErrorCodes::BAD_ARGUMENTS, "All arguments of table function '{}' must be literals. " - "Got '{}' instead", getName(), arg->formatForErrorMessage()), - ErrorCodes::BAD_ARGUMENTS); + "Got '{}' instead", getName(), arg->formatForErrorMessage()); } } diff --git a/src/TableFunctions/TableFunctionHDFSCluster.cpp b/src/TableFunctions/TableFunctionHDFSCluster.cpp index 73b77f770b2..c9c1d167bc0 100644 --- a/src/TableFunctions/TableFunctionHDFSCluster.cpp +++ b/src/TableFunctions/TableFunctionHDFSCluster.cpp @@ -41,20 +41,18 @@ void TableFunctionHDFSCluster::parseArguments(const ASTPtr & ast_function, Conte ASTs & args_func = ast_copy->children; if (args_func.size() != 1) - throw Exception("Table function '" + getName() + "' must have arguments.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' must have arguments.", getName()); ASTs & args = args_func.at(0)->children; - const auto message = fmt::format( - "The signature of table function {} shall be the following:\n" \ - " - cluster, uri\n",\ - " - cluster, uri, format\n",\ - " - cluster, uri, format, structure\n",\ - " - cluster, uri, format, structure, compression_method", - getName()); - if (args.size() < 2 || args.size() > 5) - throw Exception(message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "The signature of table function {} shall be the following:\n" + " - cluster, uri\n", + " - cluster, uri, format\n", + " - cluster, uri, format, structure\n", + " - cluster, uri, format, structure, compression_method", + getName()); for (auto & arg : args) arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); @@ -85,7 +83,7 @@ ColumnsDescription TableFunctionHDFSCluster::getActualTableStructure(ContextPtr StoragePtr 
TableFunctionHDFSCluster::getStorage( - const String & /*source*/, const String & /*format_*/, const ColumnsDescription &, ContextPtr context, + const String & /*source*/, const String & /*format_*/, const ColumnsDescription & columns, ContextPtr context, const std::string & table_name, const String & /*compression_method_*/) const { StoragePtr storage; @@ -96,7 +94,7 @@ StoragePtr TableFunctionHDFSCluster::getStorage( filename, StorageID(getDatabaseName(), table_name), format, - getActualTableStructure(context), + columns, ConstraintsDescription{}, String{}, context, @@ -109,8 +107,8 @@ StoragePtr TableFunctionHDFSCluster::getStorage( storage = std::make_shared( context, cluster_name, filename, StorageID(getDatabaseName(), table_name), - format, getActualTableStructure(context), ConstraintsDescription{}, - compression_method); + format, columns, ConstraintsDescription{}, + compression_method, structure != "auto"); } return storage; } diff --git a/src/TableFunctions/TableFunctionHDFSCluster.h b/src/TableFunctions/TableFunctionHDFSCluster.h index a0555a904d1..9641b71c5e3 100644 --- a/src/TableFunctions/TableFunctionHDFSCluster.h +++ b/src/TableFunctions/TableFunctionHDFSCluster.h @@ -28,7 +28,6 @@ public: { return name; } - bool hasStaticStructure() const override { return true; } protected: StoragePtr getStorage( diff --git a/src/TableFunctions/TableFunctionInput.cpp b/src/TableFunctions/TableFunctionInput.cpp index df87afcad3c..4941241acae 100644 --- a/src/TableFunctions/TableFunctionInput.cpp +++ b/src/TableFunctions/TableFunctionInput.cpp @@ -26,7 +26,7 @@ void TableFunctionInput::parseArguments(const ASTPtr & ast_function, ContextPtr const auto * function = ast_function->as(); if (!function->arguments) - throw Exception("Table function '" + getName() + "' must have arguments", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Table function '{}' must have arguments", getName()); auto args = function->arguments->children; @@ -37,8 +37,8 @@ void TableFunctionInput::parseArguments(const ASTPtr & ast_function, ContextPtr } if (args.size() != 1) - throw Exception("Table function '" + getName() + "' requires exactly 1 argument: structure", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Table function '{}' requires exactly 1 argument: structure", getName()); structure = checkAndGetLiteralArgument(evaluateConstantExpressionOrIdentifierAsLiteral(args[0], context), "structure"); } diff --git a/src/TableFunctions/TableFunctionMerge.cpp b/src/TableFunctions/TableFunctionMerge.cpp index b055e241459..742cc7f366b 100644 --- a/src/TableFunctions/TableFunctionMerge.cpp +++ b/src/TableFunctions/TableFunctionMerge.cpp @@ -42,16 +42,16 @@ void TableFunctionMerge::parseArguments(const ASTPtr & ast_function, ContextPtr ASTs & args_func = ast_function->children; if (args_func.size() != 1) - throw Exception("Table function 'merge' requires exactly 2 arguments" - " - name of source database and regexp for table names.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Table function 'merge' requires exactly 2 arguments - name " + "of source database and regexp for table names."); ASTs & args = args_func.at(0)->children; if (args.size() != 2) - throw Exception("Table function 'merge' requires exactly 2 arguments" - " - name of source database and regexp for table names.", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw 
Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Table function 'merge' requires exactly 2 arguments - name " + "of source database and regexp for table names."); auto [is_regexp, database_ast] = StorageMerge::evaluateDatabaseName(args[0], context); diff --git a/src/TableFunctions/TableFunctionMongoDB.cpp b/src/TableFunctions/TableFunctionMongoDB.cpp index b88b7fda063..31dd64f8254 100644 --- a/src/TableFunctions/TableFunctionMongoDB.cpp +++ b/src/TableFunctions/TableFunctionMongoDB.cpp @@ -55,14 +55,15 @@ void TableFunctionMongoDB::parseArguments(const ASTPtr & ast_function, ContextPt { const auto & func_args = ast_function->as(); if (!func_args.arguments) - throw Exception("Table function 'mongodb' must have arguments.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table function 'mongodb' must have arguments."); ASTs & args = func_args.arguments->children; if (args.size() < 6 || args.size() > 7) { throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Table function 'mongodb' requires from 6 to 7 parameters: mongodb('host:port', database, collection, 'user', 'password', structure, [, 'options'])"); + "Table function 'mongodb' requires from 6 to 7 parameters: " + "mongodb('host:port', database, collection, 'user', 'password', structure, [, 'options'])"); } ASTs main_arguments(args.begin(), args.begin() + 5); diff --git a/src/TableFunctions/TableFunctionMySQL.cpp b/src/TableFunctions/TableFunctionMySQL.cpp index 58858eb495c..0cbad7bd9fd 100644 --- a/src/TableFunctions/TableFunctionMySQL.cpp +++ b/src/TableFunctions/TableFunctionMySQL.cpp @@ -35,7 +35,7 @@ void TableFunctionMySQL::parseArguments(const ASTPtr & ast_function, ContextPtr const auto & args_func = ast_function->as(); if (!args_func.arguments) - throw Exception("Table function 'mysql' must have arguments.", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Table function 'mysql' must have arguments."); auto & args = args_func.arguments->children; diff --git a/src/TableFunctions/TableFunctionNull.cpp b/src/TableFunctions/TableFunctionNull.cpp index 9ff07cc1946..d2c091a7b5f 100644 --- a/src/TableFunctions/TableFunctionNull.cpp +++ b/src/TableFunctions/TableFunctionNull.cpp @@ -21,13 +21,12 @@ void TableFunctionNull::parseArguments(const ASTPtr & ast_function, ContextPtr c { const auto * function = ast_function->as(); if (!function || !function->arguments) - throw Exception("Table function '" + getName() + "' requires 'structure'", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' requires 'structure'", getName()); const auto & arguments = function->arguments->children; if (!arguments.empty() && arguments.size() != 1) - throw Exception( - "Table function '" + getName() + "' requires 'structure' argument or empty argument", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, + "Table function '{}' requires 'structure' argument or empty argument", getName()); if (!arguments.empty()) structure = checkAndGetLiteralArgument(evaluateConstantExpressionOrIdentifierAsLiteral(arguments[0], context), "structure"); diff --git a/src/TableFunctions/TableFunctionNumbers.cpp b/src/TableFunctions/TableFunctionNumbers.cpp index 2056cd838f5..ba7a4dc4b36 100644 --- a/src/TableFunctions/TableFunctionNumbers.cpp +++ b/src/TableFunctions/TableFunctionNumbers.cpp @@ -37,7 +37,7 @@ StoragePtr TableFunctionNumbers::executeImpl(const ASTPtr & 
ast_f auto arguments = function->arguments->children; if (arguments.size() != 1 && arguments.size() != 2) - throw Exception("Table function '" + getName() + "' requires 'length' or 'offset, length'.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' requires 'length' or 'offset, length'.", getName()); UInt64 offset = arguments.size() == 2 ? evaluateArgument(context, arguments[0]) : 0; UInt64 length = arguments.size() == 2 ? evaluateArgument(context, arguments[1]) : evaluateArgument(context, arguments[0]); @@ -46,7 +46,7 @@ StoragePtr TableFunctionNumbers::executeImpl(const ASTPtr & ast_f res->startup(); return res; } - throw Exception("Table function '" + getName() + "' requires 'limit' or 'offset, limit'.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' requires 'limit' or 'offset, limit'.", getName()); } void registerTableFunctionNumbers(TableFunctionFactory & factory) @@ -65,7 +65,8 @@ UInt64 TableFunctionNumbers::evaluateArgument(ContextPtr context, Field converted = convertFieldToType(field, DataTypeUInt64()); if (converted.isNull()) - throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The value {} is not representable as UInt64", applyVisitor(FieldVisitorToString(), field)); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "The value {} is not representable as UInt64", + applyVisitor(FieldVisitorToString(), field)); return converted.safeGet(); } diff --git a/src/TableFunctions/TableFunctionPostgreSQL.cpp b/src/TableFunctions/TableFunctionPostgreSQL.cpp index 2edfe82c708..ab6212d0e30 100644 --- a/src/TableFunctions/TableFunctionPostgreSQL.cpp +++ b/src/TableFunctions/TableFunctionPostgreSQL.cpp @@ -58,7 +58,7 @@ void TableFunctionPostgreSQL::parseArguments(const ASTPtr & ast_function, Contex { const auto & func_args = ast_function->as(); if (!func_args.arguments) - throw Exception("Table function 'PostgreSQL' must have arguments.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table function 'PostgreSQL' must have arguments."); configuration.emplace(StoragePostgreSQL::getConfiguration(func_args.arguments->children, context)); const auto & settings = context->getSettingsRef(); diff --git a/src/TableFunctions/TableFunctionRemote.cpp b/src/TableFunctions/TableFunctionRemote.cpp index 14e0774cf06..7fd6ea10ddc 100644 --- a/src/TableFunctions/TableFunctionRemote.cpp +++ b/src/TableFunctions/TableFunctionRemote.cpp @@ -145,7 +145,7 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr if (!tryGetIdentifierNameInto(args[arg_num], cluster_name)) { if (!get_string_literal(*args[arg_num], cluster_description)) - throw Exception("Hosts pattern must be string literal (in single quotes).", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Hosts pattern must be string literal (in single quotes)."); } } @@ -243,7 +243,7 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, ContextPtr names.push_back(parseRemoteDescription(shard, 0, shard.size(), '|', max_addresses)); if (names.empty()) - throw Exception("Shard list is empty after parsing first argument", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Shard list is empty after parsing first argument"); auto maybe_secure_port = context->getTCPPortSecure(); @@ -276,7 +276,7 @@ void TableFunctionRemote::parseArguments(const ASTPtr & ast_function, 
ContextPtr } if (!remote_table_function_ptr && configuration.table.empty()) - throw Exception("The name of remote table cannot be empty", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "The name of remote table cannot be empty"); remote_table_id.database_name = configuration.database; remote_table_id.table_name = configuration.table; diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index 687ee51a0a5..7a7f6b37b81 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -127,7 +127,7 @@ void TableFunctionS3::parseArguments(const ASTPtr & ast_function, ContextPtr con getName()); if (args_func.size() != 1) - throw Exception("Table function '" + getName() + "' must have arguments.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' must have arguments.", getName()); auto & args = args_func.at(0)->children; diff --git a/src/TableFunctions/TableFunctionS3Cluster.cpp b/src/TableFunctions/TableFunctionS3Cluster.cpp index 82790e1a328..5c3e60c125e 100644 --- a/src/TableFunctions/TableFunctionS3Cluster.cpp +++ b/src/TableFunctions/TableFunctionS3Cluster.cpp @@ -42,25 +42,23 @@ void TableFunctionS3Cluster::parseArguments(const ASTPtr & ast_function, Context ASTs & args_func = ast_function->children; if (args_func.size() != 1) - throw Exception("Table function '" + getName() + "' must have arguments.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' must have arguments.", getName()); ASTs & args = args_func.at(0)->children; for (auto & arg : args) arg = evaluateConstantExpressionAsLiteral(arg, context); - const auto message = fmt::format( - "The signature of table function {} could be the following:\n" \ - " - cluster, url\n" - " - cluster, url, format\n" \ - " - cluster, url, format, structure\n" \ - " - cluster, url, access_key_id, secret_access_key\n" \ - " - cluster, url, format, structure, compression_method\n" \ - " - cluster, url, access_key_id, secret_access_key, format\n" - " - cluster, url, access_key_id, secret_access_key, format, structure\n" \ - " - cluster, url, access_key_id, secret_access_key, format, structure, compression_method", - getName()); - + constexpr auto fmt_string = "The signature of table function {} could be the following:\n" + " - cluster, url\n" + " - cluster, url, format\n" + " - cluster, url, format, structure\n" + " - cluster, url, access_key_id, secret_access_key\n" + " - cluster, url, format, structure, compression_method\n" + " - cluster, url, access_key_id, secret_access_key, format\n" + " - cluster, url, access_key_id, secret_access_key, format, structure\n" + " - cluster, url, access_key_id, secret_access_key, format, structure, compression_method"; + auto message = PreformattedMessage{fmt::format(fmt_string, getName()), fmt_string}; if (args.size() < 2 || args.size() > 7) throw Exception(message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); @@ -76,7 +74,7 @@ void TableFunctionS3Cluster::parseArguments(const ASTPtr & ast_function, Context std::copy(args.begin() + 1, args.end(), std::back_inserter(clipped_args)); /// StorageS3ClusterConfiguration inherints from StorageS3Configuration, so it is safe to upcast it. 
- TableFunctionS3::parseArgumentsImpl(message, clipped_args, context, static_cast(configuration)); + TableFunctionS3::parseArgumentsImpl(message.message, clipped_args, context, static_cast(configuration)); } diff --git a/src/TableFunctions/TableFunctionSQLite.cpp b/src/TableFunctions/TableFunctionSQLite.cpp index 64ff93494db..13c6fcea60c 100644 --- a/src/TableFunctions/TableFunctionSQLite.cpp +++ b/src/TableFunctions/TableFunctionSQLite.cpp @@ -62,13 +62,12 @@ void TableFunctionSQLite::parseArguments(const ASTPtr & ast_function, ContextPtr const auto & func_args = ast_function->as(); if (!func_args.arguments) - throw Exception("Table function 'sqlite' must have arguments.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table function 'sqlite' must have arguments."); ASTs & args = func_args.arguments->children; if (args.size() != 2) - throw Exception("SQLite database requires 2 arguments: database path, table name", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "SQLite database requires 2 arguments: database path, table name"); for (auto & arg : args) arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); diff --git a/src/TableFunctions/TableFunctionValues.cpp b/src/TableFunctions/TableFunctionValues.cpp index 05574825275..545427f30c9 100644 --- a/src/TableFunctions/TableFunctionValues.cpp +++ b/src/TableFunctions/TableFunctionValues.cpp @@ -56,7 +56,7 @@ static void parseAndInsertValues(MutableColumns & res_columns, const ASTs & args const Tuple & value_tuple = value_field.safeGet(); if (value_tuple.size() != sample_block.columns()) - throw Exception("Values size should match with number of columns", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Values size should match with number of columns"); const DataTypes & value_types_tuple = type_tuple->getElements(); for (size_t j = 0; j < value_tuple.size(); ++j) @@ -83,12 +83,12 @@ void TableFunctionValues::parseArguments(const ASTPtr & ast_function, ContextPtr ASTs & args_func = ast_function->children; if (args_func.size() != 1) - throw Exception("Table function '" + getName() + "' must have arguments", ErrorCodes::LOGICAL_ERROR); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Table function '{}' must have arguments", getName()); ASTs & args = args_func.at(0)->children; if (args.empty()) - throw Exception("Table function '" + getName() + "' requires at least 1 argument", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' requires at least 1 argument", getName()); const auto & literal = args[0]->as(); String value; diff --git a/src/TableFunctions/TableFunctionView.cpp b/src/TableFunctions/TableFunctionView.cpp index c999cba08e9..578c497e720 100644 --- a/src/TableFunctions/TableFunctionView.cpp +++ b/src/TableFunctions/TableFunctionView.cpp @@ -32,7 +32,7 @@ void TableFunctionView::parseArguments(const ASTPtr & ast_function, ContextPtr / return; } } - throw Exception("Table function '" + getName() + "' requires a query argument.", ErrorCodes::BAD_ARGUMENTS); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table function '{}' requires a query argument.", getName()); } ColumnsDescription TableFunctionView::getActualTableStructure(ContextPtr context) const diff --git a/src/TableFunctions/TableFunctionZeros.cpp b/src/TableFunctions/TableFunctionZeros.cpp index 5874fca67e6..d1c67659f56 100644 --- a/src/TableFunctions/TableFunctionZeros.cpp 
+++ b/src/TableFunctions/TableFunctionZeros.cpp @@ -34,7 +34,7 @@ StoragePtr TableFunctionZeros::executeImpl(const ASTPtr & ast_fun auto arguments = function->arguments->children; if (arguments.size() != 1) - throw Exception("Table function '" + getName() + "' requires 'length'.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' requires 'length'.", getName()); UInt64 length = evaluateArgument(context, arguments[0]); @@ -43,7 +43,7 @@ StoragePtr TableFunctionZeros::executeImpl(const ASTPtr & ast_fun res->startup(); return res; } - throw Exception("Table function '" + getName() + "' requires 'limit'.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Table function '{}' requires 'limit'.", getName()); } void registerTableFunctionZeros(TableFunctionFactory & factory) diff --git a/tests/ci/cherry_pick.py b/tests/ci/cherry_pick.py index f7c69445eed..a7af807c57c 100644 --- a/tests/ci/cherry_pick.py +++ b/tests/ci/cherry_pick.py @@ -311,11 +311,13 @@ class Backport: logging.info("Active releases: %s", ", ".join(self.release_branches)) def receive_prs_for_backport(self): - # The commit is the oldest open release branch's merge-base - since_commit = git_runner( - f"git merge-base {self.remote}/{self.release_branches[0]} " - f"{self.remote}/{self.default_branch}" + # The commits in the oldest open release branch + oldest_branch_commits = git_runner( + "git log --no-merges --format=%H --reverse " + f"{self.remote}/{self.default_branch}..{self.remote}/{self.release_branches[0]}" ) + # The first commit is the one we are looking for + since_commit = oldest_branch_commits.split("\n", 1)[0] since_date = date.fromisoformat( git_runner.run(f"git log -1 --format=format:%cs {since_commit}") ) diff --git a/tests/ci/fast_test_check.py b/tests/ci/fast_test_check.py index 7a87a93c26d..fbec1f93c3b 100644 --- a/tests/ci/fast_test_check.py +++ b/tests/ci/fast_test_check.py @@ -175,6 +175,7 @@ def main(): "test_log.txt" in test_output_files or "test_result.txt" in test_output_files ) test_result_exists = "test_results.tsv" in test_output_files + test_results = [] # type: TestResults if "submodule_log.txt" not in test_output_files: description = "Cannot clone repository" state = "failure" diff --git a/tests/ci/push_to_artifactory.py b/tests/ci/push_to_artifactory.py deleted file mode 100755 index 97971f207ce..00000000000 --- a/tests/ci/push_to_artifactory.py +++ /dev/null @@ -1,367 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -import argparse -import logging -import os -import re -from collections import namedtuple -from typing import Dict, List, Optional, Tuple - -from artifactory import ArtifactorySaaSPath # type: ignore -from build_download_helper import download_build_with_progress -from env_helper import S3_ARTIFACT_DOWNLOAD_TEMPLATE, RUNNER_TEMP -from git_helper import TAG_REGEXP, commit, removeprefix, removesuffix - - -# Necessary ENV variables -def getenv(name: str, default: Optional[str] = None) -> str: - env = os.getenv(name, default) - if env is not None: - return env - raise KeyError(f"Necessary {name} environment is not set") - - -TEMP_PATH = os.path.join(RUNNER_TEMP, "push_to_artifactory") -# One of the following ENVs is necessary -JFROG_API_KEY = getenv("JFROG_API_KEY", "") -JFROG_TOKEN = getenv("JFROG_TOKEN", "") - -CheckDesc = namedtuple("CheckDesc", ("check_name", "deb_arch", "rpm_arch")) - - -class Packages: - checks = ( - 
CheckDesc("package_release", "amd64", "x86_64"), - CheckDesc("package_aarch64", "arm64", "aarch64"), - ) - packages = ( - "clickhouse-client", - "clickhouse-common-static", - "clickhouse-common-static-dbg", - "clickhouse-server", - ) - - def __init__(self, version: str): - # Dicts of name: s3_path_suffix - self.deb = {} # type: Dict[str, str] - self.rpm = {} # type: Dict[str, str] - self.tgz = {} # type: Dict[str, str] - for check in self.checks: - for name in self.packages: - deb = f"{name}_{version}_{check.deb_arch}.deb" - self.deb[deb] = f"{check.check_name}/{deb}" - - rpm = f"{name}-{version}.{check.rpm_arch}.rpm" - self.rpm[rpm] = f"{check.check_name}/{rpm}" - - tgz = f"{name}-{version}-{check.deb_arch}.tgz" - self.tgz[tgz] = f"{check.check_name}/{tgz}" - - def arch(self, deb_pkg: str) -> str: - if deb_pkg not in self.deb: - raise ValueError(f"{deb_pkg} not in {self.deb}") - return removesuffix(deb_pkg, ".deb").split("_")[-1] - - def replace_with_fallback(self, name: str) -> None: - if name.endswith(".deb"): - suffix = self.deb.pop(name) - self.deb[self.fallback_to_all(name)] = self.fallback_to_all(suffix) - elif name.endswith(".rpm"): - suffix = self.rpm.pop(name) - self.rpm[self.fallback_to_all(name)] = self.fallback_to_all(suffix) - elif name.endswith(".tgz"): - suffix = self.tgz.pop(name) - self.tgz[self.fallback_to_all(name)] = self.fallback_to_all(suffix) - else: - raise KeyError(f"unknown package type for {name}") - - @staticmethod - def path(package_file: str) -> str: - return os.path.join(TEMP_PATH, package_file) - - @staticmethod - def fallback_to_all(url_or_name: str) -> str: - """Until July 2022 we had clickhouse-server and clickhouse-client with - arch 'all'""" - # deb - if url_or_name.endswith("amd64.deb") or url_or_name.endswith("arm64.deb"): - return f"{url_or_name[:-9]}all.deb" - # rpm - if url_or_name.endswith("x86_64.rpm") or url_or_name.endswith("aarch64.rpm"): - new = removesuffix(removesuffix(url_or_name, "x86_64.rpm"), "aarch64.rpm") - return f"{new}noarch.rpm" - # tgz - if url_or_name.endswith("-amd64.tgz") or url_or_name.endswith("-arm64.tgz"): - return f"{url_or_name[:-10]}.tgz" - return url_or_name - - -class S3: - def __init__( - self, - pr: int, - commit: str, - version: str, - force_download: bool, - ): - self._common = dict( - pr_or_release=pr, - commit=commit, - ) - self.force_download = force_download - self.packages = Packages(version) - - def download_package(self, package_file: str, s3_path_suffix: str) -> None: - path = Packages.path(package_file) - fallback_path = Packages.fallback_to_all(path) - if not self.force_download and ( - os.path.exists(path) or os.path.exists(fallback_path) - ): - if os.path.exists(fallback_path): - self.packages.replace_with_fallback(package_file) - - return - build_name, artifact = s3_path_suffix.split("/") - url = S3_ARTIFACT_DOWNLOAD_TEMPLATE.format_map( - {**self._common, "build_name": build_name, "artifact": artifact} - ) - try: - download_build_with_progress(url, path) - except Exception as e: - if "Cannot download dataset from" in e.args[0]: - new_url = Packages.fallback_to_all(url) - logging.warning( - "Fallback downloading %s for old release", fallback_path - ) - download_build_with_progress(new_url, fallback_path) - self.packages.replace_with_fallback(package_file) - - def download_deb(self): - # Copy to have a way to pop/add fallback packages - packages = self.packages.deb.copy() - for package_file, s3_path_suffix in packages.items(): - self.download_package(package_file, s3_path_suffix) - - def 
download_rpm(self): - # Copy to have a way to pop/add fallback packages - packages = self.packages.rpm.copy() - for package_file, s3_path_suffix in packages.items(): - self.download_package(package_file, s3_path_suffix) - - def download_tgz(self): - # Copy to have a way to pop/add fallback packages - packages = self.packages.tgz.copy() - for package_file, s3_path_suffix in packages.items(): - self.download_package(package_file, s3_path_suffix) - - -class Release: - def __init__(self, name: str): - r = re.compile(TAG_REGEXP) - # Automatically remove refs/tags/ if full refname passed here - name = removeprefix(name, "refs/tags/") - if not r.match(name): - raise argparse.ArgumentTypeError( - f"release name {name} does not match " - "v12.1.2.15-(testing|prestable|stable|lts) pattern" - ) - self._name = name - self._version = removeprefix(self._name, "v") - self._version = self.version.split("-")[0] - self._version_parts = tuple(self.version.split(".")) - self._type = self._name.split("-")[-1] - - @property - def version(self) -> str: - return self._version - - @property - def version_parts(self) -> Tuple[str, ...]: - return self._version_parts - - @property - def type(self) -> str: - return self._type - - -class Artifactory: - def __init__( - self, - url: str, - release: str, - deb_repo: str = "deb", - rpm_repo: str = "rpm", - tgz_repo: str = "tgz", - ): - self._url = url - self._release = release - self._deb_url = "/".join((self._url, deb_repo, "pool", self._release)) + "/" - self._rpm_url = "/".join((self._url, rpm_repo, self._release)) + "/" - self._tgz_url = "/".join((self._url, tgz_repo, self._release)) + "/" - # check the credentials ENVs for early exit - self.__path_helper("_deb", "") - - def deploy_deb(self, packages: Packages) -> None: - for package_file in packages.deb: - path = packages.path(package_file) - dist = self._release - comp = "main" - arch = packages.arch(package_file) - logging.info( - "Deploy %s(distribution=%s;component=%s;architecture=%s) " - "to artifactory", - path, - dist, - comp, - arch, - ) - self.deb_path(package_file).deploy_deb(path, dist, comp, arch) - - def deploy_rpm(self, packages: Packages) -> None: - for package_file in packages.rpm: - path = packages.path(package_file) - logging.info("Deploy %s to artifactory", path) - self.rpm_path(package_file).deploy_file(path) - - def deploy_tgz(self, packages: Packages) -> None: - for package_file in packages.tgz: - path = packages.path(package_file) - logging.info("Deploy %s to artifactory", path) - self.tgz_path(package_file).deploy_file(path) - - def __path_helper(self, name: str, package_file: str) -> ArtifactorySaaSPath: - url = "/".join((getattr(self, name + "_url"), package_file)) - path = None - if JFROG_API_KEY: - path = ArtifactorySaaSPath(url, apikey=JFROG_API_KEY) - elif JFROG_TOKEN: - path = ArtifactorySaaSPath(url, token=JFROG_TOKEN) - else: - raise KeyError("Neither JFROG_API_KEY nor JFROG_TOKEN env are defined") - return path - - def deb_path(self, package_file: str) -> ArtifactorySaaSPath: - return self.__path_helper("_deb", package_file) - - def rpm_path(self, package_file: str) -> ArtifactorySaaSPath: - return self.__path_helper("_rpm", package_file) - - def tgz_path(self, package_file: str) -> ArtifactorySaaSPath: - return self.__path_helper("_tgz", package_file) - - -def parse_args() -> argparse.Namespace: - parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - description="Program to download artifacts from S3 and push them to " - "artifactory. 
ENV variables JFROG_API_KEY and JFROG_TOKEN are used " - "for authentication in the given order", - ) - parser.add_argument( - "--release", - required=True, - type=Release, - help="release name, e.g. v12.13.14.15-prestable; 'refs/tags/' " - "prefix is striped automatically", - ) - parser.add_argument( - "--pull-request", - type=int, - default=0, - help="pull request number; if PR is omitted, the first two numbers " - "from release will be used, e.g. 12.11", - ) - parser.add_argument( - "--commit", required=True, type=commit, help="commit hash for S3 bucket" - ) - parser.add_argument( - "--all", action="store_true", help="implies all deb, rpm and tgz" - ) - parser.add_argument( - "--deb", action="store_true", help="if Debian packages should be processed" - ) - parser.add_argument( - "--rpm", action="store_true", help="if RPM packages should be processed" - ) - parser.add_argument( - "--tgz", - action="store_true", - help="if tgz archives should be processed. They aren't pushed to artifactory", - ) - parser.add_argument( - "--artifactory-url", - default="https://clickhousedb.jfrog.io/artifactory", - help="SaaS Artifactory url", - ) - parser.add_argument("--artifactory", default=True, help=argparse.SUPPRESS) - parser.add_argument( - "-n", - "--no-artifactory", - action="store_false", - dest="artifactory", - default=argparse.SUPPRESS, - help="do not push packages to artifactory", - ) - parser.add_argument("--force-download", default=True, help=argparse.SUPPRESS) - parser.add_argument( - "--no-force-download", - action="store_false", - dest="force_download", - default=argparse.SUPPRESS, - help="do not download packages again if they exist already", - ) - - args = parser.parse_args() - if args.all: - args.deb = args.rpm = args.tgz = True - if not (args.deb or args.rpm or args.tgz): - parser.error("at least one of --deb, --rpm or --tgz should be specified") - if args.pull_request == 0: - args.pull_request = ".".join(args.release.version_parts[:2]) - return args - - -def process_deb(s3: S3, art_clients: List[Artifactory]) -> None: - s3.download_deb() - for art_client in art_clients: - art_client.deploy_deb(s3.packages) - - -def process_rpm(s3: S3, art_clients: List[Artifactory]) -> None: - s3.download_rpm() - for art_client in art_clients: - art_client.deploy_rpm(s3.packages) - - -def process_tgz(s3: S3, art_clients: List[Artifactory]) -> None: - s3.download_tgz() - for art_client in art_clients: - art_client.deploy_tgz(s3.packages) - - -def main(): - logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") - args = parse_args() - os.makedirs(TEMP_PATH, exist_ok=True) - s3 = S3( - args.pull_request, - args.commit, - args.release.version, - args.force_download, - ) - art_clients = [] - if args.artifactory: - art_clients.append(Artifactory(args.artifactory_url, args.release.type)) - if args.release.type == "lts": - art_clients.append(Artifactory(args.artifactory_url, "stable")) - - if args.deb: - process_deb(s3, art_clients) - if args.rpm: - process_rpm(s3, art_clients) - if args.tgz: - process_tgz(s3, art_clients) - - -if __name__ == "__main__": - main() diff --git a/tests/ci/tee_popen.py b/tests/ci/tee_popen.py index f80678fe8ba..4869301785e 100644 --- a/tests/ci/tee_popen.py +++ b/tests/ci/tee_popen.py @@ -29,11 +29,13 @@ class TeePopen: self.env = env or os.environ.copy() self._process = None # type: Optional[Popen] self.timeout = timeout + self.timeout_exceeded = False def _check_timeout(self) -> None: if self.timeout is None: return sleep(self.timeout) + self.timeout_exceeded 
= True while self.process.poll() is None: logging.warning( "Killing process %s, timeout %s exceeded", @@ -62,6 +64,16 @@ class TeePopen: def __exit__(self, exc_type, exc_value, traceback): self.wait() + if self.timeout_exceeded: + exceeded_log = ( + f"Command `{self.command}` has failed, " + f"timeout {self.timeout}s is exceeded" + ) + if self.process.stdout is not None: + sys.stdout.write(exceeded_log) + + self.log_file.write(exceeded_log) + self.log_file.close() def wait(self) -> int: diff --git a/tests/clickhouse-test b/tests/clickhouse-test index b49d1614d8a..392cdf63daa 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -232,19 +232,52 @@ def need_retry(args, stdout, stderr, total_time): ) -def get_processlist(args): +def get_processlist_with_stacktraces(args): try: if args.replicated_database: return clickhouse_execute_json( args, """ - SELECT materialize((hostName(), tcpPort())) as host, * - FROM clusterAllReplicas('test_cluster_database_replicated', system.processes) - WHERE query NOT LIKE '%system.processes%' + SELECT materialize(hostName() || '::' || tcpPort()::String) as host_port, * + -- NOTE: view() here to do JOIN on shards, instead of initiator + FROM clusterAllReplicas('test_cluster_database_replicated', view( + SELECT + groupArray((s.thread_id, arrayStringConcat(arrayMap( + x -> concat(addressToLine(x), '::', demangle(addressToSymbol(x))), + s.trace), '\n') AS stacktrace + )) AS stacktraces, + p.* + FROM system.processes p + JOIN system.stack_trace s USING (query_id) + WHERE query NOT LIKE '%system.processes%' + GROUP BY p.* + )) + ORDER BY elapsed DESC """, + settings={ + "allow_introspection_functions": 1, + }, ) else: - return clickhouse_execute_json(args, "SHOW PROCESSLIST") + return clickhouse_execute_json( + args, + """ + SELECT + groupArray((s.thread_id, arrayStringConcat(arrayMap( + x -> concat(addressToLine(x), '::', demangle(addressToSymbol(x))), + s.trace), '\n') AS stacktrace + )) AS stacktraces, + p.* + FROM system.processes p + JOIN system.stack_trace s USING (query_id) + WHERE query NOT LIKE '%system.processes%' + GROUP BY p.* + ORDER BY elapsed DESC + """, + settings={ + "allow_introspection_functions": 1, + }, + ) except Exception as e: return "Failed to get processlist: " + str(e) @@ -1936,7 +1969,7 @@ def reportLogStats(args): LIMIT 100 FORMAT TSVWithNamesAndTypes """ - value = clickhouse_execute(args, query).decode() + value = clickhouse_execute(args, query).decode(errors="replace") print("\nTop patterns of log messages:\n") print(value) print("\n") @@ -1948,7 +1981,7 @@ def reportLogStats(args): count() AS count, substr(replaceRegexpAll(message, '[^A-Za-z]+', ''), 1, 32) AS pattern, substr(any(message), 1, 256) as runtime_message, - any((extract(source_file, '\/[a-zA-Z0-9_]+\.[a-z]+'), source_line)) as line + any((extract(source_file, '\/[a-zA-Z0-9_]+\.[a-z]+'), source_line)) as line FROM system.text_log WHERE (now() - toIntervalMinute(mins)) < event_time AND message_format_string = '' GROUP BY pattern @@ -1956,7 +1989,7 @@ def reportLogStats(args): LIMIT 50 FORMAT TSVWithNamesAndTypes """ - value = clickhouse_execute(args, query).decode() + value = clickhouse_execute(args, query).decode(errors="replace") print("\nTop messages without format string (fmt::runtime):\n") print(value) print("\n") @@ -2058,10 +2091,9 @@ def main(args): exit_code.value = 1 if args.hung_check: - # Some queries may execute in background for some time after test was finished. This is normal. 
for _ in range(1, 60): - processlist = get_processlist(args) + processlist = get_processlist_with_stacktraces(args) if not processlist: break sleep(1) @@ -2075,7 +2107,6 @@ def main(args): print(json.dumps(processlist, indent=4)) print(get_transactions_list(args)) - print_stacktraces() exit_code.value = 1 else: print(colored("\nNo queries hung.", args, "green", attrs=["bold"])) diff --git a/tests/integration/test_backup_restore_on_cluster/configs/disallow_concurrency.xml b/tests/integration/test_backup_restore_on_cluster/configs/disallow_concurrency.xml index 144be77c9f9..9e67f54f8e8 100644 --- a/tests/integration/test_backup_restore_on_cluster/configs/disallow_concurrency.xml +++ b/tests/integration/test_backup_restore_on_cluster/configs/disallow_concurrency.xml @@ -9,7 +9,7 @@ backups + false + false - false - false diff --git a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py index 8f514b95d0b..43e7682ec1d 100644 --- a/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py +++ b/tests/integration/test_backup_restore_on_cluster/test_disallow_concurrency.py @@ -148,10 +148,14 @@ def test_concurrent_backups_on_different_nodes(): backup_name = new_backup_name() - nodes[1].query(f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name} ASYNC") + id = ( + nodes[1] + .query(f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name} ASYNC") + .split("\t")[0] + ) assert_eq_with_retry( nodes[1], - f"SELECT status FROM system.backups WHERE status == 'CREATING_BACKUP'", + f"SELECT status FROM system.backups WHERE status == 'CREATING_BACKUP' AND id = '{id}'", "CREATING_BACKUP", ) assert "Concurrent backups not supported" in nodes[2].query_and_get_error( diff --git a/tests/integration/test_backup_restore_s3/test.py b/tests/integration/test_backup_restore_s3/test.py index b76ff2a2479..d5a7579df51 100644 --- a/tests/integration/test_backup_restore_s3/test.py +++ b/tests/integration/test_backup_restore_s3/test.py @@ -127,7 +127,7 @@ def test_backup_to_s3_multipart(): backup_destination = f"S3('http://minio1:9001/root/data/backups/multipart/{backup_name}', 'minio', 'minio123')" check_backup_and_restore(storage_policy, backup_destination, size=1000000) assert node.contains_in_log( - f"copyDataToS3: Multipart upload has completed. Bucket: root, Key: data/backups/multipart/{backup_name}" + f"copyDataToS3File: Multipart upload has completed. Bucket: root, Key: data/backups/multipart/{backup_name}" ) @@ -140,7 +140,7 @@ def test_backup_to_s3_native_copy(): check_backup_and_restore(storage_policy, backup_destination) assert node.contains_in_log("using native copy") assert node.contains_in_log( - f"copyFileS3ToS3: Single operation copy has completed. Bucket: root, Key: data/backups/{backup_name}" + f"copyS3File: Single operation copy has completed. Bucket: root, Key: data/backups/{backup_name}" ) @@ -153,7 +153,7 @@ def test_backup_to_s3_native_copy_other_bucket(): check_backup_and_restore(storage_policy, backup_destination) assert node.contains_in_log("using native copy") assert node.contains_in_log( - f"copyFileS3ToS3: Single operation copy has completed. Bucket: root, Key: data/backups/{backup_name}" + f"copyS3File: Single operation copy has completed. 
Bucket: root, Key: data/backups/{backup_name}" ) @@ -164,5 +164,5 @@ def test_backup_to_s3_native_copy_multipart(): check_backup_and_restore(storage_policy, backup_destination, size=1000000) assert node.contains_in_log("using native copy") assert node.contains_in_log( - f"copyFileS3ToS3: Multipart upload has completed. Bucket: root, Key: data/backups/multipart/{backup_name}/" + f"copyS3File: Multipart upload has completed. Bucket: root, Key: data/backups/multipart/{backup_name}/" ) diff --git a/tests/integration/test_replicated_merge_tree_s3_restore/test.py b/tests/integration/test_replicated_merge_tree_s3_restore/test.py index f26b3e7bd35..2181f260f32 100644 --- a/tests/integration/test_replicated_merge_tree_s3_restore/test.py +++ b/tests/integration/test_replicated_merge_tree_s3_restore/test.py @@ -131,24 +131,29 @@ def create_restore_file(node, revision=None, bucket=None, path=None, detached=No ["bash", "-c", "touch /var/lib/clickhouse/disks/s3/restore"], user="root" ) - add_restore_option = 'echo -en "{}={}\n" >> /var/lib/clickhouse/disks/s3/restore' - if revision: + num_restore_options = 0 + + def add_restore_option(key, value): + nonlocal num_restore_options + to = ">>" if num_restore_options else ">" node.exec_in_container( - ["bash", "-c", add_restore_option.format("revision", revision)], user="root" - ) - if bucket: - node.exec_in_container( - ["bash", "-c", add_restore_option.format("source_bucket", bucket)], + [ + "bash", + "-c", + f'echo -en "{key}={value}\n" {to} /var/lib/clickhouse/disks/s3/restore', + ], user="root", ) + num_restore_options += 1 + + if revision: + add_restore_option("revision", revision) + if bucket: + add_restore_option("source_bucket", bucket) if path: - node.exec_in_container( - ["bash", "-c", add_restore_option.format("source_path", path)], user="root" - ) + add_restore_option("source_path", path) if detached: - node.exec_in_container( - ["bash", "-c", add_restore_option.format("detached", "true")], user="root" - ) + add_restore_option("detached", "true") def get_revision_counter(node, backup_number): diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 21d41ec2a38..ac6eee11892 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -1743,7 +1743,7 @@ def test_s3_list_objects_failure(started_cluster): get_query = """ SELECT sleep({seconds}) FROM s3('http://resolver:8083/{bucket}/test_no_list_*', 'CSV', 'c1 UInt32') - SETTINGS s3_list_object_keys_size = 1, max_threads = {max_threads}, enable_s3_requests_logging = 1, input_format_parallel_parsing = 0 + SETTINGS s3_list_object_keys_size = 1, max_threads = {max_threads}, enable_s3_requests_logging = 1 """.format( bucket=bucket, seconds=random.random(), max_threads=random.randint(2, 20) ) diff --git a/tests/integration/test_system_merges/test.py b/tests/integration/test_system_merges/test.py index 643d5ffc8c7..0a469bd7bbd 100644 --- a/tests/integration/test_system_merges/test.py +++ b/tests/integration/test_system_merges/test.py @@ -96,7 +96,7 @@ def test_merge_simple(started_cluster, replicated): # Wait for OPTIMIZE to actually start assert_eq_with_retry( - node1, + node_check, f"select count() from system.merges where table='{table_name}'", "1\n", retry_count=30, @@ -196,7 +196,7 @@ def test_mutation_simple(started_cluster, replicated): # Wait for the mutation to actually start assert_eq_with_retry( - node1, + node_check, f"select count() from system.merges where table='{table_name}'", "1\n", 
retry_count=30, diff --git a/tests/queries/0_stateless/00039_inserts_through_http.sh b/tests/queries/0_stateless/00039_inserts_through_http.sh index 2eaa4393935..7b2ec25923c 100755 --- a/tests/queries/0_stateless/00039_inserts_through_http.sh +++ b/tests/queries/0_stateless/00039_inserts_through_http.sh @@ -5,11 +5,11 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CURDIR"/../shell_config.sh echo 'DROP TABLE IF EXISTS long_insert' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}" -d @- -echo 'CREATE TABLE long_insert (a String) ENGINE = Memory' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}" -d @- +echo 'CREATE TABLE long_insert (str String) ENGINE = Memory' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}" -d @- for string_size in 1 10 100 1000 10000 100000 1000000; do # LC_ALL=C is needed because otherwise Perl will bark on bad tuned environment. LC_ALL=C perl -we 'for my $letter ("a" .. "z") { print(($letter x '$string_size') . "\n") }' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}&query=INSERT+INTO+long_insert+FORMAT+TabSeparated" --data-binary @- - echo 'SELECT substring(a, 1, 1) AS c, length(a) AS l FROM long_insert ORDER BY c, l' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}" -d @- + echo 'SELECT substring(str, 1, 1) AS c, length(str) AS l FROM long_insert ORDER BY c, l' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}" -d @- done echo 'DROP TABLE long_insert' | ${CLICKHOUSE_CURL} -sSg "${CLICKHOUSE_URL}" -d @- diff --git a/tests/queries/0_stateless/00898_parsing_bad_diagnostic_message.sh b/tests/queries/0_stateless/00898_parsing_bad_diagnostic_message.sh index 0eeabde917c..530e7e92e08 100755 --- a/tests/queries/0_stateless/00898_parsing_bad_diagnostic_message.sh +++ b/tests/queries/0_stateless/00898_parsing_bad_diagnostic_message.sh @@ -6,4 +6,5 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CUR_DIR"/../shell_config.sh -echo -ne '0\t1\t2\t3\t4\t5\t6\t7\t8\t9\t10\ta' | $CLICKHOUSE_LOCAL --structure 'c0 UInt8, c1 UInt8, c2 UInt8, c3 UInt8, c4 UInt8, c5 UInt8, c6 UInt8, c7 UInt8, c8 UInt8, c9 UInt8, c10 UInt8, c11 UInt8' --input-format TSV --query 'SELECT * FROM table' 2>&1 | grep -F 'Column 11' +echo -ne '0\t1\t2\t3\t4\t5\t6\t7\t8\t9\t10\ta' | $CLICKHOUSE_LOCAL --structure 'c0 UInt8, c1 UInt8, c2 UInt8, c3 UInt8, c4 UInt8, c5 UInt8, c6 UInt8, c7 UInt8, c8 UInt8, c9 UInt8, c10 UInt8, c11 UInt8' --input-format TSV --query 'SELECT * FROM table' --input_format_tsv_detect_header=0 2>&1 | grep -F 'Column 11' + diff --git a/tests/queries/0_stateless/00990_hasToken_and_tokenbf.reference b/tests/queries/0_stateless/00990_hasToken_and_tokenbf.reference index 10e8f0d2c59..4b3beccf5f1 100644 --- a/tests/queries/0_stateless/00990_hasToken_and_tokenbf.reference +++ b/tests/queries/0_stateless/00990_hasToken_and_tokenbf.reference @@ -1,3 +1,8 @@ +0 +0 2007 2007 2007 +0 +2007 +2007 diff --git a/tests/queries/0_stateless/00990_hasToken_and_tokenbf.sql b/tests/queries/0_stateless/00990_hasToken_and_tokenbf.sql index ad50420b6ae..d3b36cda0d8 100644 --- a/tests/queries/0_stateless/00990_hasToken_and_tokenbf.sql +++ b/tests/queries/0_stateless/00990_hasToken_and_tokenbf.sql @@ -1,4 +1,3 @@ - DROP TABLE IF EXISTS bloom_filter; CREATE TABLE bloom_filter @@ -13,9 +12,19 @@ insert into bloom_filter select number+2000, 'abc,def,zzz' from numbers(8); insert into bloom_filter select number+3000, 'yyy,uuu' from numbers(1024); insert into bloom_filter select number+3000, 'abcdefzzz' from numbers(1024); +SELECT max(id) FROM bloom_filter WHERE hasToken(s, 'abc,def,zzz'); -- { serverError BAD_ARGUMENTS } +SELECT max(id) FROM bloom_filter WHERE hasTokenCaseInsensitive(s, 'abc,def,zzz'); -- { serverError BAD_ARGUMENTS } + +SELECT max(id) FROM bloom_filter WHERE hasTokenOrNull(s, 'abc,def,zzz'); +SELECT max(id) FROM bloom_filter WHERE hasTokenCaseInsensitiveOrNull(s, 'abc,def,zzz'); + +select max(id) from bloom_filter where hasTokenCaseInsensitive(s, 'ABC'); +select max(id) from bloom_filter where hasTokenCaseInsensitive(s, 'zZz'); + set max_rows_to_read = 16; SELECT max(id) FROM bloom_filter WHERE hasToken(s, 'abc'); +SELECT max(id) FROM bloom_filter WHERE hasToken(s, 'ABC'); SELECT max(id) FROM bloom_filter WHERE hasToken(s, 'def'); SELECT max(id) FROM bloom_filter WHERE hasToken(s, 'zzz'); diff --git a/tests/queries/0_stateless/01131_max_rows_to_sort.sql b/tests/queries/0_stateless/01131_max_rows_to_sort.sql index a6109700045..d18f35e091e 100644 --- a/tests/queries/0_stateless/01131_max_rows_to_sort.sql +++ b/tests/queries/0_stateless/01131_max_rows_to_sort.sql @@ -4,4 +4,5 @@ SELECT * FROM system.numbers ORDER BY number; -- { serverError 396 } SET sort_overflow_mode = 'break'; SET max_block_size = 1000; +set query_plan_remove_redundant_sorting=0; -- to keep sorting in the query below SELECT count() >= 100 AND count() <= 1000 FROM (SELECT * FROM system.numbers ORDER BY number); diff --git a/tests/queries/0_stateless/01195_formats_diagnostic_info.sh b/tests/queries/0_stateless/01195_formats_diagnostic_info.sh index a6b3ebf4087..e75780a4520 100755 --- a/tests/queries/0_stateless/01195_formats_diagnostic_info.sh +++ b/tests/queries/0_stateless/01195_formats_diagnostic_info.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . 
"$CURDIR"/../shell_config.sh -PARSER=(${CLICKHOUSE_LOCAL} --query 'SELECT t, s, d FROM table' --structure 't DateTime, s String, d Decimal64(10)' --input-format CSV) +PARSER=(${CLICKHOUSE_LOCAL} --query 'SELECT t, s, d FROM table' --structure 't DateTime, s String, d Decimal64(10)' --input-format CSV --input_format_csv_detect_header 0) echo '2020-04-21 12:34:56, "Hello", 12345678' | "${PARSER[@]}" 2>&1| grep "ERROR" || echo "CSV" echo '2020-04-21 12:34:56, "Hello", 123456789' | "${PARSER[@]}" 2>&1| grep "ERROR" echo '2020-04-21 12:34:567, "Hello", 123456789' | "${PARSER[@]}" 2>&1| grep "ERROR" @@ -14,7 +14,7 @@ echo '2020-04-21 12:34:56, "Hello", 12345678,1' | "${PARSER[@]}" 2>&1| grep "ERR echo '2020-04-21 12:34:56,,123Hello' | "${PARSER[@]}" 2>&1| grep "ERROR" echo -e '2020-04-21 12:34:56, "Hello", 12345678\n' | "${PARSER[@]}" 2>&1| grep "ERROR" -PARSER=(${CLICKHOUSE_LOCAL} --query 'SELECT t, s, d FROM table' --structure 't DateTime, s String, d Decimal64(10)' --input-format CustomSeparatedIgnoreSpaces --format_custom_escaping_rule CSV --format_custom_field_delimiter ',' --format_custom_row_after_delimiter "") +PARSER=(${CLICKHOUSE_LOCAL} --query 'SELECT t, s, d FROM table' --structure 't DateTime, s String, d Decimal64(10)' --input-format CustomSeparatedIgnoreSpaces --format_custom_escaping_rule CSV --format_custom_field_delimiter ',' --format_custom_row_after_delimiter "" --input_format_custom_detect_header 0) echo '2020-04-21 12:34:56, "Hello", 12345678' | "${PARSER[@]}" 2>&1| grep "ERROR" || echo -e "\nCustomSeparatedIgnoreSpaces" echo '2020-04-21 12:34:56, "Hello", 123456789' | "${PARSER[@]}" 2>&1| grep "ERROR" echo '2020-04-21 12:34:567, "Hello", 123456789' | "${PARSER[@]}" 2>&1| grep "ERROR" @@ -22,7 +22,7 @@ echo '2020-04-21 12:34:56, "Hello", 12345678,1' | "${PARSER[@]}" 2>&1| grep "ERR echo '2020-04-21 12:34:56,,123Hello' | "${PARSER[@]}" 2>&1| grep "ERROR" echo -e '2020-04-21 12:34:56, "Hello", 12345678\n\n\n\n ' | "${PARSER[@]}" 2>&1| grep "ERROR" || echo "OK" -PARSER=(${CLICKHOUSE_LOCAL} --input_format_null_as_default 0 --query 'SELECT t, s, d FROM table' --structure 't DateTime, s String, d Decimal64(10)' --input-format TSV) +PARSER=(${CLICKHOUSE_LOCAL} --input_format_null_as_default 0 --query 'SELECT t, s, d FROM table' --structure 't DateTime, s String, d Decimal64(10)' --input-format TSV --input_format_tsv_detect_header 0) echo -e '2020-04-21 12:34:56\tHello\t12345678' | "${PARSER[@]}" 2>&1| grep "ERROR" || echo -e "\nTSV" echo -e '2020-04-21 12:34:56\tHello\t123456789' | "${PARSER[@]}" 2>&1| grep "ERROR" echo -e '2020-04-21 12:34:567\tHello\t123456789' | "${PARSER[@]}" 2>&1| grep "ERROR" @@ -31,7 +31,7 @@ echo -e '2020-04-21 12:34:56\t\t123Hello' | "${PARSER[@]}" 2>&1| grep "ERROR" echo -e '2020-04-21 12:34:56\tHello\t12345678\n' | "${PARSER[@]}" 2>&1| grep "ERROR" echo -e '\N\tHello\t12345678' | "${PARSER[@]}" 2>&1| grep -o "Unexpected NULL value" -PARSER=(${CLICKHOUSE_LOCAL} --query 'SELECT t, s, d FROM table' --structure 't DateTime, s String, d Decimal64(10)' --input-format CustomSeparated) +PARSER=(${CLICKHOUSE_LOCAL} --query 'SELECT t, s, d FROM table' --structure 't DateTime, s String, d Decimal64(10)' --input-format CustomSeparated --input_format_custom_detect_header 0) echo -e '2020-04-21 12:34:56\tHello\t12345678' | "${PARSER[@]}" 2>&1| grep "ERROR" || echo -e "\nCustomSeparated" echo -e '2020-04-21 12:34:56\tHello\t123456789' | "${PARSER[@]}" 2>&1| grep "ERROR" echo -e '2020-04-21 12:34:567\tHello\t123456789' | "${PARSER[@]}" 2>&1| grep "ERROR" diff --git 
a/tests/queries/0_stateless/01271_show_privileges.reference b/tests/queries/0_stateless/01271_show_privileges.reference index f088cfaf00c..c8edf196b1a 100644 --- a/tests/queries/0_stateless/01271_show_privileges.reference +++ b/tests/queries/0_stateless/01271_show_privileges.reference @@ -96,6 +96,7 @@ SYSTEM DROP DNS CACHE ['SYSTEM DROP DNS','DROP DNS CACHE','DROP DNS'] GLOBAL SYS SYSTEM DROP MARK CACHE ['SYSTEM DROP MARK','DROP MARK CACHE','DROP MARKS'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP UNCOMPRESSED CACHE ['SYSTEM DROP UNCOMPRESSED','DROP UNCOMPRESSED CACHE','DROP UNCOMPRESSED'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP MMAP CACHE ['SYSTEM DROP MMAP','DROP MMAP CACHE','DROP MMAP'] GLOBAL SYSTEM DROP CACHE +SYSTEM DROP QUERY RESULT CACHE ['SYSTEM DROP QUERY RESULT','DROP QUERY RESULT CACHE','DROP QUERY RESULT'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP COMPILED EXPRESSION CACHE ['SYSTEM DROP COMPILED EXPRESSION','DROP COMPILED EXPRESSION CACHE','DROP COMPILED EXPRESSIONS'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP FILESYSTEM CACHE ['SYSTEM DROP FILESYSTEM CACHE','DROP FILESYSTEM CACHE'] GLOBAL SYSTEM DROP CACHE SYSTEM DROP SCHEMA CACHE ['SYSTEM DROP SCHEMA CACHE','DROP SCHEMA CACHE'] GLOBAL SYSTEM DROP CACHE diff --git a/tests/queries/0_stateless/01417_freeze_partition_verbose.sh b/tests/queries/0_stateless/01417_freeze_partition_verbose.sh index 1af700c1f6e..9f6ae260750 100755 --- a/tests/queries/0_stateless/01417_freeze_partition_verbose.sh +++ b/tests/queries/0_stateless/01417_freeze_partition_verbose.sh @@ -22,40 +22,40 @@ ${CLICKHOUSE_CLIENT} --query "INSERT INTO table_for_freeze_old_syntax SELECT toD ${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze FREEZE WITH NAME 'test_01417' FORMAT TSVWithNames SETTINGS alter_partition_verbose_result = 1;" \ | ${CLICKHOUSE_LOCAL} --structure "$ALTER_OUT_STRUCTURE, $FREEZE_OUT_STRUCTURE" \ - --query "SELECT command_type, partition_id, part_name, backup_name FROM table" + --query "SELECT command_type, partition_id, part_name, backup_name FROM table FORMAT TSVWithNames" ${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze FREEZE PARTITION '3' WITH NAME 'test_01417_single_part' FORMAT TSVWithNames SETTINGS alter_partition_verbose_result = 1;" \ | ${CLICKHOUSE_LOCAL} --structure "$ALTER_OUT_STRUCTURE, $FREEZE_OUT_STRUCTURE" \ - --query "SELECT command_type, partition_id, part_name, backup_name FROM table" + --query "SELECT command_type, partition_id, part_name, backup_name FROM table FORMAT TSVWithNames" ${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze DETACH PARTITION '3';" ${CLICKHOUSE_CLIENT} --query "INSERT INTO table_for_freeze VALUES (3, '3');" ${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze ATTACH PARTITION '3' FORMAT TSVWithNames SETTINGS alter_partition_verbose_result = 1;" \ | ${CLICKHOUSE_LOCAL} --structure "$ALTER_OUT_STRUCTURE, $ATTACH_OUT_STRUCTURE" \ - --query "SELECT command_type, partition_id, part_name, old_part_name FROM table" + --query "SELECT command_type, partition_id, part_name, old_part_name FROM table FORMAT TSVWithNames" ${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze DETACH PARTITION '5';" ${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze FREEZE PARTITION '7' WITH NAME 'test_01417_single_part_7', ATTACH PART '5_6_6_0' FORMAT TSVWithNames SETTINGS alter_partition_verbose_result = 1;" \ | ${CLICKHOUSE_LOCAL} --structure "$ALTER_OUT_STRUCTURE, $FREEZE_OUT_STRUCTURE, $ATTACH_OUT_STRUCTURE" \ - --query "SELECT command_type, partition_id, part_name, backup_name, old_part_name FROM 
table" + --query "SELECT command_type, partition_id, part_name, backup_name, old_part_name FROM table FORMAT TSVWithNames" # Unfreeze partition ${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze UNFREEZE PARTITION '7' WITH NAME 'test_01417_single_part_7' FORMAT TSVWithNames SETTINGS alter_partition_verbose_result = 1;" \ | ${CLICKHOUSE_LOCAL} --structure "$ALTER_OUT_STRUCTURE, $FREEZE_OUT_STRUCTURE" \ - --query "SELECT command_type, partition_id, part_name, backup_name FROM table" + --query "SELECT command_type, partition_id, part_name, backup_name FROM table FORMAT TSVWithNames" # Freeze partition with old syntax ${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze_old_syntax FREEZE PARTITION '202103' WITH NAME 'test_01417_single_part_old_syntax' FORMAT TSVWithNames SETTINGS alter_partition_verbose_result = 1;" \ | ${CLICKHOUSE_LOCAL} --structure "$ALTER_OUT_STRUCTURE, $FREEZE_OUT_STRUCTURE" \ - --query "SELECT command_type, partition_id, part_name, backup_name FROM table" + --query "SELECT command_type, partition_id, part_name, backup_name FROM table FORMAT TSVWithNames" # Unfreeze partition with old syntax ${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze_old_syntax UNFREEZE PARTITION '202103' WITH NAME 'test_01417_single_part_old_syntax' FORMAT TSVWithNames SETTINGS alter_partition_verbose_result = 1;" \ | ${CLICKHOUSE_LOCAL} --structure "$ALTER_OUT_STRUCTURE, $FREEZE_OUT_STRUCTURE" \ - --query "SELECT command_type, partition_id, part_name, backup_name FROM table" + --query "SELECT command_type, partition_id, part_name, backup_name FROM table FORMAT TSVWithNames" # Unfreeze the whole backup with SYSTEM query ${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze FREEZE PARTITION '7' WITH NAME 'test_01417_single_part_7_system'" diff --git a/tests/queries/0_stateless/01417_freeze_partition_verbose_zookeeper.sh b/tests/queries/0_stateless/01417_freeze_partition_verbose_zookeeper.sh index 4629450c1f9..1fd8a2b29c6 100755 --- a/tests/queries/0_stateless/01417_freeze_partition_verbose_zookeeper.sh +++ b/tests/queries/0_stateless/01417_freeze_partition_verbose_zookeeper.sh @@ -18,24 +18,24 @@ ${CLICKHOUSE_CLIENT} --insert_keeper_fault_injection_probability=0 --query "INSE ${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze_replicated FREEZE WITH NAME 'test_01417' FORMAT TSVWithNames SETTINGS alter_partition_verbose_result = 1;" \ | ${CLICKHOUSE_LOCAL} --structure "$ALTER_OUT_STRUCTURE, $FREEZE_OUT_STRUCTURE" \ - --query "SELECT command_type, partition_id, part_name, backup_name FROM table" + --query "SELECT command_type, partition_id, part_name, backup_name FROM table FORMAT TSVWithNames" ${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze_replicated FREEZE PARTITION '3' WITH NAME 'test_01417_single_part' FORMAT TSVWithNames SETTINGS alter_partition_verbose_result = 1;" \ | ${CLICKHOUSE_LOCAL} --structure "$ALTER_OUT_STRUCTURE, $FREEZE_OUT_STRUCTURE" \ - --query "SELECT command_type, partition_id, part_name, backup_name FROM table" + --query "SELECT command_type, partition_id, part_name, backup_name FROM table FORMAT TSVWithNames" ${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze_replicated DETACH PARTITION '3';" ${CLICKHOUSE_CLIENT} --insert_keeper_fault_injection_probability=0 --query "INSERT INTO table_for_freeze_replicated VALUES (3, '3');" ${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze_replicated ATTACH PARTITION '3' FORMAT TSVWithNames SETTINGS alter_partition_verbose_result = 1;" \ | ${CLICKHOUSE_LOCAL} --structure 
"$ALTER_OUT_STRUCTURE, $ATTACH_OUT_STRUCTURE" \ - --query "SELECT command_type, partition_id, part_name, old_part_name FROM table" + --query "SELECT command_type, partition_id, part_name, old_part_name FROM table FORMAT TSVWithNames" ${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze_replicated DETACH PARTITION '5';" ${CLICKHOUSE_CLIENT} --query "ALTER TABLE table_for_freeze_replicated FREEZE PARTITION '7' WITH NAME 'test_01417_single_part_7', ATTACH PART '5_0_0_0' FORMAT TSVWithNames SETTINGS alter_partition_verbose_result = 1;" \ | ${CLICKHOUSE_LOCAL} --structure "$ALTER_OUT_STRUCTURE, $FREEZE_OUT_STRUCTURE, $ATTACH_OUT_STRUCTURE" \ - --query "SELECT command_type, partition_id, part_name, backup_name, old_part_name FROM table" + --query "SELECT command_type, partition_id, part_name, backup_name, old_part_name FROM table FORMAT TSVWithNames" # teardown ${CLICKHOUSE_CLIENT} --query "DROP TABLE IF EXISTS table_for_freeze_replicated SYNC;" diff --git a/tests/queries/0_stateless/01744_fuse_sum_count_aggregate.reference b/tests/queries/0_stateless/01744_fuse_sum_count_aggregate.reference deleted file mode 100644 index 70c19fc8ced..00000000000 --- a/tests/queries/0_stateless/01744_fuse_sum_count_aggregate.reference +++ /dev/null @@ -1,12 +0,0 @@ -210 230 20 -SELECT - sum(a), - sumCount(b).1, - sumCount(b).2 -FROM fuse_tbl ----------NOT trigger fuse-------- -210 11.5 -SELECT - sum(a), - avg(b) -FROM fuse_tbl diff --git a/tests/queries/0_stateless/01744_fuse_sum_count_aggregate.sql b/tests/queries/0_stateless/01744_fuse_sum_count_aggregate.sql deleted file mode 100644 index 375662eb405..00000000000 --- a/tests/queries/0_stateless/01744_fuse_sum_count_aggregate.sql +++ /dev/null @@ -1,14 +0,0 @@ -DROP TABLE IF EXISTS fuse_tbl; -CREATE TABLE fuse_tbl(a Int8, b Int8) Engine = Log; -INSERT INTO fuse_tbl SELECT number, number + 1 FROM numbers(1, 20); - -SET optimize_syntax_fuse_functions = 1; -SET optimize_fuse_sum_count_avg = 1; - -SELECT sum(a), sum(b), count(b) from fuse_tbl; -EXPLAIN SYNTAX SELECT sum(a), sum(b), count(b) from fuse_tbl; -SELECT '---------NOT trigger fuse--------'; -SELECT sum(a), avg(b) from fuse_tbl; -EXPLAIN SYNTAX SELECT sum(a), avg(b) from fuse_tbl; - -DROP TABLE fuse_tbl; diff --git a/tests/queries/0_stateless/01881_join_on_conditions_hash.sql.j2 b/tests/queries/0_stateless/01881_join_on_conditions_hash.sql.j2 index 0d6bef7fadb..d2cc066a1b1 100644 --- a/tests/queries/0_stateless/01881_join_on_conditions_hash.sql.j2 +++ b/tests/queries/0_stateless/01881_join_on_conditions_hash.sql.j2 @@ -30,6 +30,8 @@ SELECT t1.key, t1.key2 FROM t1 INNER ALL JOIN t2 ON t1.id == t2.id AND t2.key == SELECT '--'; SELECT t1.key FROM t1 INNER ANY JOIN t2 ON t1.id == t2.id AND t2.key == t2.key2 AND t1.key == t1.key2; +SELECT t1.key FROM t1 INNER ANY JOIN t2 ON t1.id == t2.id AND t2.key == t2.key2 AND t1.key == t1.key2 AND 0; -- { serverError INVALID_JOIN_ON_EXPRESSION } +SELECT t1.key FROM t1 INNER ANY JOIN t2 ON t1.id == t2.id AND t2.key == t2.key2 AND t1.key == t1.key2 AND 1; -- { serverError INVALID_JOIN_ON_EXPRESSION } SELECT '--'; SELECT '333' = t1.key FROM t1 INNER ANY JOIN t2 ON t1.id == t2.id AND t2.key == t2.key2 AND t1.key == t1.key2 AND t2.id > 2; diff --git a/tests/queries/0_stateless/02103_tsv_csv_custom_null_representation.sh b/tests/queries/0_stateless/02103_tsv_csv_custom_null_representation.sh index b28c56f9266..967c6538bb1 100755 --- a/tests/queries/0_stateless/02103_tsv_csv_custom_null_representation.sh +++ 
b/tests/queries/0_stateless/02103_tsv_csv_custom_null_representation.sh @@ -97,37 +97,37 @@ echo 'Corner cases' echo 'TSV' echo -e "Some text\tCustomNull" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_tsv_null_representation='CustomNull', input_format_parallel_parsing=0" +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_tsv_null_representation='CustomNull', input_format_parallel_parsing=0, input_format_tsv_detect_header=0" echo -e "Some text\tCustomNull Some text" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_tsv_null_representation='CustomNull', input_format_parallel_parsing=0" +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_tsv_null_representation='CustomNull', input_format_parallel_parsing=0, input_format_tsv_detect_header=0" echo -e "Some text\t123NNN" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(Int32)') settings storage_file_read_method='pread', max_read_buffer_size=14, format_tsv_null_representation='123NN', input_format_parallel_parsing=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(Int32)') settings storage_file_read_method='pread', max_read_buffer_size=14, format_tsv_null_representation='123NN', input_format_parallel_parsing=0, input_format_tsv_detect_header=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' echo -e "Some text\tNU\tLL" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=13, format_tsv_null_representation='NU\tL', input_format_parallel_parsing=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=13, format_tsv_null_representation='NU\tL', input_format_parallel_parsing=0, input_format_tsv_detect_header=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' echo 'CSV' echo -e "Some text,CustomNull" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_csv_null_representation='CustomNull', input_format_parallel_parsing=0" +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_csv_null_representation='CustomNull', input_format_parallel_parsing=0, input_format_csv_detect_header=0" echo -e "Some text,CustomNull Some text" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, 
format_csv_null_representation='CustomNull', input_format_parallel_parsing=0" +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_csv_null_representation='CustomNull', input_format_parallel_parsing=0, input_format_csv_detect_header=0" -echo -e "Some text,123NNN" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(Int32)') settings storage_file_read_method='pread', max_read_buffer_size=14, format_csv_null_representation='123NN', input_format_parallel_parsing=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' +echo -e "Some text,123NNN\n" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(Int32)') settings storage_file_read_method='pread', max_read_buffer_size=14, format_csv_null_representation='123NN', input_format_parallel_parsing=0, input_format_csv_detect_header=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' -echo -e "Some text,NU,LL" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=13, format_csv_null_representation='NU,L', input_format_parallel_parsing=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' +echo -e "Some text,NU,LL\n" > $DATA_FILE +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=13, format_csv_null_representation='NU,L', input_format_parallel_parsing=0, input_format_csv_detect_header=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' echo 'Large custom NULL' $CLICKHOUSE_CLIENT -q "select '0000000000Custom NULL representation0000000000' FROM numbers(10)" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS storage_file_read_method='pread', max_read_buffer_size=5, input_format_parallel_parsing=0, format_tsv_null_representation='0000000000Custom NULL representation0000000000'" -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS storage_file_read_method='pread', max_read_buffer_size=5, input_format_parallel_parsing=0, format_tsv_null_representation='0000000000Custom NULL representation000000000'" +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS storage_file_read_method='pread', max_read_buffer_size=5, input_format_parallel_parsing=0, format_tsv_null_representation='0000000000Custom NULL representation0000000000', input_format_tsv_detect_header=0" +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS storage_file_read_method='pread', max_read_buffer_size=5, input_format_parallel_parsing=0, format_tsv_null_representation='0000000000Custom NULL representation000000000', input_format_tsv_detect_header=0" rm $DATA_FILE diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 9e065c455e9..709baec8ba6 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -266,7 +266,9 @@ CREATE TABLE 
system.formats ( `name` String, `is_input` UInt8, - `is_output` UInt8 + `is_output` UInt8, + `supports_parallel_parsing` UInt8, + `supports_parallel_formatting` UInt8 ) ENGINE = SystemFormats COMMENT 'SYSTEM TABLE is built on the fly.' @@ -286,7 +288,7 @@ CREATE TABLE system.grants ( `user_name` Nullable(String), `role_name` Nullable(String), - `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE FUNCTION' = 53, 'CREATE NAMED COLLECTION' = 54, 'CREATE' = 55, 'DROP DATABASE' = 56, 'DROP TABLE' = 57, 'DROP VIEW' = 58, 'DROP DICTIONARY' = 59, 'DROP FUNCTION' = 60, 'DROP NAMED COLLECTION' = 61, 'DROP' = 62, 'TRUNCATE' = 63, 'OPTIMIZE' = 64, 'BACKUP' = 65, 'KILL QUERY' = 66, 'KILL TRANSACTION' = 67, 'MOVE PARTITION BETWEEN SHARDS' = 68, 'CREATE USER' = 69, 'ALTER USER' = 70, 'DROP USER' = 71, 'CREATE ROLE' = 72, 'ALTER ROLE' = 73, 'DROP ROLE' = 74, 'ROLE ADMIN' = 75, 'CREATE ROW POLICY' = 76, 'ALTER ROW POLICY' = 77, 'DROP ROW POLICY' = 78, 'CREATE QUOTA' = 79, 'ALTER QUOTA' = 80, 'DROP QUOTA' = 81, 'CREATE SETTINGS PROFILE' = 82, 'ALTER SETTINGS PROFILE' = 83, 'DROP SETTINGS PROFILE' = 84, 'SHOW USERS' = 85, 'SHOW ROLES' = 86, 'SHOW ROW POLICIES' = 87, 'SHOW QUOTAS' = 88, 'SHOW SETTINGS PROFILES' = 89, 'SHOW ACCESS' = 90, 'SHOW NAMED COLLECTIONS' = 91, 'ACCESS MANAGEMENT' = 92, 'SYSTEM SHUTDOWN' = 93, 'SYSTEM DROP DNS CACHE' = 94, 'SYSTEM DROP MARK CACHE' = 95, 'SYSTEM DROP UNCOMPRESSED CACHE' = 96, 'SYSTEM DROP MMAP CACHE' = 97, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 98, 'SYSTEM DROP FILESYSTEM CACHE' = 99, 'SYSTEM DROP SCHEMA CACHE' = 100, 'SYSTEM DROP CACHE' = 101, 'SYSTEM RELOAD CONFIG' = 102, 'SYSTEM RELOAD USERS' = 103, 'SYSTEM RELOAD SYMBOLS' = 104, 'SYSTEM RELOAD DICTIONARY' = 105, 'SYSTEM RELOAD MODEL' = 106, 'SYSTEM RELOAD FUNCTION' = 107, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 108, 'SYSTEM RELOAD' = 109, 'SYSTEM RESTART DISK' = 110, 'SYSTEM MERGES' = 111, 'SYSTEM TTL MERGES' = 112, 'SYSTEM FETCHES' = 113, 'SYSTEM MOVES' = 114, 'SYSTEM DISTRIBUTED SENDS' = 115, 'SYSTEM REPLICATED SENDS' = 116, 'SYSTEM SENDS' = 117, 'SYSTEM REPLICATION QUEUES' = 118, 'SYSTEM DROP REPLICA' = 119, 'SYSTEM SYNC REPLICA' = 120, 'SYSTEM RESTART REPLICA' = 121, 'SYSTEM RESTORE REPLICA' = 122, 'SYSTEM WAIT LOADING PARTS' 
= 123, 'SYSTEM SYNC DATABASE REPLICA' = 124, 'SYSTEM SYNC TRANSACTION LOG' = 125, 'SYSTEM FLUSH DISTRIBUTED' = 126, 'SYSTEM FLUSH LOGS' = 127, 'SYSTEM FLUSH' = 128, 'SYSTEM THREAD FUZZER' = 129, 'SYSTEM UNFREEZE' = 130, 'SYSTEM' = 131, 'dictGet' = 132, 'addressToLine' = 133, 'addressToLineWithInlines' = 134, 'addressToSymbol' = 135, 'demangle' = 136, 'INTROSPECTION' = 137, 'FILE' = 138, 'URL' = 139, 'REMOTE' = 140, 'MONGO' = 141, 'MEILISEARCH' = 142, 'MYSQL' = 143, 'POSTGRES' = 144, 'SQLITE' = 145, 'ODBC' = 146, 'JDBC' = 147, 'HDFS' = 148, 'S3' = 149, 'HIVE' = 150, 'SOURCES' = 151, 'CLUSTER' = 152, 'ALL' = 153, 'NONE' = 154), + `access_type` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE FUNCTION' = 53, 'CREATE NAMED COLLECTION' = 54, 'CREATE' = 55, 'DROP DATABASE' = 56, 'DROP TABLE' = 57, 'DROP VIEW' = 58, 'DROP DICTIONARY' = 59, 'DROP FUNCTION' = 60, 'DROP NAMED COLLECTION' = 61, 'DROP' = 62, 'TRUNCATE' = 63, 'OPTIMIZE' = 64, 'BACKUP' = 65, 'KILL QUERY' = 66, 'KILL TRANSACTION' = 67, 'MOVE PARTITION BETWEEN SHARDS' = 68, 'CREATE USER' = 69, 'ALTER USER' = 70, 'DROP USER' = 71, 'CREATE ROLE' = 72, 'ALTER ROLE' = 73, 'DROP ROLE' = 74, 'ROLE ADMIN' = 75, 'CREATE ROW POLICY' = 76, 'ALTER ROW POLICY' = 77, 'DROP ROW POLICY' = 78, 'CREATE QUOTA' = 79, 'ALTER QUOTA' = 80, 'DROP QUOTA' = 81, 'CREATE SETTINGS PROFILE' = 82, 'ALTER SETTINGS PROFILE' = 83, 'DROP SETTINGS PROFILE' = 84, 'SHOW USERS' = 85, 'SHOW ROLES' = 86, 'SHOW ROW POLICIES' = 87, 'SHOW QUOTAS' = 88, 'SHOW SETTINGS PROFILES' = 89, 'SHOW ACCESS' = 90, 'SHOW NAMED COLLECTIONS' = 91, 'ACCESS MANAGEMENT' = 92, 'SYSTEM SHUTDOWN' = 93, 'SYSTEM DROP DNS CACHE' = 94, 'SYSTEM DROP MARK CACHE' = 95, 'SYSTEM DROP UNCOMPRESSED CACHE' = 96, 'SYSTEM DROP MMAP CACHE' = 97, 'SYSTEM DROP QUERY RESULT CACHE' = 98, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 99, 'SYSTEM DROP FILESYSTEM CACHE' = 100, 'SYSTEM DROP SCHEMA CACHE' = 101, 'SYSTEM DROP CACHE' = 102, 'SYSTEM RELOAD CONFIG' = 103, 'SYSTEM RELOAD USERS' = 104, 'SYSTEM RELOAD SYMBOLS' = 105, 'SYSTEM RELOAD DICTIONARY' = 106, 'SYSTEM RELOAD MODEL' = 107, 'SYSTEM RELOAD FUNCTION' = 108, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 109, 'SYSTEM RELOAD' = 110, 'SYSTEM RESTART DISK' = 111, 'SYSTEM MERGES' = 112, 'SYSTEM TTL 
MERGES' = 113, 'SYSTEM FETCHES' = 114, 'SYSTEM MOVES' = 115, 'SYSTEM DISTRIBUTED SENDS' = 116, 'SYSTEM REPLICATED SENDS' = 117, 'SYSTEM SENDS' = 118, 'SYSTEM REPLICATION QUEUES' = 119, 'SYSTEM DROP REPLICA' = 120, 'SYSTEM SYNC REPLICA' = 121, 'SYSTEM RESTART REPLICA' = 122, 'SYSTEM RESTORE REPLICA' = 123, 'SYSTEM WAIT LOADING PARTS' = 124, 'SYSTEM SYNC DATABASE REPLICA' = 125, 'SYSTEM SYNC TRANSACTION LOG' = 126, 'SYSTEM FLUSH DISTRIBUTED' = 127, 'SYSTEM FLUSH LOGS' = 128, 'SYSTEM FLUSH' = 129, 'SYSTEM THREAD FUZZER' = 130, 'SYSTEM UNFREEZE' = 131, 'SYSTEM' = 132, 'dictGet' = 133, 'addressToLine' = 134, 'addressToLineWithInlines' = 135, 'addressToSymbol' = 136, 'demangle' = 137, 'INTROSPECTION' = 138, 'FILE' = 139, 'URL' = 140, 'REMOTE' = 141, 'MONGO' = 142, 'MEILISEARCH' = 143, 'MYSQL' = 144, 'POSTGRES' = 145, 'SQLITE' = 146, 'ODBC' = 147, 'JDBC' = 148, 'HDFS' = 149, 'S3' = 150, 'HIVE' = 151, 'SOURCES' = 152, 'CLUSTER' = 153, 'ALL' = 154, 'NONE' = 155), `database` Nullable(String), `table` Nullable(String), `column` Nullable(String), @@ -567,10 +569,10 @@ ENGINE = SystemPartsColumns COMMENT 'SYSTEM TABLE is built on the fly.' CREATE TABLE system.privileges ( - `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE FUNCTION' = 53, 'CREATE NAMED COLLECTION' = 54, 'CREATE' = 55, 'DROP DATABASE' = 56, 'DROP TABLE' = 57, 'DROP VIEW' = 58, 'DROP DICTIONARY' = 59, 'DROP FUNCTION' = 60, 'DROP NAMED COLLECTION' = 61, 'DROP' = 62, 'TRUNCATE' = 63, 'OPTIMIZE' = 64, 'BACKUP' = 65, 'KILL QUERY' = 66, 'KILL TRANSACTION' = 67, 'MOVE PARTITION BETWEEN SHARDS' = 68, 'CREATE USER' = 69, 'ALTER USER' = 70, 'DROP USER' = 71, 'CREATE ROLE' = 72, 'ALTER ROLE' = 73, 'DROP ROLE' = 74, 'ROLE ADMIN' = 75, 'CREATE ROW POLICY' = 76, 'ALTER ROW POLICY' = 77, 'DROP ROW POLICY' = 78, 'CREATE QUOTA' = 79, 'ALTER QUOTA' = 80, 'DROP QUOTA' = 81, 'CREATE SETTINGS PROFILE' = 82, 'ALTER SETTINGS PROFILE' = 83, 'DROP SETTINGS PROFILE' = 84, 'SHOW USERS' = 85, 'SHOW ROLES' = 86, 'SHOW ROW POLICIES' = 87, 'SHOW QUOTAS' = 88, 'SHOW SETTINGS PROFILES' = 89, 'SHOW ACCESS' = 90, 'SHOW NAMED COLLECTIONS' = 91, 'ACCESS MANAGEMENT' = 92, 'SYSTEM SHUTDOWN' = 93, 'SYSTEM DROP DNS CACHE' = 94, 'SYSTEM DROP MARK CACHE' = 95, 'SYSTEM DROP UNCOMPRESSED CACHE' = 
96, 'SYSTEM DROP MMAP CACHE' = 97, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 98, 'SYSTEM DROP FILESYSTEM CACHE' = 99, 'SYSTEM DROP SCHEMA CACHE' = 100, 'SYSTEM DROP CACHE' = 101, 'SYSTEM RELOAD CONFIG' = 102, 'SYSTEM RELOAD USERS' = 103, 'SYSTEM RELOAD SYMBOLS' = 104, 'SYSTEM RELOAD DICTIONARY' = 105, 'SYSTEM RELOAD MODEL' = 106, 'SYSTEM RELOAD FUNCTION' = 107, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 108, 'SYSTEM RELOAD' = 109, 'SYSTEM RESTART DISK' = 110, 'SYSTEM MERGES' = 111, 'SYSTEM TTL MERGES' = 112, 'SYSTEM FETCHES' = 113, 'SYSTEM MOVES' = 114, 'SYSTEM DISTRIBUTED SENDS' = 115, 'SYSTEM REPLICATED SENDS' = 116, 'SYSTEM SENDS' = 117, 'SYSTEM REPLICATION QUEUES' = 118, 'SYSTEM DROP REPLICA' = 119, 'SYSTEM SYNC REPLICA' = 120, 'SYSTEM RESTART REPLICA' = 121, 'SYSTEM RESTORE REPLICA' = 122, 'SYSTEM WAIT LOADING PARTS' = 123, 'SYSTEM SYNC DATABASE REPLICA' = 124, 'SYSTEM SYNC TRANSACTION LOG' = 125, 'SYSTEM FLUSH DISTRIBUTED' = 126, 'SYSTEM FLUSH LOGS' = 127, 'SYSTEM FLUSH' = 128, 'SYSTEM THREAD FUZZER' = 129, 'SYSTEM UNFREEZE' = 130, 'SYSTEM' = 131, 'dictGet' = 132, 'addressToLine' = 133, 'addressToLineWithInlines' = 134, 'addressToSymbol' = 135, 'demangle' = 136, 'INTROSPECTION' = 137, 'FILE' = 138, 'URL' = 139, 'REMOTE' = 140, 'MONGO' = 141, 'MEILISEARCH' = 142, 'MYSQL' = 143, 'POSTGRES' = 144, 'SQLITE' = 145, 'ODBC' = 146, 'JDBC' = 147, 'HDFS' = 148, 'S3' = 149, 'HIVE' = 150, 'SOURCES' = 151, 'CLUSTER' = 152, 'ALL' = 153, 'NONE' = 154), + `privilege` Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE FUNCTION' = 53, 'CREATE NAMED COLLECTION' = 54, 'CREATE' = 55, 'DROP DATABASE' = 56, 'DROP TABLE' = 57, 'DROP VIEW' = 58, 'DROP DICTIONARY' = 59, 'DROP FUNCTION' = 60, 'DROP NAMED COLLECTION' = 61, 'DROP' = 62, 'TRUNCATE' = 63, 'OPTIMIZE' = 64, 'BACKUP' = 65, 'KILL QUERY' = 66, 'KILL TRANSACTION' = 67, 'MOVE PARTITION BETWEEN SHARDS' = 68, 'CREATE USER' = 69, 'ALTER USER' = 70, 'DROP USER' = 71, 'CREATE ROLE' = 72, 'ALTER ROLE' = 73, 'DROP ROLE' = 74, 'ROLE ADMIN' = 75, 'CREATE ROW POLICY' = 76, 'ALTER ROW POLICY' = 77, 'DROP ROW POLICY' = 78, 'CREATE QUOTA' = 79, 'ALTER QUOTA' = 80, 'DROP QUOTA' = 81, 'CREATE SETTINGS PROFILE' = 82, 'ALTER SETTINGS PROFILE' = 83, 'DROP SETTINGS PROFILE' = 84, 'SHOW USERS' = 85, 'SHOW 
ROLES' = 86, 'SHOW ROW POLICIES' = 87, 'SHOW QUOTAS' = 88, 'SHOW SETTINGS PROFILES' = 89, 'SHOW ACCESS' = 90, 'SHOW NAMED COLLECTIONS' = 91, 'ACCESS MANAGEMENT' = 92, 'SYSTEM SHUTDOWN' = 93, 'SYSTEM DROP DNS CACHE' = 94, 'SYSTEM DROP MARK CACHE' = 95, 'SYSTEM DROP UNCOMPRESSED CACHE' = 96, 'SYSTEM DROP MMAP CACHE' = 97, 'SYSTEM DROP QUERY RESULT CACHE' = 98, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 99, 'SYSTEM DROP FILESYSTEM CACHE' = 100, 'SYSTEM DROP SCHEMA CACHE' = 101, 'SYSTEM DROP CACHE' = 102, 'SYSTEM RELOAD CONFIG' = 103, 'SYSTEM RELOAD USERS' = 104, 'SYSTEM RELOAD SYMBOLS' = 105, 'SYSTEM RELOAD DICTIONARY' = 106, 'SYSTEM RELOAD MODEL' = 107, 'SYSTEM RELOAD FUNCTION' = 108, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 109, 'SYSTEM RELOAD' = 110, 'SYSTEM RESTART DISK' = 111, 'SYSTEM MERGES' = 112, 'SYSTEM TTL MERGES' = 113, 'SYSTEM FETCHES' = 114, 'SYSTEM MOVES' = 115, 'SYSTEM DISTRIBUTED SENDS' = 116, 'SYSTEM REPLICATED SENDS' = 117, 'SYSTEM SENDS' = 118, 'SYSTEM REPLICATION QUEUES' = 119, 'SYSTEM DROP REPLICA' = 120, 'SYSTEM SYNC REPLICA' = 121, 'SYSTEM RESTART REPLICA' = 122, 'SYSTEM RESTORE REPLICA' = 123, 'SYSTEM WAIT LOADING PARTS' = 124, 'SYSTEM SYNC DATABASE REPLICA' = 125, 'SYSTEM SYNC TRANSACTION LOG' = 126, 'SYSTEM FLUSH DISTRIBUTED' = 127, 'SYSTEM FLUSH LOGS' = 128, 'SYSTEM FLUSH' = 129, 'SYSTEM THREAD FUZZER' = 130, 'SYSTEM UNFREEZE' = 131, 'SYSTEM' = 132, 'dictGet' = 133, 'addressToLine' = 134, 'addressToLineWithInlines' = 135, 'addressToSymbol' = 136, 'demangle' = 137, 'INTROSPECTION' = 138, 'FILE' = 139, 'URL' = 140, 'REMOTE' = 141, 'MONGO' = 142, 'MEILISEARCH' = 143, 'MYSQL' = 144, 'POSTGRES' = 145, 'SQLITE' = 146, 'ODBC' = 147, 'JDBC' = 148, 'HDFS' = 149, 'S3' = 150, 'HIVE' = 151, 'SOURCES' = 152, 'CLUSTER' = 153, 'ALL' = 154, 'NONE' = 155), `aliases` Array(String), `level` Nullable(Enum8('GLOBAL' = 0, 'DATABASE' = 1, 'TABLE' = 2, 'DICTIONARY' = 3, 'VIEW' = 4, 'COLUMN' = 5)), - `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE FUNCTION' = 53, 'CREATE NAMED COLLECTION' = 54, 'CREATE' = 55, 'DROP DATABASE' = 56, 'DROP TABLE' = 57, 'DROP VIEW' = 58, 'DROP DICTIONARY' = 59, 'DROP FUNCTION' = 60, 'DROP NAMED COLLECTION' = 61, 'DROP' = 62, 'TRUNCATE' = 63, 'OPTIMIZE' = 64, 
'BACKUP' = 65, 'KILL QUERY' = 66, 'KILL TRANSACTION' = 67, 'MOVE PARTITION BETWEEN SHARDS' = 68, 'CREATE USER' = 69, 'ALTER USER' = 70, 'DROP USER' = 71, 'CREATE ROLE' = 72, 'ALTER ROLE' = 73, 'DROP ROLE' = 74, 'ROLE ADMIN' = 75, 'CREATE ROW POLICY' = 76, 'ALTER ROW POLICY' = 77, 'DROP ROW POLICY' = 78, 'CREATE QUOTA' = 79, 'ALTER QUOTA' = 80, 'DROP QUOTA' = 81, 'CREATE SETTINGS PROFILE' = 82, 'ALTER SETTINGS PROFILE' = 83, 'DROP SETTINGS PROFILE' = 84, 'SHOW USERS' = 85, 'SHOW ROLES' = 86, 'SHOW ROW POLICIES' = 87, 'SHOW QUOTAS' = 88, 'SHOW SETTINGS PROFILES' = 89, 'SHOW ACCESS' = 90, 'SHOW NAMED COLLECTIONS' = 91, 'ACCESS MANAGEMENT' = 92, 'SYSTEM SHUTDOWN' = 93, 'SYSTEM DROP DNS CACHE' = 94, 'SYSTEM DROP MARK CACHE' = 95, 'SYSTEM DROP UNCOMPRESSED CACHE' = 96, 'SYSTEM DROP MMAP CACHE' = 97, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 98, 'SYSTEM DROP FILESYSTEM CACHE' = 99, 'SYSTEM DROP SCHEMA CACHE' = 100, 'SYSTEM DROP CACHE' = 101, 'SYSTEM RELOAD CONFIG' = 102, 'SYSTEM RELOAD USERS' = 103, 'SYSTEM RELOAD SYMBOLS' = 104, 'SYSTEM RELOAD DICTIONARY' = 105, 'SYSTEM RELOAD MODEL' = 106, 'SYSTEM RELOAD FUNCTION' = 107, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 108, 'SYSTEM RELOAD' = 109, 'SYSTEM RESTART DISK' = 110, 'SYSTEM MERGES' = 111, 'SYSTEM TTL MERGES' = 112, 'SYSTEM FETCHES' = 113, 'SYSTEM MOVES' = 114, 'SYSTEM DISTRIBUTED SENDS' = 115, 'SYSTEM REPLICATED SENDS' = 116, 'SYSTEM SENDS' = 117, 'SYSTEM REPLICATION QUEUES' = 118, 'SYSTEM DROP REPLICA' = 119, 'SYSTEM SYNC REPLICA' = 120, 'SYSTEM RESTART REPLICA' = 121, 'SYSTEM RESTORE REPLICA' = 122, 'SYSTEM WAIT LOADING PARTS' = 123, 'SYSTEM SYNC DATABASE REPLICA' = 124, 'SYSTEM SYNC TRANSACTION LOG' = 125, 'SYSTEM FLUSH DISTRIBUTED' = 126, 'SYSTEM FLUSH LOGS' = 127, 'SYSTEM FLUSH' = 128, 'SYSTEM THREAD FUZZER' = 129, 'SYSTEM UNFREEZE' = 130, 'SYSTEM' = 131, 'dictGet' = 132, 'addressToLine' = 133, 'addressToLineWithInlines' = 134, 'addressToSymbol' = 135, 'demangle' = 136, 'INTROSPECTION' = 137, 'FILE' = 138, 'URL' = 139, 'REMOTE' = 140, 'MONGO' = 141, 'MEILISEARCH' = 142, 'MYSQL' = 143, 'POSTGRES' = 144, 'SQLITE' = 145, 'ODBC' = 146, 'JDBC' = 147, 'HDFS' = 148, 'S3' = 149, 'HIVE' = 150, 'SOURCES' = 151, 'CLUSTER' = 152, 'ALL' = 153, 'NONE' = 154)) + `parent_group` Nullable(Enum16('SHOW DATABASES' = 0, 'SHOW TABLES' = 1, 'SHOW COLUMNS' = 2, 'SHOW DICTIONARIES' = 3, 'SHOW' = 4, 'SHOW FILESYSTEM CACHES' = 5, 'SELECT' = 6, 'INSERT' = 7, 'ALTER UPDATE' = 8, 'ALTER DELETE' = 9, 'ALTER ADD COLUMN' = 10, 'ALTER MODIFY COLUMN' = 11, 'ALTER DROP COLUMN' = 12, 'ALTER COMMENT COLUMN' = 13, 'ALTER CLEAR COLUMN' = 14, 'ALTER RENAME COLUMN' = 15, 'ALTER MATERIALIZE COLUMN' = 16, 'ALTER COLUMN' = 17, 'ALTER MODIFY COMMENT' = 18, 'ALTER ORDER BY' = 19, 'ALTER SAMPLE BY' = 20, 'ALTER ADD INDEX' = 21, 'ALTER DROP INDEX' = 22, 'ALTER MATERIALIZE INDEX' = 23, 'ALTER CLEAR INDEX' = 24, 'ALTER INDEX' = 25, 'ALTER ADD PROJECTION' = 26, 'ALTER DROP PROJECTION' = 27, 'ALTER MATERIALIZE PROJECTION' = 28, 'ALTER CLEAR PROJECTION' = 29, 'ALTER PROJECTION' = 30, 'ALTER ADD CONSTRAINT' = 31, 'ALTER DROP CONSTRAINT' = 32, 'ALTER CONSTRAINT' = 33, 'ALTER TTL' = 34, 'ALTER MATERIALIZE TTL' = 35, 'ALTER SETTINGS' = 36, 'ALTER MOVE PARTITION' = 37, 'ALTER FETCH PARTITION' = 38, 'ALTER FREEZE PARTITION' = 39, 'ALTER DATABASE SETTINGS' = 40, 'ALTER NAMED COLLECTION' = 41, 'ALTER TABLE' = 42, 'ALTER DATABASE' = 43, 'ALTER VIEW REFRESH' = 44, 'ALTER VIEW MODIFY QUERY' = 45, 'ALTER VIEW' = 46, 'ALTER' = 47, 'CREATE DATABASE' = 48, 'CREATE TABLE' = 49, 'CREATE VIEW' = 50, 'CREATE 
DICTIONARY' = 51, 'CREATE TEMPORARY TABLE' = 52, 'CREATE FUNCTION' = 53, 'CREATE NAMED COLLECTION' = 54, 'CREATE' = 55, 'DROP DATABASE' = 56, 'DROP TABLE' = 57, 'DROP VIEW' = 58, 'DROP DICTIONARY' = 59, 'DROP FUNCTION' = 60, 'DROP NAMED COLLECTION' = 61, 'DROP' = 62, 'TRUNCATE' = 63, 'OPTIMIZE' = 64, 'BACKUP' = 65, 'KILL QUERY' = 66, 'KILL TRANSACTION' = 67, 'MOVE PARTITION BETWEEN SHARDS' = 68, 'CREATE USER' = 69, 'ALTER USER' = 70, 'DROP USER' = 71, 'CREATE ROLE' = 72, 'ALTER ROLE' = 73, 'DROP ROLE' = 74, 'ROLE ADMIN' = 75, 'CREATE ROW POLICY' = 76, 'ALTER ROW POLICY' = 77, 'DROP ROW POLICY' = 78, 'CREATE QUOTA' = 79, 'ALTER QUOTA' = 80, 'DROP QUOTA' = 81, 'CREATE SETTINGS PROFILE' = 82, 'ALTER SETTINGS PROFILE' = 83, 'DROP SETTINGS PROFILE' = 84, 'SHOW USERS' = 85, 'SHOW ROLES' = 86, 'SHOW ROW POLICIES' = 87, 'SHOW QUOTAS' = 88, 'SHOW SETTINGS PROFILES' = 89, 'SHOW ACCESS' = 90, 'SHOW NAMED COLLECTIONS' = 91, 'ACCESS MANAGEMENT' = 92, 'SYSTEM SHUTDOWN' = 93, 'SYSTEM DROP DNS CACHE' = 94, 'SYSTEM DROP MARK CACHE' = 95, 'SYSTEM DROP UNCOMPRESSED CACHE' = 96, 'SYSTEM DROP MMAP CACHE' = 97, 'SYSTEM DROP QUERY RESULT CACHE' = 98, 'SYSTEM DROP COMPILED EXPRESSION CACHE' = 99, 'SYSTEM DROP FILESYSTEM CACHE' = 100, 'SYSTEM DROP SCHEMA CACHE' = 101, 'SYSTEM DROP CACHE' = 102, 'SYSTEM RELOAD CONFIG' = 103, 'SYSTEM RELOAD USERS' = 104, 'SYSTEM RELOAD SYMBOLS' = 105, 'SYSTEM RELOAD DICTIONARY' = 106, 'SYSTEM RELOAD MODEL' = 107, 'SYSTEM RELOAD FUNCTION' = 108, 'SYSTEM RELOAD EMBEDDED DICTIONARIES' = 109, 'SYSTEM RELOAD' = 110, 'SYSTEM RESTART DISK' = 111, 'SYSTEM MERGES' = 112, 'SYSTEM TTL MERGES' = 113, 'SYSTEM FETCHES' = 114, 'SYSTEM MOVES' = 115, 'SYSTEM DISTRIBUTED SENDS' = 116, 'SYSTEM REPLICATED SENDS' = 117, 'SYSTEM SENDS' = 118, 'SYSTEM REPLICATION QUEUES' = 119, 'SYSTEM DROP REPLICA' = 120, 'SYSTEM SYNC REPLICA' = 121, 'SYSTEM RESTART REPLICA' = 122, 'SYSTEM RESTORE REPLICA' = 123, 'SYSTEM WAIT LOADING PARTS' = 124, 'SYSTEM SYNC DATABASE REPLICA' = 125, 'SYSTEM SYNC TRANSACTION LOG' = 126, 'SYSTEM FLUSH DISTRIBUTED' = 127, 'SYSTEM FLUSH LOGS' = 128, 'SYSTEM FLUSH' = 129, 'SYSTEM THREAD FUZZER' = 130, 'SYSTEM UNFREEZE' = 131, 'SYSTEM' = 132, 'dictGet' = 133, 'addressToLine' = 134, 'addressToLineWithInlines' = 135, 'addressToSymbol' = 136, 'demangle' = 137, 'INTROSPECTION' = 138, 'FILE' = 139, 'URL' = 140, 'REMOTE' = 141, 'MONGO' = 142, 'MEILISEARCH' = 143, 'MYSQL' = 144, 'POSTGRES' = 145, 'SQLITE' = 146, 'ODBC' = 147, 'JDBC' = 148, 'HDFS' = 149, 'S3' = 150, 'HIVE' = 151, 'SOURCES' = 152, 'CLUSTER' = 153, 'ALL' = 154, 'NONE' = 155)) ) ENGINE = SystemPrivileges COMMENT 'SYSTEM TABLE is built on the fly.' 
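The reference updates above register the new SYSTEM DROP QUERY RESULT CACHE access type alongside the existing cache-drop privileges. As a minimal sketch of how the new privilege could be exercised (the GRANT form and the qrc_admin user are assumptions for illustration; only the bare SYSTEM statement itself appears in the new 02494_* tests later in this patch):

-- Grant the new global privilege to an illustrative user (user name is hypothetical, not part of this patch).
GRANT SYSTEM DROP QUERY RESULT CACHE ON *.* TO qrc_admin;
-- Clear all entries from the query result cache, as the new query-result-cache tests below do.
SYSTEM DROP QUERY RESULT CACHE;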
diff --git a/tests/queries/0_stateless/02118_deserialize_whole_text.sh b/tests/queries/0_stateless/02118_deserialize_whole_text.sh index e9f35582f15..d4702887e7f 100755 --- a/tests/queries/0_stateless/02118_deserialize_whole_text.sh +++ b/tests/queries/0_stateless/02118_deserialize_whole_text.sh @@ -33,10 +33,10 @@ echo "[\"255.255.255.255trash\"]" > $DATA_FILE $CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x IPv4')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "255.255.255.255trash" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'TSV', 'x IPv4')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'TSV', 'x IPv4')" 2>&1 | grep -F -q "CANNOT_PARSE_INPUT_ASSERTION_FAILED" && echo 'OK' || echo 'FAIL' echo "255.255.255.255trash" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'CSV', 'x IPv4')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'CSV', 'x IPv4')" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' echo "[\"255.255.255.255trash\"]" > $DATA_FILE $CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactEachRow', 'x IPv4')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' @@ -45,10 +45,10 @@ echo "[\"0000:0000:0000:0000:0000:ffff:192.168.100.228b1trash\"]" > $DATA_FILE $CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactStringsEachRow', 'x IPv6')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' echo "0000:0000:0000:0000:0000:ffff:192.168.100.228b1trash" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'TSV', 'x IPv6')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'TSV', 'x IPv6')" 2>&1 | grep -F -q "CANNOT_PARSE_INPUT_ASSERTION_FAILED" && echo 'OK' || echo 'FAIL' echo "0000:0000:0000:0000:0000:ffff:192.168.100.228b1trash" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'CSV', 'x IPv6')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'CSV', 'x IPv6')" 2>&1 | grep -F -q "INCORRECT_DATA" && echo 'OK' || echo 'FAIL' echo "[\"0000:0000:0000:0000:0000:ffff:192.168.100.228b1trash\"]" > $DATA_FILE $CLICKHOUSE_CLIENT -q "SELECT * FROM file('data_02118', 'JSONCompactEachRow', 'x IPv6')" 2>&1 | grep -F -q "UNEXPECTED_DATA_AFTER_PARSED_VALUE" && echo 'OK' || echo 'FAIL' diff --git a/tests/queries/0_stateless/02119_sumcount.sql b/tests/queries/0_stateless/02119_sumcount.sql index 86625996f44..6e6409935d5 100644 --- a/tests/queries/0_stateless/02119_sumcount.sql +++ b/tests/queries/0_stateless/02119_sumcount.sql @@ -1,3 +1,11 @@ +set query_plan_remove_redundant_sorting=0; -- disable it for now since the test with Float64 is failing with it +-- while debugging I observed incorrect behavior which can affect the current test result +-- but it's still unclear why the test is not failing w/o the optimization +-- SELECT CAST('9007199254740992', 'Float64') + CAST('1', 'Float64') +-- ┌─plus(CAST('9007199254740992', 'Float64'), CAST('1', 'Float64'))─┐ +-- │ 9007199254740992 │ +-- └─────────────────────────────────────────────────────────────────┘ + -- Integer types are added as 
integers SELECT toTypeName(sumCount(v)), sumCount(v) FROM ( diff --git a/tests/queries/0_stateless/02228_unquoted_dates_in_csv_schema_inference.reference b/tests/queries/0_stateless/02228_unquoted_dates_in_csv_schema_inference.reference index 5fd48ae580a..be82d744a3b 100644 --- a/tests/queries/0_stateless/02228_unquoted_dates_in_csv_schema_inference.reference +++ b/tests/queries/0_stateless/02228_unquoted_dates_in_csv_schema_inference.reference @@ -1 +1 @@ -c1 Nullable(String) +c1 Nullable(DateTime64(9)) diff --git a/tests/queries/0_stateless/02240_filesystem_query_cache.reference b/tests/queries/0_stateless/02240_filesystem_query_cache.reference index 48d91c6f142..f4b9f7bb127 100644 --- a/tests/queries/0_stateless/02240_filesystem_query_cache.reference +++ b/tests/queries/0_stateless/02240_filesystem_query_cache.reference @@ -3,7 +3,7 @@ SYSTEM DROP FILESYSTEM CACHE; SET enable_filesystem_cache_on_write_operations=0; SET skip_download_if_exceeds_query_cache=1; -SET max_query_cache_size=128; +SET filesystem_cache_max_download_size=128; DROP TABLE IF EXISTS test; CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_4', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; INSERT INTO test SELECT number, toString(number) FROM numbers(100); diff --git a/tests/queries/0_stateless/02240_filesystem_query_cache.sql b/tests/queries/0_stateless/02240_filesystem_query_cache.sql index 7dd975b27ee..94eb4bc5ccd 100644 --- a/tests/queries/0_stateless/02240_filesystem_query_cache.sql +++ b/tests/queries/0_stateless/02240_filesystem_query_cache.sql @@ -5,7 +5,7 @@ SYSTEM DROP FILESYSTEM CACHE; SET enable_filesystem_cache_on_write_operations=0; SET skip_download_if_exceeds_query_cache=1; -SET max_query_cache_size=128; +SET filesystem_cache_max_download_size=128; DROP TABLE IF EXISTS test; CREATE TABLE test (key UInt32, value String) Engine=MergeTree() ORDER BY key SETTINGS storage_policy='s3_cache_4', min_bytes_for_wide_part = 10485760, compress_marks=false, compress_primary_key=false; diff --git a/tests/queries/0_stateless/02244_issue_35598_fuse.reference b/tests/queries/0_stateless/02244_issue_35598_fuse.reference deleted file mode 100644 index 6ce84b402a3..00000000000 --- a/tests/queries/0_stateless/02244_issue_35598_fuse.reference +++ /dev/null @@ -1,2 +0,0 @@ -0 0 nan -0 0 nan diff --git a/tests/queries/0_stateless/02244_issue_35598_fuse.sql b/tests/queries/0_stateless/02244_issue_35598_fuse.sql deleted file mode 100644 index a590854eb6c..00000000000 --- a/tests/queries/0_stateless/02244_issue_35598_fuse.sql +++ /dev/null @@ -1,5 +0,0 @@ -SELECT sum(x), count(x), avg(x) FROM (SELECT number :: Decimal32(0) AS x FROM numbers(0)) -SETTINGS optimize_syntax_fuse_functions = 0, optimize_fuse_sum_count_avg = 0; - -SELECT sum(x), count(x), avg(x) FROM (SELECT number :: Decimal32(0) AS x FROM numbers(0)) -SETTINGS optimize_syntax_fuse_functions = 1, optimize_fuse_sum_count_avg = 1; diff --git a/tests/queries/0_stateless/02377_optimize_sorting_by_input_stream_properties_explain.reference b/tests/queries/0_stateless/02377_optimize_sorting_by_input_stream_properties_explain.reference index ba924f5daa2..bafa70556e7 100644 --- a/tests/queries/0_stateless/02377_optimize_sorting_by_input_stream_properties_explain.reference +++ b/tests/queries/0_stateless/02377_optimize_sorting_by_input_stream_properties_explain.reference @@ -1,5 +1,5 @@ -- EXPLAIN PLAN sorting for MergeTree w/o sorting key --- QUERY: set 
optimize_read_in_order=1;set max_threads=3;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting ORDER BY a +-- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting ORDER BY a Sorting (Global): a ASC Sorting (Sorting for ORDER BY) Sorting (Global): a ASC @@ -21,49 +21,49 @@ MergeSortingTransform × 3 LimitsCheckingTransform × 3 PartialSortingTransform × 3 -- ExpressionStep preserves sort mode --- QUERY: set optimize_read_in_order=1;set max_threads=3;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting ORDER BY a +-- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting ORDER BY a Sorting (Global): a ASC Sorting (Sorting for ORDER BY) Sorting (Global): a ASC Sorting (Chunk): a ASC Sorting (Stream): a ASC --- QUERY: set optimize_read_in_order=1;set max_threads=3;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting ORDER BY a+1 +-- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting ORDER BY a+1 Sorting (None) Sorting (Sorting for ORDER BY) Sorting (Global): plus(a, 1) ASC Sorting (Chunk): a ASC Sorting (Chunk): a ASC -- ExpressionStep breaks sort mode --- QUERY: set optimize_read_in_order=1;set max_threads=3;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a+1 FROM optimize_sorting ORDER BY a+1 +-- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a+1 FROM optimize_sorting ORDER BY a+1 Sorting (Global): plus(a, 1) ASC Sorting (Sorting for ORDER BY) Sorting (Global): plus(a, 1) ASC Sorting (None) Sorting (Chunk): a ASC -- FilterStep preserves sort mode --- QUERY: set optimize_read_in_order=1;set max_threads=3;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting WHERE a > 0 +-- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting WHERE a > 0 Sorting (Chunk): a ASC Sorting (Chunk): a ASC Sorting (Chunk): a ASC --- QUERY: set optimize_read_in_order=1;set max_threads=3;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting WHERE a+1 > 0 +-- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM optimize_sorting WHERE a+1 > 0 Sorting (Chunk): a ASC Sorting (Chunk): a ASC Sorting (Chunk): a ASC --- QUERY: set optimize_read_in_order=1;set max_threads=3;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a, a+1 FROM optimize_sorting WHERE a+1 > 0 +-- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a, a+1 FROM optimize_sorting WHERE a+1 > 0 Sorting (Chunk): a ASC Sorting (Chunk): a ASC Sorting (Chunk): a ASC -- FilterStep breaks sort mode --- QUERY: set optimize_read_in_order=1;set max_threads=3;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a > 0 FROM optimize_sorting WHERE a > 0 +-- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a > 0 FROM 
optimize_sorting WHERE a > 0 Sorting (None) Sorting (None) Sorting (Chunk): a ASC --- QUERY: set optimize_read_in_order=1;set max_threads=3;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a+1 FROM optimize_sorting WHERE a+1 > 0 +-- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a+1 FROM optimize_sorting WHERE a+1 > 0 Sorting (None) Sorting (None) Sorting (Chunk): a ASC -- aliases break sorting order --- QUERY: set optimize_read_in_order=1;set max_threads=3;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM (SELECT sipHash64(a) AS a FROM (SELECT a FROM optimize_sorting ORDER BY a)) ORDER BY a +-- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a FROM (SELECT sipHash64(a) AS a FROM (SELECT a FROM optimize_sorting ORDER BY a)) ORDER BY a Sorting (Global): a ASC Sorting (Sorting for ORDER BY) Sorting (Global): a ASC @@ -73,14 +73,14 @@ Sorting (Global): a ASC Sorting (Chunk): a ASC Sorting (Stream): a ASC -- aliases DONT break sorting order --- QUERY: set optimize_read_in_order=1;set max_threads=3;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a, b FROM (SELECT x AS a, y AS b FROM (SELECT a AS x, b AS y FROM optimize_sorting) ORDER BY x, y) +-- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a, b FROM (SELECT x AS a, y AS b FROM (SELECT a AS x, b AS y FROM optimize_sorting) ORDER BY x, y) Sorting (Global): x ASC, y ASC Sorting (Sorting for ORDER BY) Sorting (Global): x ASC, y ASC Sorting (Chunk): a ASC, b ASC Sorting (Stream): a ASC, b ASC -- actions chain breaks sorting order: input(column a)->sipHash64(column a)->alias(sipHash64(column a), a)->plus(alias a, 1) --- QUERY: set optimize_read_in_order=1;set max_threads=3;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a, z FROM (SELECT sipHash64(a) AS a, a + 1 AS z FROM (SELECT a FROM optimize_sorting ORDER BY a + 1)) ORDER BY a + 1 +-- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN actions=1, header=1, sorting=1 SELECT a, z FROM (SELECT sipHash64(a) AS a, a + 1 AS z FROM (SELECT a FROM optimize_sorting ORDER BY a + 1)) ORDER BY a + 1 Sorting (None) Sorting (Sorting for ORDER BY) Sorting (Global): plus(a, 1) ASC @@ -90,7 +90,7 @@ Sorting (Global): plus(a, 1) ASC Sorting (Chunk): a ASC Sorting (Chunk): a ASC -- check that correct sorting info is provided in case of only prefix of sorting key is in ORDER BY clause but all sorting key columns returned by query --- QUERY: set optimize_read_in_order=1;set max_threads=3;EXPLAIN PLAN sorting=1 SELECT a, b FROM optimize_sorting ORDER BY a +-- QUERY: set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0;EXPLAIN PLAN sorting=1 SELECT a, b FROM optimize_sorting ORDER BY a Sorting (Global): a ASC Sorting (Sorting for ORDER BY) Sorting (Global): a ASC diff --git a/tests/queries/0_stateless/02377_optimize_sorting_by_input_stream_properties_explain.sh b/tests/queries/0_stateless/02377_optimize_sorting_by_input_stream_properties_explain.sh index 62051701cb6..0678ff63e3f 100755 --- a/tests/queries/0_stateless/02377_optimize_sorting_by_input_stream_properties_explain.sh +++ b/tests/queries/0_stateless/02377_optimize_sorting_by_input_stream_properties_explain.sh @@ -6,7 +6,7 @@ CURDIR=$(cd "$(dirname 
"${BASH_SOURCE[0]}")" && pwd) DISABLE_OPTIMIZATION="set optimize_sorting_by_input_stream_properties=0;set query_plan_read_in_order=0;set max_threads=3" ENABLE_OPTIMIZATION="set optimize_sorting_by_input_stream_properties=1;set query_plan_read_in_order=1;set optimize_read_in_order=1;set max_threads=3" -MAKE_OUTPUT_STABLE="set optimize_read_in_order=1;set max_threads=3" +MAKE_OUTPUT_STABLE="set optimize_read_in_order=1;set max_threads=3;set query_plan_remove_redundant_sorting=0" GREP_SORTING="grep 'PartialSortingTransform\|LimitsCheckingTransform\|MergeSortingTransform\|MergingSortedTransform'" GREP_SORTMODE="grep 'Sorting ('" TRIM_LEADING_SPACES="sed -e 's/^[ \t]*//'" diff --git a/tests/queries/0_stateless/02404_memory_bound_merging.sql b/tests/queries/0_stateless/02404_memory_bound_merging.sql index a0ac0342c34..cd044642070 100644 --- a/tests/queries/0_stateless/02404_memory_bound_merging.sql +++ b/tests/queries/0_stateless/02404_memory_bound_merging.sql @@ -1,5 +1,13 @@ -- Tags: no-parallel, no-random-merge-tree-settings +drop table if exists pr_t; +drop table if exists dist_pr_t; +drop table if exists dist_t_different_dbs; +drop table if exists shard_1.t_different_dbs; +drop table if exists t_different_dbs; +drop table if exists dist_t; +drop table if exists t; + create table t(a UInt64, b UInt64) engine=MergeTree order by a; system stop merges t; insert into t select number, number from numbers_mt(1e6); @@ -64,6 +72,7 @@ select a, count() from dist_pr_t group by a, b order by a limit 5 offset 500; -- { echoOff } -- +drop table pr_t; drop table dist_pr_t; drop table dist_t_different_dbs; drop table shard_1.t_different_dbs; diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index 9d747f9c572..d225cf5f332 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -344,8 +344,6 @@ hasAny hasColumnInTable hasSubstr hasThreadFuzzer -hasToken -hasTokenCaseInsensitive hashid hex hiveHash diff --git a/tests/queries/0_stateless/02476_fuse_sum_count.sql b/tests/queries/0_stateless/02476_fuse_sum_count.sql index ee65d32d0cf..315bbd10a65 100644 --- a/tests/queries/0_stateless/02476_fuse_sum_count.sql +++ b/tests/queries/0_stateless/02476_fuse_sum_count.sql @@ -1,5 +1,5 @@ SET allow_experimental_analyzer = 1; -SET optimize_syntax_fuse_functions = 1, optimize_fuse_sum_count_avg = 1; +SET optimize_syntax_fuse_functions = 1; DROP TABLE IF EXISTS fuse_tbl; diff --git a/tests/queries/0_stateless/02477_fuse_quantiles.sql b/tests/queries/0_stateless/02477_fuse_quantiles.sql index efd861ad7f3..c0719d771d7 100644 --- a/tests/queries/0_stateless/02477_fuse_quantiles.sql +++ b/tests/queries/0_stateless/02477_fuse_quantiles.sql @@ -1,5 +1,5 @@ SET allow_experimental_analyzer = 1; -SET optimize_syntax_fuse_functions = 1, optimize_fuse_sum_count_avg = 1; +SET optimize_syntax_fuse_functions = 1; DROP TABLE IF EXISTS fuse_tbl; diff --git a/tests/queries/0_stateless/02481_analyzer_optimize_grouping_sets_keys.reference b/tests/queries/0_stateless/02481_analyzer_optimize_grouping_sets_keys.reference index fdab24700ac..d01bb5715ad 100644 --- a/tests/queries/0_stateless/02481_analyzer_optimize_grouping_sets_keys.reference +++ b/tests/queries/0_stateless/02481_analyzer_optimize_grouping_sets_keys.reference @@ -246,3 +246,40 @@ QUERY id: 0, group_by_type: grouping_sets ARGUMENTS LIST 
id: 53, nodes: 1 COLUMN id: 54, column_name: number, result_type: UInt64, source_id: 11 +QUERY id: 0, group_by_type: grouping_sets + PROJECTION COLUMNS + count() UInt64 + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: count, function_type: aggregate, result_type: UInt64 + JOIN TREE + TABLE_FUNCTION id: 3, table_function_name: numbers + ARGUMENTS + LIST id: 4, nodes: 1 + CONSTANT id: 5, constant_value: UInt64_1000, constant_value_type: UInt16 + GROUP BY + LIST id: 6, nodes: 3 + LIST id: 7, nodes: 1 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 3 + LIST id: 9, nodes: 2 + FUNCTION id: 10, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 11, nodes: 2 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 3 + CONSTANT id: 12, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 13, function_name: modulo, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 14, nodes: 2 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 3 + CONSTANT id: 15, constant_value: UInt64_3, constant_value_type: UInt8 + LIST id: 16, nodes: 2 + FUNCTION id: 17, function_name: divide, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 18, nodes: 2 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 3 + CONSTANT id: 19, constant_value: UInt64_2, constant_value_type: UInt8 + FUNCTION id: 20, function_name: divide, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 21, nodes: 2 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 3 + CONSTANT id: 22, constant_value: UInt64_3, constant_value_type: UInt8 diff --git a/tests/queries/0_stateless/02481_analyzer_optimize_grouping_sets_keys.sql b/tests/queries/0_stateless/02481_analyzer_optimize_grouping_sets_keys.sql index 0c757cb111c..b51233f734c 100644 --- a/tests/queries/0_stateless/02481_analyzer_optimize_grouping_sets_keys.sql +++ b/tests/queries/0_stateless/02481_analyzer_optimize_grouping_sets_keys.sql @@ -15,3 +15,12 @@ SELECT avg(log(2) * number) AS k FROM numbers(10000000) GROUP BY GROUPING SETS (((number % 2) * (number % 3), number % 3), (number % 2)) HAVING avg(log(2) * number) > 3465735.3 ORDER BY k; + +EXPLAIN QUERY TREE run_passes=1 +SELECT count() FROM numbers(1000) +GROUP BY GROUPING SETS + ( + (number, number + 1, number +2), + (number % 2, number % 3), + (number / 2, number / 3) + ); diff --git a/tests/queries/0_stateless/02493_max_streams_for_merge_tree_reading.reference b/tests/queries/0_stateless/02493_max_streams_for_merge_tree_reading.reference index f517be778ed..dfa09193761 100644 --- a/tests/queries/0_stateless/02493_max_streams_for_merge_tree_reading.reference +++ b/tests/queries/0_stateless/02493_max_streams_for_merge_tree_reading.reference @@ -31,6 +31,7 @@ select * from (explain pipeline select sum(x) from t settings max_threads=4, max Resize 32 → 16 MergeTreeThread × 32 0 → 1 -- For read-in-order, disable everything +set query_plan_remove_redundant_sorting=0; -- to keep reading in order select sum(x) from (select x from t order by x) settings max_threads=4, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=1, optimize_read_in_order=1, query_plan_read_in_order=1; 49999995000000 select * from (explain pipeline select sum(x) from (select x from t order by x) settings max_threads=4, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=1, optimize_read_in_order=1, 
query_plan_read_in_order=1) where explain like '%Resize%'; diff --git a/tests/queries/0_stateless/02493_max_streams_for_merge_tree_reading.sql b/tests/queries/0_stateless/02493_max_streams_for_merge_tree_reading.sql index 12b56cfe674..c8643b5c758 100644 --- a/tests/queries/0_stateless/02493_max_streams_for_merge_tree_reading.sql +++ b/tests/queries/0_stateless/02493_max_streams_for_merge_tree_reading.sql @@ -1,5 +1,6 @@ -- Tags: no-random-merge-tree-settings +drop table if exists t; create table t (x UInt64) engine = MergeTree order by x; insert into t select number from numbers_mt(10000000) settings max_insert_threads=8; @@ -22,7 +23,11 @@ select sum(x) from t settings max_threads=4, max_streams_for_merge_tree_reading= select * from (explain pipeline select sum(x) from t settings max_threads=4, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=1, max_streams_to_max_threads_ratio=8) where explain like '%Resize%' or explain like '%MergeTreeThread%'; -- For read-in-order, disable everything +set query_plan_remove_redundant_sorting=0; -- to keep reading in order select sum(x) from (select x from t order by x) settings max_threads=4, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=1, optimize_read_in_order=1, query_plan_read_in_order=1; select * from (explain pipeline select sum(x) from (select x from t order by x) settings max_threads=4, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=1, optimize_read_in_order=1, query_plan_read_in_order=1) where explain like '%Resize%'; select sum(x) from (select x from t order by x) settings max_threads=4, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=1, max_streams_to_max_threads_ratio=8, optimize_read_in_order=1, query_plan_read_in_order=1; select * from (explain pipeline select sum(x) from (select x from t order by x) settings max_threads=4, max_streams_for_merge_tree_reading=16, allow_asynchronous_read_from_io_pool_for_merge_tree=1, max_streams_to_max_threads_ratio=8, optimize_read_in_order=1, query_plan_read_in_order=1) where explain like '%Resize%'; + +-- { echoOff } +drop table t; diff --git a/tests/queries/0_stateless/02494_query_result_cache_case_agnostic_matching.reference b/tests/queries/0_stateless/02494_query_result_cache_case_agnostic_matching.reference new file mode 100644 index 00000000000..d2397bbcd34 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_result_cache_case_agnostic_matching.reference @@ -0,0 +1,6 @@ +1 +1 +--- +1 +1 +1 0 diff --git a/tests/queries/0_stateless/02494_query_result_cache_case_agnostic_matching.sql b/tests/queries/0_stateless/02494_query_result_cache_case_agnostic_matching.sql new file mode 100644 index 00000000000..ad55de48acd --- /dev/null +++ b/tests/queries/0_stateless/02494_query_result_cache_case_agnostic_matching.sql @@ -0,0 +1,27 @@ +-- Tags: no-parallel +-- Tag no-parallel: Messes with internal cache + +-- Start with empty query result cache (QRC) and query log +SYSTEM DROP QUERY RESULT CACHE; +DROP TABLE system.query_log SYNC; + +-- Insert an entry into the query result cache. +SELECT 1 SETTINGS enable_experimental_query_result_cache = true; +-- Check that entry in QRC exists +SELECT COUNT(*) FROM system.query_result_cache; + +-- Run the same SELECT but with different case (--> select). We want its result to be served from the QRC. 
+SELECT '---'; +select 1 SETTINGS enable_experimental_query_result_cache = true; + +-- There should still be just one entry in the QRC +SELECT COUNT(*) FROM system.query_result_cache; + +-- The second query should cause a QRC hit. +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['QueryResultCacheHits'], ProfileEvents['QueryResultCacheMisses'] +FROM system.query_log +WHERE type = 'QueryFinish' + AND query = 'select 1 SETTINGS enable_experimental_query_result_cache = true;'; + +SYSTEM DROP QUERY RESULT CACHE; diff --git a/tests/queries/0_stateless/02494_query_result_cache_drop_cache.reference b/tests/queries/0_stateless/02494_query_result_cache_drop_cache.reference new file mode 100644 index 00000000000..2f1465d1598 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_result_cache_drop_cache.reference @@ -0,0 +1,3 @@ +1 +1 +0 diff --git a/tests/queries/0_stateless/02494_query_result_cache_drop_cache.sql b/tests/queries/0_stateless/02494_query_result_cache_drop_cache.sql new file mode 100644 index 00000000000..a5669f6f3a2 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_result_cache_drop_cache.sql @@ -0,0 +1,10 @@ +-- Tags: no-parallel +-- Tag no-parallel: Messes with internal cache + +-- Cache query result in query result cache +SELECT 1 SETTINGS enable_experimental_query_result_cache = true; +SELECT count(*) FROM system.query_result_cache; + +-- No query results are cached after DROP +SYSTEM DROP QUERY RESULT CACHE; +SELECT count(*) FROM system.query_result_cache; diff --git a/tests/queries/0_stateless/02494_query_result_cache_eligible_queries.reference b/tests/queries/0_stateless/02494_query_result_cache_eligible_queries.reference new file mode 100644 index 00000000000..33a09d872dd --- /dev/null +++ b/tests/queries/0_stateless/02494_query_result_cache_eligible_queries.reference @@ -0,0 +1,19 @@ +1 +1 +0 +0 +0 +0 +eligible_test +0 +1 +0 +a String +b String +0 +1 +0 +0 +0 +0 +0 diff --git a/tests/queries/0_stateless/02494_query_result_cache_eligible_queries.sql b/tests/queries/0_stateless/02494_query_result_cache_eligible_queries.sql new file mode 100644 index 00000000000..a17298a87f6 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_result_cache_eligible_queries.sql @@ -0,0 +1,66 @@ +-- Tags: no-parallel +-- Tag no-parallel: Messes with internal cache + +SYSTEM DROP QUERY RESULT CACHE; +DROP TABLE IF EXISTS eligible_test; +DROP TABLE IF EXISTS eligible_test2; + +-- enable query result cache session-wide but also force it individually in each of below statements +SET enable_experimental_query_result_cache = true; + +-- check that SELECT statements create entries in the query result cache ... +SELECT 1 SETTINGS enable_experimental_query_result_cache = true; +SELECT COUNT(*) FROM system.query_result_cache; + +SYSTEM DROP QUERY RESULT CACHE; + +-- ... 
and all other statements also should not create entries: + +-- CREATE +CREATE TABLE eligible_test (a String) ENGINE=MergeTree ORDER BY a; -- SETTINGS enable_experimental_query_result_cache = true; -- SETTINGS rejected as unknown +SELECT COUNT(*) FROM system.query_result_cache; + +-- ALTER +ALTER TABLE eligible_test ADD COLUMN b String SETTINGS enable_experimental_query_result_cache = true; +SELECT COUNT(*) FROM system.query_result_cache; + +-- INSERT +INSERT INTO eligible_test VALUES('a', 'b'); -- SETTINGS enable_experimental_query_result_cache = true; -- SETTINGS rejected as unknown +SELECT COUNT(*) FROM system.query_result_cache; +INSERT INTO eligible_test SELECT * FROM eligible_test SETTINGS enable_experimental_query_result_cache = true; +SELECT COUNT(*) FROM system.query_result_cache; + +-- SHOW +SHOW TABLES SETTINGS enable_experimental_query_result_cache = true; +SELECT COUNT(*) FROM system.query_result_cache; + +-- CHECK +CHECK TABLE eligible_test SETTINGS enable_experimental_query_result_cache = true; +SELECT COUNT(*) FROM system.query_result_cache; + +-- DESCRIBE +DESCRIBE TABLE eligible_test SETTINGS enable_experimental_query_result_cache = true; +SELECT COUNT(*) FROM system.query_result_cache; + +-- EXISTS +EXISTS TABLE eligible_test SETTINGS enable_experimental_query_result_cache = true; +SELECT COUNT(*) FROM system.query_result_cache; + +-- KILL +KILL QUERY WHERE query_id='3-857d-4a57-9ee0-3c7da5d60a90' SETTINGS enable_experimental_query_result_cache = true; +SELECT COUNT(*) FROM system.query_result_cache; + +-- OPTIMIZE +OPTIMIZE TABLE eligible_test FINAL SETTINGS enable_experimental_query_result_cache = true; +SELECT COUNT(*) FROM system.query_result_cache; + +-- TRUNCATE +TRUNCATE TABLE eligible_test SETTINGS enable_experimental_query_result_cache = true; +SELECT COUNT(*) FROM system.query_result_cache; + +-- RENAME +RENAME TABLE eligible_test TO eligible_test2 SETTINGS enable_experimental_query_result_cache = true; +SELECT COUNT(*) FROM system.query_result_cache; + +SYSTEM DROP QUERY RESULT CACHE; +DROP TABLE eligible_test2; diff --git a/tests/queries/0_stateless/02494_query_result_cache_events.reference b/tests/queries/0_stateless/02494_query_result_cache_events.reference new file mode 100644 index 00000000000..db60d3699e0 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_result_cache_events.reference @@ -0,0 +1,6 @@ +--- +1 +0 1 +--- +1 +1 0 diff --git a/tests/queries/0_stateless/02494_query_result_cache_events.sql b/tests/queries/0_stateless/02494_query_result_cache_events.sql new file mode 100644 index 00000000000..1668a38c1a8 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_result_cache_events.sql @@ -0,0 +1,30 @@ +-- Tags: no-parallel +-- Tag no-parallel: Messes with internal cache + +-- Start with empty query result cache (QRC) and query log +SYSTEM DROP QUERY RESULT CACHE; +DROP TABLE system.query_log SYNC; + +-- Run a query with QRC on. The first execution is a QRC miss. 
+SELECT '---'; +SELECT 1 SETTINGS enable_experimental_query_result_cache = true; + +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['QueryResultCacheHits'], ProfileEvents['QueryResultCacheMisses'] +FROM system.query_log +WHERE type = 'QueryFinish' + AND query = 'SELECT 1 SETTINGS enable_experimental_query_result_cache = true;'; + + +-- Run previous query again with query result cache on +SELECT '---'; +SELECT 1 SETTINGS enable_experimental_query_result_cache = true; + +DROP TABLE system.query_log SYNC; +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['QueryResultCacheHits'], ProfileEvents['QueryResultCacheMisses'] +FROM system.query_log +WHERE type = 'QueryFinish' + AND query = 'SELECT 1 SETTINGS enable_experimental_query_result_cache = true;'; + +SYSTEM DROP QUERY RESULT CACHE; diff --git a/tests/queries/0_stateless/02494_query_result_cache_exception_handling.reference b/tests/queries/0_stateless/02494_query_result_cache_exception_handling.reference new file mode 100644 index 00000000000..573541ac970 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_result_cache_exception_handling.reference @@ -0,0 +1 @@ +0 diff --git a/tests/queries/0_stateless/02494_query_result_cache_exception_handling.sql b/tests/queries/0_stateless/02494_query_result_cache_exception_handling.sql new file mode 100644 index 00000000000..34a46ea3d24 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_result_cache_exception_handling.sql @@ -0,0 +1,10 @@ +-- Tags: no-parallel +-- Tag no-parallel: Messes with internal cache + +SYSTEM DROP QUERY RESULT CACHE; + +-- If an exception is thrown during query execution, no entry must be created in the query result cache +SELECT throwIf(1) SETTINGS enable_experimental_query_result_cache = true; -- { serverError FUNCTION_THROW_IF_VALUE_IS_NON_ZERO } +SELECT COUNT(*) FROM system.query_result_cache; + +SYSTEM DROP QUERY RESULT CACHE; diff --git a/tests/queries/0_stateless/02494_query_result_cache_explain.reference b/tests/queries/0_stateless/02494_query_result_cache_explain.reference new file mode 100644 index 00000000000..ecc965ac391 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_result_cache_explain.reference @@ -0,0 +1,21 @@ +1 +1 +Expression ((Projection + Before ORDER BY)) + Limit (preliminary LIMIT (without OFFSET)) + ReadFromStorage (SystemNumbers) +Expression ((Projection + Before ORDER BY)) + Limit (preliminary LIMIT (without OFFSET)) + ReadFromStorage (SystemNumbers) +(Expression) +ExpressionTransform + (Limit) + Limit + (ReadFromStorage) + Numbers 0 → 1 +(Expression) +ExpressionTransform + (Limit) + Limit + (ReadFromStorage) + Numbers 0 → 1 +1 diff --git a/tests/queries/0_stateless/02494_query_result_cache_explain.sql b/tests/queries/0_stateless/02494_query_result_cache_explain.sql new file mode 100644 index 00000000000..48d47d4cf6c --- /dev/null +++ b/tests/queries/0_stateless/02494_query_result_cache_explain.sql @@ -0,0 +1,21 @@ +-- Tags: no-parallel +-- Tag no-parallel: Messes with internal cache + +SYSTEM DROP QUERY RESULT CACHE; + +-- Run a silly query with a non-trivial plan and put the result into the query result cache (QRC) +SELECT 1 + number from system.numbers LIMIT 1 SETTINGS enable_experimental_query_result_cache = true; +SELECT count(*) FROM system.query_result_cache; + +-- EXPLAIN PLAN should show the same regardless if the result is calculated or read from the QRC +EXPLAIN PLAN SELECT 1 + number from system.numbers LIMIT 1; +EXPLAIN PLAN SELECT 1 + number from system.numbers LIMIT 1 SETTINGS enable_experimental_query_result_cache = true; -- (*) + 
+-- EXPLAIN PIPELINE should show the same regardless if the result is calculated or read from the QRC +EXPLAIN PIPELINE SELECT 1 + number from system.numbers LIMIT 1; +EXPLAIN PIPELINE SELECT 1 + number from system.numbers LIMIT 1 SETTINGS enable_experimental_query_result_cache = true; -- (*) + +-- Statements (*) must not cache their results into the QRC +SELECT count(*) FROM system.query_result_cache; + +SYSTEM DROP QUERY RESULT CACHE; diff --git a/tests/queries/0_stateless/02494_query_result_cache_min_query_duration.reference b/tests/queries/0_stateless/02494_query_result_cache_min_query_duration.reference new file mode 100644 index 00000000000..a081d0a9c1a --- /dev/null +++ b/tests/queries/0_stateless/02494_query_result_cache_min_query_duration.reference @@ -0,0 +1,5 @@ +1 +1 +--- +1 +0 diff --git a/tests/queries/0_stateless/02494_query_result_cache_min_query_duration.sql b/tests/queries/0_stateless/02494_query_result_cache_min_query_duration.sql new file mode 100644 index 00000000000..eb719411e7a --- /dev/null +++ b/tests/queries/0_stateless/02494_query_result_cache_min_query_duration.sql @@ -0,0 +1,18 @@ +-- Tags: no-parallel +-- Tag no-parallel: Messes with internal cache + +SYSTEM DROP QUERY RESULT CACHE; + +-- This creates an entry in the query result cache ... +SELECT 1 SETTINGS enable_experimental_query_result_cache = true; +SELECT COUNT(*) FROM system.query_result_cache; + +SYSTEM DROP QUERY RESULT CACHE; + +SELECT '---'; + +-- ... but this does not because the query executes much faster than the specified minimum query duration for caching the result +SELECT 1 SETTINGS enable_experimental_query_result_cache = true, query_result_cache_min_query_duration = 10000; +SELECT COUNT(*) FROM system.query_result_cache; + +SYSTEM DROP QUERY RESULT CACHE; diff --git a/tests/queries/0_stateless/02494_query_result_cache_min_query_runs.reference b/tests/queries/0_stateless/02494_query_result_cache_min_query_runs.reference new file mode 100644 index 00000000000..ae3db066a88 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_result_cache_min_query_runs.reference @@ -0,0 +1,14 @@ +1 +1 +--- +1 +0 +1 +1 +--- +1 +0 +1 +0 +1 +1 diff --git a/tests/queries/0_stateless/02494_query_result_cache_min_query_runs.sql b/tests/queries/0_stateless/02494_query_result_cache_min_query_runs.sql new file mode 100644 index 00000000000..7773c69e7c8 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_result_cache_min_query_runs.sql @@ -0,0 +1,32 @@ +-- Tags: no-parallel +-- Tag no-parallel: Messes with internal cache + +SYSTEM DROP QUERY RESULT CACHE; + +-- Cache the query result after the 1st query invocation +SELECT 1 SETTINGS enable_experimental_query_result_cache = true, query_result_cache_min_query_runs = 0; +SELECT COUNT(*) FROM system.query_result_cache; + +SELECT '---'; + +SYSTEM DROP QUERY RESULT CACHE; + +-- Cache the query result after the 2nd query invocation +SELECT 1 SETTINGS enable_experimental_query_result_cache = true, query_result_cache_min_query_runs = 1; +SELECT COUNT(*) FROM system.query_result_cache; +SELECT 1 SETTINGS enable_experimental_query_result_cache = true, query_result_cache_min_query_runs = 1; +SELECT COUNT(*) FROM system.query_result_cache; + +SELECT '---'; + +SYSTEM DROP QUERY RESULT CACHE; + +-- Cache the query result after the 3rd query invocation +SELECT 1 SETTINGS enable_experimental_query_result_cache = true, query_result_cache_min_query_runs = 2; +SELECT COUNT(*) FROM system.query_result_cache; +SELECT 1 SETTINGS enable_experimental_query_result_cache = true,
query_result_cache_min_query_runs = 2; +SELECT COUNT(*) FROM system.query_result_cache; +SELECT 1 SETTINGS enable_experimental_query_result_cache = true, query_result_cache_min_query_runs = 2; +SELECT COUNT(*) FROM system.query_result_cache; + +SYSTEM DROP QUERY RESULT CACHE; diff --git a/tests/queries/0_stateless/02494_query_result_cache_nondeterministic_functions.reference b/tests/queries/0_stateless/02494_query_result_cache_nondeterministic_functions.reference new file mode 100644 index 00000000000..cb6165c307a --- /dev/null +++ b/tests/queries/0_stateless/02494_query_result_cache_nondeterministic_functions.reference @@ -0,0 +1,5 @@ +1 +0 +--- +1 +1 diff --git a/tests/queries/0_stateless/02494_query_result_cache_nondeterministic_functions.sql b/tests/queries/0_stateless/02494_query_result_cache_nondeterministic_functions.sql new file mode 100644 index 00000000000..73c5cf675a1 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_result_cache_nondeterministic_functions.sql @@ -0,0 +1,16 @@ +-- Tags: no-parallel +-- Tag no-parallel: Messes with internal cache + +SYSTEM DROP QUERY RESULT CACHE; + +-- rand() is non-deterministic, with default settings no entry in the query result cache should be created +SELECT COUNT(rand(1)) SETTINGS enable_experimental_query_result_cache = true; +SELECT COUNT(*) FROM system.query_result_cache; + +SELECT '---'; + +-- But an entry can be forced using a setting +SELECT COUNT(RAND(1)) SETTINGS enable_experimental_query_result_cache = true, query_result_cache_store_results_of_queries_with_nondeterministic_functions = true; +SELECT COUNT(*) FROM system.query_result_cache; + +SYSTEM DROP QUERY RESULT CACHE; diff --git a/tests/queries/0_stateless/02494_query_result_cache_normalize_ast.reference b/tests/queries/0_stateless/02494_query_result_cache_normalize_ast.reference new file mode 100644 index 00000000000..bc32b2f2cf3 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_result_cache_normalize_ast.reference @@ -0,0 +1,5 @@ +1 +1 +--- +1 +1 0 diff --git a/tests/queries/0_stateless/02494_query_result_cache_normalize_ast.sql b/tests/queries/0_stateless/02494_query_result_cache_normalize_ast.sql new file mode 100644 index 00000000000..25f27634397 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_result_cache_normalize_ast.sql @@ -0,0 +1,28 @@ +-- Tags: no-parallel +-- Tag no-parallel: Messes with internal cache + +-- Start with empty query result cache (QRC) and query log. +SYSTEM DROP QUERY RESULT CACHE; +DROP TABLE system.query_log SYNC; + +-- Run query whose result gets cached in the query result cache. +-- Besides "enable_experimental_query_result_cache", pass two more knobs (one QRC-specific knob and one non-QRC-specific knob). We just care +-- *that* they are passed and not about their effect. +SELECT 1 SETTINGS enable_experimental_query_result_cache = true, query_result_cache_store_results_of_queries_with_nondeterministic_functions = true, max_threads = 16; + +-- Check that entry in QRC exists +SELECT COUNT(*) FROM system.query_result_cache; + +-- Run the same SELECT but with different SETTINGS. We want its result to be served from the QRC. +SELECT '---'; +SELECT 1 SETTINGS enable_experimental_query_result_cache_passive_usage = true, max_threads = 16; + +-- Technically, both SELECT queries have different ASTs, leading to different QRC keys. QRC does some AST normalization (erase all +-- QRC-related settings) such that the keys match regardless. Verify by checking that the second query caused a QRC hit. 
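+-- For example, assuming the normalization works as described above, both queries here reduce to
+-- 'SELECT 1 SETTINGS max_threads = 16' once the QRC-related settings are erased, hence a single shared cache key.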
+SYSTEM FLUSH LOGS; +SELECT ProfileEvents['QueryResultCacheHits'], ProfileEvents['QueryResultCacheMisses'] +FROM system.query_log +WHERE type = 'QueryFinish' + AND query = 'SELECT 1 SETTINGS enable_experimental_query_result_cache_passive_usage = true, max_threads = 16;'; + +SYSTEM DROP QUERY RESULT CACHE; diff --git a/tests/queries/0_stateless/02494_query_result_cache_passive_usage.reference b/tests/queries/0_stateless/02494_query_result_cache_passive_usage.reference new file mode 100644 index 00000000000..edff09773d1 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_result_cache_passive_usage.reference @@ -0,0 +1,12 @@ +1 +0 +----- +1 +0 +----- +1 +1 +----- +1 +1 +1 0 diff --git a/tests/queries/0_stateless/02494_query_result_cache_passive_usage.sql b/tests/queries/0_stateless/02494_query_result_cache_passive_usage.sql new file mode 100644 index 00000000000..6de891006de --- /dev/null +++ b/tests/queries/0_stateless/02494_query_result_cache_passive_usage.sql @@ -0,0 +1,39 @@ +-- Tags: no-parallel +-- Tag no-parallel: Messes with internal cache + +-- Start with empty query result cache (QRC). +SYSTEM DROP QUERY RESULT CACHE; + +-- By default, don't write query result into query result cache (QRC). +SELECT 1; +SELECT COUNT(*) FROM system.query_result_cache; + +SELECT '-----'; + +-- Try to retrieve query result from empty QRC using the passive mode. The cache should still be empty (no insert). +SELECT 1 SETTINGS enable_experimental_query_result_cache_passive_usage = true; +SELECT COUNT(*) FROM system.query_result_cache; + +SELECT '-----'; + +-- Put query result into cache. +SELECT 1 SETTINGS enable_experimental_query_result_cache = true; +SELECT COUNT(*) FROM system.query_result_cache; + +SELECT '-----'; + +-- Run same query with passive mode again. There must still be one entry in the QRC and we must have a QRC hit. 
+ +-- Get rid of log of previous SELECT +DROP TABLE system.query_log SYNC; + +SELECT 1 SETTINGS enable_experimental_query_result_cache_passive_usage = true; +SELECT COUNT(*) FROM system.query_result_cache; + +SYSTEM FLUSH LOGS; +SELECT ProfileEvents['QueryResultCacheHits'], ProfileEvents['QueryResultCacheMisses'] +FROM system.query_log +WHERE type = 'QueryFinish' + AND query = 'SELECT 1 SETTINGS enable_experimental_query_result_cache_passive_usage = true;'; + +SYSTEM DROP QUERY RESULT CACHE; diff --git a/tests/queries/0_stateless/02494_query_result_cache_secrets.reference b/tests/queries/0_stateless/02494_query_result_cache_secrets.reference new file mode 100644 index 00000000000..dd6341262bc --- /dev/null +++ b/tests/queries/0_stateless/02494_query_result_cache_secrets.reference @@ -0,0 +1,2 @@ +A2193552DCF8A9F99AC35F86BC4D2FFD +SELECT hex(encrypt(\'aes-128-ecb\', \'[HIDDEN]\')) SETTINGS diff --git a/tests/queries/0_stateless/02494_query_result_cache_secrets.sql b/tests/queries/0_stateless/02494_query_result_cache_secrets.sql new file mode 100644 index 00000000000..cd168ac1c6b --- /dev/null +++ b/tests/queries/0_stateless/02494_query_result_cache_secrets.sql @@ -0,0 +1,13 @@ +-- Tags: no-parallel, no-fasttest +-- Tag no-fasttest: Depends on OpenSSL +-- Tag no-parallel: Messes with internal cache + +SYSTEM DROP QUERY RESULT CACHE; + +-- Cache a result of a query with secret in the query result cache +SELECT hex(encrypt('aes-128-ecb', 'plaintext', 'passwordpassword')) SETTINGS enable_experimental_query_result_cache = true; + +-- The secret should not be revealed in system.query_result_cache +SELECT query FROM system.query_result_cache; + +SYSTEM DROP QUERY RESULT CACHE; diff --git a/tests/queries/0_stateless/02494_query_result_cache_ttl_long.reference b/tests/queries/0_stateless/02494_query_result_cache_ttl_long.reference new file mode 100644 index 00000000000..b8c79f4aee6 --- /dev/null +++ b/tests/queries/0_stateless/02494_query_result_cache_ttl_long.reference @@ -0,0 +1,10 @@ +1 +1 +0 +0 +0 +1 +--- +1 +1 +0 diff --git a/tests/queries/0_stateless/02494_query_result_cache_ttl_long.sql b/tests/queries/0_stateless/02494_query_result_cache_ttl_long.sql new file mode 100644 index 00000000000..a418af6bf0b --- /dev/null +++ b/tests/queries/0_stateless/02494_query_result_cache_ttl_long.sql @@ -0,0 +1,29 @@ +-- Tags: no-fasttest, no-parallel, long +-- Tag no-fasttest: Test runtime is > 6 sec +-- Tag long: Test runtime is > 6 sec +-- Tag no-parallel: Messes with internal cache + +SYSTEM DROP QUERY RESULT CACHE; + +-- Cache query result into query result cache with a TTL of 3 sec +SELECT 1 SETTINGS enable_experimental_query_result_cache = true, query_result_cache_ttl = 3; + +-- Expect one non-stale cache entry +SELECT COUNT(*) FROM system.query_result_cache; +SELECT stale FROM system.query_result_cache; + +-- Wait until entry is expired +SELECT sleep(3); +SELECT sleep(3); +SELECT stale FROM system.query_result_cache; + +SELECT '---'; + +-- Run same query as before +SELECT 1 SETTINGS enable_experimental_query_result_cache = true, query_result_cache_ttl = 3; + +-- The entry should have been refreshed (non-stale) +SELECT COUNT(*) FROM system.query_result_cache; +SELECT stale FROM system.query_result_cache; + +SYSTEM DROP QUERY RESULT CACHE; diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting.reference b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference new file mode 100644 index 00000000000..3a747d086eb --- /dev/null +++ 
b/tests/queries/0_stateless/02496_remove_redundant_sorting.reference @@ -0,0 +1,520 @@ +-- Disabled query_plan_remove_redundant_sorting +-- ORDER BY clauses in subqueries are untouched +Expression (Projection) +Header: number UInt64 + Sorting (Sorting for ORDER BY) + Header: number UInt64 + Expression ((Before ORDER BY + Projection)) + Header: number UInt64 + Sorting (Sorting for ORDER BY) + Header: number UInt64 + Expression ((Before ORDER BY + Projection)) + Header: number UInt64 + Sorting (Sorting for ORDER BY) + Header: number UInt64 + Expression (Before ORDER BY) + Header: number UInt64 + ReadFromStorage (SystemNumbers) + Header: number UInt64 +-- Enabled query_plan_remove_redundant_sorting +-- ORDER BY removes ORDER BY clauses in subqueries +-- query +SELECT * +FROM +( + SELECT * + FROM + ( + SELECT * + FROM numbers(3) + ORDER BY number ASC + ) + ORDER BY number DESC +) +ORDER BY number ASC +-- explain +Expression (Projection) +Header: number UInt64 + Sorting (Sorting for ORDER BY) + Header: number UInt64 + Expression ((Before ORDER BY + (Projection + (Before ORDER BY + (Projection + Before ORDER BY))))) + Header: number UInt64 + ReadFromStorage (SystemNumbers) + Header: number UInt64 +-- execute +0 +1 +2 +-- ORDER BY cannot remove ORDER BY in subquery WITH FILL +-- query +SELECT * +FROM +( + SELECT * + FROM + ( + SELECT * + FROM numbers(3) + ORDER BY number DESC + ) + ORDER BY number ASC WITH FILL STEP 1 +) +ORDER BY number ASC +-- explain +Expression (Projection) +Header: number UInt64 + Sorting (Sorting for ORDER BY) + Header: number UInt64 + Expression ((Before ORDER BY + Projection)) + Header: number UInt64 + Filling + Header: number UInt64 + Sorting (Sorting for ORDER BY) + Header: number UInt64 + Expression ((Before ORDER BY + (Projection + Before ORDER BY))) + Header: number UInt64 + ReadFromStorage (SystemNumbers) + Header: number UInt64 +-- execute +0 +1 +2 +-- ORDER BY cannot remove ORDER BY in subquery with LIMIT BY +-- query +SELECT * +FROM +( + SELECT * + FROM + ( + SELECT * + FROM numbers(3) + ORDER BY number DESC + ) + ORDER BY number ASC + LIMIT 1 BY number +) +ORDER BY number ASC +-- explain +Expression (Projection) +Header: number UInt64 + Sorting (Sorting for ORDER BY) + Header: number UInt64 + Expression ((Before ORDER BY + Projection)) + Header: number UInt64 + LimitBy + Header: number UInt64 + Expression (Before LIMIT BY) + Header: number UInt64 + Sorting (Sorting for ORDER BY) + Header: number UInt64 + Expression ((Before ORDER BY + (Projection + Before ORDER BY))) + Header: number UInt64 + ReadFromStorage (SystemNumbers) + Header: number UInt64 +-- execute +0 +1 +2 +-- CROSS JOIN with subqueries, nor ORDER BY nor GROUP BY in main query -> only ORDER BY clauses in most inner subqueries will be removed +-- query +SELECT * +FROM +( + SELECT number + FROM + ( + SELECT number + FROM numbers(3) + ORDER BY number DESC + ) + ORDER BY number ASC +) AS t1, +( + SELECT number + FROM + ( + SELECT number + FROM numbers(3) + ORDER BY number ASC + ) + ORDER BY number DESC +) AS t2 +-- explain +Expression ((Projection + Before ORDER BY)) +Header: number UInt64 + t2.number UInt64 + Join (JOIN FillRightFirst) + Header: number UInt64 + t2.number UInt64 + Expression ((Before JOIN + Projection)) + Header: number UInt64 + Sorting (Sorting for ORDER BY) + Header: number UInt64 + Expression ((Before ORDER BY + (Projection + Before ORDER BY))) + Header: number UInt64 + ReadFromStorage (SystemNumbers) + Header: number UInt64 + Expression ((Joined actions + (Rename joined columns + 
Projection))) + Header: t2.number UInt64 + Sorting (Sorting for ORDER BY) + Header: number UInt64 + Expression ((Before ORDER BY + (Projection + Before ORDER BY))) + Header: number UInt64 + ReadFromStorage (SystemNumbers) + Header: number UInt64 +-- execute +0 2 +0 1 +0 0 +1 2 +1 1 +1 0 +2 2 +2 1 +2 0 +-- CROSS JOIN with subqueries, ORDER BY in main query -> all ORDER BY clauses will be removed in subqueries +-- query +SELECT * +FROM +( + SELECT number + FROM + ( + SELECT number + FROM numbers(3) + ORDER BY number DESC + ) + ORDER BY number ASC +) AS t1, +( + SELECT number + FROM + ( + SELECT number + FROM numbers(3) + ORDER BY number ASC + ) + ORDER BY number DESC +) AS t2 +ORDER BY t1.number, t2.number +-- explain +Expression (Projection) +Header: number UInt64 + t2.number UInt64 + Sorting (Sorting for ORDER BY) + Header: number UInt64 + t2.number UInt64 + Expression (Before ORDER BY) + Header: number UInt64 + t2.number UInt64 + Join (JOIN FillRightFirst) + Header: number UInt64 + t2.number UInt64 + Expression ((Before JOIN + (Projection + (Before ORDER BY + (Projection + Before ORDER BY))))) + Header: number UInt64 + ReadFromStorage (SystemNumbers) + Header: number UInt64 + Expression ((Joined actions + (Rename joined columns + (Projection + (Before ORDER BY + (Projection + Before ORDER BY)))))) + Header: t2.number UInt64 + ReadFromStorage (SystemNumbers) + Header: number UInt64 +-- execute +0 0 +0 1 +0 2 +1 0 +1 1 +1 2 +2 0 +2 1 +2 2 +-- GROUP BY with aggregation function which does NOT depend on order -> eliminate ORDER BY(s) in _all_ subqueries +-- query +SELECT sum(number) +FROM +( + SELECT * + FROM + ( + SELECT * + FROM numbers(3) + ORDER BY number ASC + ) + ORDER BY number DESC +) +GROUP BY number +-- explain +Expression ((Projection + Before ORDER BY)) +Header: sum(number) UInt64 + Aggregating + Header: number UInt64 + sum(number) UInt64 + Expression ((Before GROUP BY + (Projection + (Before ORDER BY + (Projection + Before ORDER BY))))) + Header: number UInt64 + ReadFromStorage (SystemNumbers) + Header: number UInt64 +-- execute +0 +2 +1 +-- GROUP BY with aggregation function which depends on order -> keep ORDER BY in first subquery, and eliminate in second subquery +-- query +SELECT any(number) +FROM +( + SELECT * + FROM + ( + SELECT * + FROM numbers(3) + ORDER BY number ASC + ) + ORDER BY number DESC +) +GROUP BY number +-- explain +Expression ((Projection + Before ORDER BY)) +Header: number UInt64 + Aggregating + Header: number UInt64 + Expression ((Before GROUP BY + (Projection + (Before ORDER BY + (Projection + Before ORDER BY))))) + Header: number UInt64 + ReadFromStorage (SystemNumbers) + Header: number UInt64 +-- execute +0 +2 +1 +-- query with aggregation function but w/o GROUP BY -> remove sorting +-- query +SELECT sum(number) +FROM +( + SELECT * + FROM numbers(10) + ORDER BY number DESC +) +-- explain +Expression ((Projection + Before ORDER BY)) +Header: sum(number) UInt64 + Aggregating + Header: sum(number) UInt64 + Expression ((Before GROUP BY + (Projection + Before ORDER BY))) + Header: number UInt64 + ReadFromStorage (SystemNumbers) + Header: number UInt64 +-- execute +45 +-- check that optimization is applied recursively to subqueries as well +-- GROUP BY with aggregation function which does NOT depend on order -> eliminate ORDER BY in most inner subquery here +-- query +SELECT a +FROM +( + SELECT sum(number) AS a + FROM + ( + SELECT * + FROM numbers(3) + ORDER BY number ASC + ) + GROUP BY number +) +ORDER BY a ASC +-- explain +Expression (Projection) +Header: a 
UInt64 + Sorting (Sorting for ORDER BY) + Header: a UInt64 + Expression ((Before ORDER BY + (Projection + Before ORDER BY))) + Header: a UInt64 + Aggregating + Header: number UInt64 + sum(number) UInt64 + Expression ((Before GROUP BY + (Projection + Before ORDER BY))) + Header: number UInt64 + ReadFromStorage (SystemNumbers) + Header: number UInt64 +-- execute +0 +1 +2 +-- GROUP BY with aggregation function which depends on order -> ORDER BY in subquery is kept due to the aggregation function +-- query +SELECT a +FROM +( + SELECT any(number) AS a + FROM + ( + SELECT * + FROM numbers(3) + ORDER BY number ASC + ) + GROUP BY number +) +ORDER BY a ASC +-- explain +Expression (Projection) +Header: a UInt64 + Sorting (Sorting for ORDER BY) + Header: a UInt64 + Expression ((Before ORDER BY + (Projection + Before ORDER BY))) + Header: a UInt64 + Aggregating + Header: number UInt64 + Expression ((Before GROUP BY + (Projection + Before ORDER BY))) + Header: number UInt64 + ReadFromStorage (SystemNumbers) + Header: number UInt64 +-- execute +0 +1 +2 +-- Check that optimization works for subqueries as well, - main query have neither ORDER BY nor GROUP BY +-- query +SELECT a +FROM +( + SELECT any(number) AS a + FROM + ( + SELECT * + FROM + ( + SELECT * + FROM numbers(3) + ORDER BY number DESC + ) + ORDER BY number ASC + ) + GROUP BY number +) +WHERE a > 0 +-- explain +Expression ((Projection + (Before ORDER BY + ))) +Header: a UInt64 + Aggregating + Header: number UInt64 + Filter + Header: number UInt64 + Filter (( + (Before GROUP BY + (Projection + (Before ORDER BY + (Projection + Before ORDER BY)))))) + Header: number UInt64 + ReadFromStorage (SystemNumbers) + Header: number UInt64 +-- execute +2 +1 +-- GROUP BY in most inner query makes execution parallelized, and removing inner sorting steps will keep it that way. 
But need to correctly update data streams sorting properties after removing sorting steps +-- query +SELECT * +FROM +( + SELECT * + FROM + ( + SELECT * + FROM numbers(3) + GROUP BY number + ORDER BY number ASC + ) + ORDER BY number ASC +) +ORDER BY number ASC +-- explain +Expression (Projection) +Header: number UInt64 + Sorting (Sorting for ORDER BY) + Header: number UInt64 + Expression ((Before ORDER BY + (Projection + (Before ORDER BY + (Projection + Before ORDER BY))))) + Header: number UInt64 + Aggregating + Header: number UInt64 + Expression (Before GROUP BY) + Header: number UInt64 + ReadFromStorage (SystemNumbers) + Header: number UInt64 +-- execute +0 +1 +2 +-- disable common optimization to avoid functions to be lifted up (liftUpFunctions optimization), needed for testing with stateful function +-- neighbor() as stateful function prevents removing inner ORDER BY since its result depends on order +-- query +SELECT + number, + neighbor(number, 2) +FROM +( + SELECT * + FROM numbers(10) + ORDER BY number DESC +) +ORDER BY number ASC +-- explain +Expression (Projection) +Header: number UInt64 + neighbor(number, 2) UInt64 + Sorting (Sorting for ORDER BY) + Header: number UInt64 + neighbor(number, 2) UInt64 + Expression (Before ORDER BY) + Header: number UInt64 + neighbor(number, 2) UInt64 + Expression (Projection) + Header: number UInt64 + Sorting (Sorting for ORDER BY) + Header: number UInt64 + Expression (Before ORDER BY) + Header: number UInt64 + ReadFromStorage (SystemNumbers) + Header: number UInt64 +-- execute +0 0 +1 0 +2 0 +3 1 +4 2 +5 3 +6 4 +7 5 +8 6 +9 7 +-- non-stateful function does _not_ prevent removing inner ORDER BY +-- query +SELECT + number, + plus(number, 2) +FROM +( + SELECT * + FROM numbers(10) + ORDER BY number DESC +) +-- explain +Expression (Projection) +Header: number UInt64 + plus(number, 2) UInt64 + Expression (Before ORDER BY) + Header: number UInt64 + plus(number, 2) UInt64 + Expression (Projection) + Header: number UInt64 + Sorting (Sorting for ORDER BY) + Header: number UInt64 + Expression (Before ORDER BY) + Header: number UInt64 + ReadFromStorage (SystemNumbers) + Header: number UInt64 +-- execute +9 11 +8 10 +7 9 +6 8 +5 7 +4 6 +3 5 +2 4 +1 3 +0 2 diff --git a/tests/queries/0_stateless/02496_remove_redundant_sorting.sh b/tests/queries/0_stateless/02496_remove_redundant_sorting.sh new file mode 100755 index 00000000000..d9d46681816 --- /dev/null +++ b/tests/queries/0_stateless/02496_remove_redundant_sorting.sh @@ -0,0 +1,262 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +DISABLE_OPTIMIZATION="SET query_plan_remove_redundant_sorting=0;SET optimize_duplicate_order_by_and_distinct=0" +ENABLE_OPTIMIZATION="SET query_plan_remove_redundant_sorting=1;SET optimize_duplicate_order_by_and_distinct=0" + +echo "-- Disabled query_plan_remove_redundant_sorting" +echo "-- ORDER BY clauses in subqueries are untouched" +query="SELECT * +FROM +( + SELECT * + FROM + ( + SELECT * + FROM numbers(3) + ORDER BY number ASC + ) + ORDER BY number DESC +) +ORDER BY number ASC" +$CLICKHOUSE_CLIENT -nq "$DISABLE_OPTIMIZATION;EXPLAIN header=1 $query" + +function run_query { + echo "-- query" + echo "$1" + echo "-- explain" + $CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;EXPLAIN header=1 $1" + echo "-- execute" + $CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;$1" +} + +echo "-- Enabled query_plan_remove_redundant_sorting" +echo "-- ORDER BY removes ORDER BY clauses in subqueries" +run_query "$query" + +echo "-- ORDER BY cannot remove ORDER BY in subquery WITH FILL" +query="SELECT * +FROM +( + SELECT * + FROM + ( + SELECT * + FROM numbers(3) + ORDER BY number DESC + ) + ORDER BY number ASC WITH FILL STEP 1 +) +ORDER BY number ASC" +run_query "$query" + +echo "-- ORDER BY cannot remove ORDER BY in subquery with LIMIT BY" +query="SELECT * +FROM +( + SELECT * + FROM + ( + SELECT * + FROM numbers(3) + ORDER BY number DESC + ) + ORDER BY number ASC + LIMIT 1 BY number +) +ORDER BY number ASC" +run_query "$query" + +echo "-- CROSS JOIN with subqueries, nor ORDER BY nor GROUP BY in main query -> only ORDER BY clauses in most inner subqueries will be removed" +query="SELECT * +FROM +( + SELECT number + FROM + ( + SELECT number + FROM numbers(3) + ORDER BY number DESC + ) + ORDER BY number ASC +) AS t1, +( + SELECT number + FROM + ( + SELECT number + FROM numbers(3) + ORDER BY number ASC + ) + ORDER BY number DESC +) AS t2" +run_query "$query" + +echo "-- CROSS JOIN with subqueries, ORDER BY in main query -> all ORDER BY clauses will be removed in subqueries" +query="SELECT * +FROM +( + SELECT number + FROM + ( + SELECT number + FROM numbers(3) + ORDER BY number DESC + ) + ORDER BY number ASC +) AS t1, +( + SELECT number + FROM + ( + SELECT number + FROM numbers(3) + ORDER BY number ASC + ) + ORDER BY number DESC +) AS t2 +ORDER BY t1.number, t2.number" +run_query "$query" + +echo "-- GROUP BY with aggregation function which does NOT depend on order -> eliminate ORDER BY(s) in _all_ subqueries" +query="SELECT sum(number) +FROM +( + SELECT * + FROM + ( + SELECT * + FROM numbers(3) + ORDER BY number ASC + ) + ORDER BY number DESC +) +GROUP BY number" +run_query "$query" + +echo "-- GROUP BY with aggregation function which depends on order -> keep ORDER BY in first subquery, and eliminate in second subquery" +query="SELECT any(number) +FROM +( + SELECT * + FROM + ( + SELECT * + FROM numbers(3) + ORDER BY number ASC + ) + ORDER BY number DESC +) +GROUP BY number" +run_query "$query" + +echo "-- query with aggregation function but w/o GROUP BY -> remove sorting" +query="SELECT sum(number) +FROM +( + SELECT * + FROM numbers(10) + ORDER BY number DESC +)" +run_query "$query" + +echo "-- check that optimization is applied recursively to subqueries as well" +echo "-- GROUP BY with aggregation function which does NOT depend on order -> eliminate ORDER BY in most inner subquery here" +query="SELECT a +FROM +( + SELECT sum(number) AS a + FROM + ( + SELECT * + FROM numbers(3) + ORDER BY number ASC + ) + GROUP BY number +) +ORDER BY a ASC" +run_query "$query" + +echo "-- GROUP BY 
with aggregation function which depends on order -> ORDER BY in subquery is kept due to the aggregation function" +query="SELECT a +FROM +( + SELECT any(number) AS a + FROM + ( + SELECT * + FROM numbers(3) + ORDER BY number ASC + ) + GROUP BY number +) +ORDER BY a ASC" +run_query "$query" + +echo "-- Check that optimization works for subqueries as well, - main query have neither ORDER BY nor GROUP BY" +query="SELECT a +FROM +( + SELECT any(number) AS a + FROM + ( + SELECT * + FROM + ( + SELECT * + FROM numbers(3) + ORDER BY number DESC + ) + ORDER BY number ASC + ) + GROUP BY number +) +WHERE a > 0" +run_query "$query" + +echo "-- GROUP BY in most inner query makes execution parallelized, and removing inner sorting steps will keep it that way. But need to correctly update data streams sorting properties after removing sorting steps" +query="SELECT * +FROM +( + SELECT * + FROM + ( + SELECT * + FROM numbers(3) + GROUP BY number + ORDER BY number ASC + ) + ORDER BY number ASC +) +ORDER BY number ASC" +run_query "$query" + +echo "-- disable common optimization to avoid functions to be lifted up (liftUpFunctions optimization), needed for testing with stateful function" +ENABLE_OPTIMIZATION="SET query_plan_enable_optimizations=0;$ENABLE_OPTIMIZATION" +echo "-- neighbor() as stateful function prevents removing inner ORDER BY since its result depends on order" +query="SELECT + number, + neighbor(number, 2) +FROM +( + SELECT * + FROM numbers(10) + ORDER BY number DESC +) +ORDER BY number ASC" +run_query "$query" + +echo "-- non-stateful function does _not_ prevent removing inner ORDER BY" +query="SELECT + number, + plus(number, 2) +FROM +( + SELECT * + FROM numbers(10) + ORDER BY number DESC +)" +run_query "$query" diff --git a/tests/queries/0_stateless/02517_fuse_bug_44712.sql b/tests/queries/0_stateless/02517_fuse_bug_44712.sql deleted file mode 100644 index 894bf9e06d5..00000000000 --- a/tests/queries/0_stateless/02517_fuse_bug_44712.sql +++ /dev/null @@ -1,10 +0,0 @@ -DROP TABLE IF EXISTS fuse_tbl__fuzz_35; - -CREATE TABLE fuse_tbl__fuzz_35 (`a` UInt8, `b` Nullable(Int16)) ENGINE = Log; -INSERT INTO fuse_tbl__fuzz_35 SELECT number, number + 1 FROM numbers(1000); - -set allow_experimental_analyzer = 0, optimize_syntax_fuse_functions = 1, optimize_fuse_sum_count_avg = 1; - -SELECT quantile(0.5)(b), quantile(0.9)(b) FROM (SELECT x + 2147483648 AS b FROM (SELECT quantile(0.5)(b) AS x FROM fuse_tbl__fuzz_35) GROUP BY x) FORMAT Null; - -DROP TABLE IF EXISTS fuse_tbl__fuzz_35; diff --git a/tests/queries/0_stateless/02521_tsv_csv_custom_header_detection.reference b/tests/queries/0_stateless/02521_tsv_csv_custom_header_detection.reference new file mode 100644 index 00000000000..4279025be39 --- /dev/null +++ b/tests/queries/0_stateless/02521_tsv_csv_custom_header_detection.reference @@ -0,0 +1,173 @@ +CSV +1 +x Nullable(Int64) +y Nullable(String) +z Array(Nullable(Int64)) +123 Hello [1,2,3] +456 World [4,5,6] +2 +x UInt32 +y String +z Array(UInt32) +123 Hello [1,2,3] +456 World [4,5,6] +3 +c1 Nullable(String) +c2 Nullable(String) +c3 Nullable(String) +x y z +123 Hello World +456 World Hello +4 +c1 Nullable(String) +c2 Nullable(String) +c3 Nullable(String) +x y z +UInt32 String Array(UInt32) +123 Hello World +456 World Hello +5 +c1 Nullable(String) +c2 Nullable(String) +c3 Nullable(String) +x y z +6 +c1 Nullable(String) +c2 Nullable(String) +c3 Nullable(String) +x y z +UInt32 String Array(UInt32) +7 +c1 Nullable(String) +c2 Nullable(String) +c3 Nullable(String) +x y z +UInt32 String Array +8 +c1 
Nullable(String) +c2 Nullable(String) +c3 Nullable(String) +x y z +UInt32 String Array +123 Hello [1,2,3] +9 +x Nullable(Int64) +y Nullable(String) +z Array(Nullable(Int64)) +123 Hello [1,2,3] +10 +x Nullable(Int64) +y Nullable(String) +z Array(Nullable(Int64)) +123 "Hello" [1,2,3] +11 +c1 Nullable(String) +c2 Nullable(String) +c3 Nullable(String) +x y z +Hello \N World +12 +x Nullable(String) +y Nullable(String) +z Array(Nullable(Int64)) +Hello \N [1,2,3] +13 +c1 Nullable(String) +c2 Nullable(String) +c3 Nullable(String) +x y z +Hello \N World +\N Hello \N +14 +x Nullable(String) +y Nullable(String) +z Array(Nullable(Int64)) +Hello \N [] +\N \N [1,2,3] +15 +c1 Nullable(String) +c2 Nullable(String) +c3 Nullable(String) +x y z +Hello \N \N +\N World \N +16 +a"b Nullable(Int64) +c Nullable(Int64) +1 2 +17 +1 2 +18 +1 0 +TSV +1 +x Nullable(Int64) +y Nullable(String) +z Array(Nullable(Int64)) +123 Hello [1,2,3] +456 World [4,5,6] +2 +x UInt32 +y String +z Array(UInt32) +123 Hello [1,2,3] +456 World [4,5,6] +3 +c1 Nullable(String) +c2 Nullable(String) +c3 Nullable(String) +x y z +Foo Hello World +Bar World Hello +4 +c1 Nullable(String) +c2 Nullable(String) +c3 Nullable(String) +x y z +UInt32 String Array(UInt32) +Foo Hello World +Bar World Hello +CustomSeparated +1 +x Nullable(Int64) +y Nullable(String) +z Array(Nullable(Int64)) +123 Hello [1,2,3] +456 World [4,5,6] +2 +x UInt32 +y String +z Array(UInt32) +123 Hello [1,2,3] +456 World [4,5,6] +3 +c1 Nullable(String) +c2 Nullable(String) +c3 Nullable(String) +x y z +Foo Hello World +Bar World Hello +4 +c1 Nullable(String) +c2 Nullable(String) +c3 Nullable(String) +x y z +UInt32 String Array(UInt32) +Foo Hello World +Bar World Hello +5 +c1 Nullable(String) +c2 Nullable(String) +c3 Nullable(String) +x y z +6 +c1 Nullable(String) +c2 Nullable(String) +c3 Nullable(String) +x y z +UInt32 String Array(UInt32) +7 +x UInt32 +y String +z Array(UInt32) +42 Hello [1,2,3] diff --git a/tests/queries/0_stateless/02521_tsv_csv_custom_header_detection.sh b/tests/queries/0_stateless/02521_tsv_csv_custom_header_detection.sh new file mode 100755 index 00000000000..a20afef875e --- /dev/null +++ b/tests/queries/0_stateless/02521_tsv_csv_custom_header_detection.sh @@ -0,0 +1,313 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +echo "CSV" +echo 1 +echo '"x","y","z" +123,"Hello","[1,2,3]" +456,"World","[4,5,6]"' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "desc test"; + +echo '"x","y","z" +123,"Hello","[1,2,3]" +456,"World","[4,5,6]"' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "select * from test"; + +echo 2 +echo '"x","y","z" +"UInt32","String","Array(UInt32)" +123,"Hello","[1,2,3]" +456,"World","[4,5,6]"' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "desc test"; + +echo '"x","y","z" +"UInt32","String","Array(UInt32)" +123,"Hello","[1,2,3]" +456,"World","[4,5,6]"' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "select * from test"; + +echo 3 +echo '"x","y","z" +"123","Hello","World" +"456","World","Hello"' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "desc test"; + +echo '"x","y","z" +"123","Hello","World" +"456","World","Hello"' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "select * from test"; + +echo 4 +echo '"x","y","z" +"UInt32","String","Array(UInt32)" +"123","Hello","World" +"456","World","Hello"' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "desc test"; + +echo '"x","y","z" +"UInt32","String","Array(UInt32)" +"123","Hello","World" +"456","World","Hello"' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "select * from test"; + +echo 5 +echo '"x","y","z"' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "desc test"; +echo '"x","y","z"'| $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "select * from test"; + +echo 6 +echo '"x","y","z" +"UInt32","String","Array(UInt32)"' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "desc test"; + +echo '"x","y","z" +"UInt32","String","Array(UInt32)"'| $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "select * from test"; + +echo 7 +echo '"x","y","z" +"UInt32","String","Array"' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "desc test"; + +echo '"x","y","z" +"UInt32","String","Array"'| $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "select * from test"; + +echo 8 +echo '"x","y","z" +"UInt32","String","Array" +"123","Hello","[1,2,3]"' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "desc test"; + +echo '"x","y","z" +"UInt32","String","Array" +"123","Hello","[1,2,3]"' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "select * from test"; + +echo 9 +echo '"x","y","z" +123,"Hello","[1,2,3]"' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "desc test"; + +echo '"x","y","z" +123,"Hello","[1,2,3]"'| $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "select * from test"; + +echo 10 +echo '"x","y","z" +123,"""Hello""","[1,2,3]"' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "desc test"; + +echo '"x","y","z" +123,"""Hello""","[1,2,3]"'| $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "select * from test"; + +echo 11 +echo '"x","y","z" +"Hello",\N,"World"' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "desc test"; + +echo '"x","y","z" +"Hello",\N,"World"'| $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "select * from test"; + +echo 12 +echo '"x","y","z" +"Hello",\N,"[1,2,3]"' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "desc test"; + +echo '"x","y","z" +"Hello",\N,"[1,2,3]"'| $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "select * from test"; + +echo 13 +echo '"x","y","z" +"Hello",\N,"World" +\N,"Hello",\N' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "desc test"; 
+ +echo '"x","y","z" +"Hello",\N,"World" +\N,"Hello",\N'| $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "select * from test"; + +echo 14 +echo '"x","y","z" +"Hello",\N,\N +\N,\N,"[1,2,3]"' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "desc test"; + +echo '"x","y","z" +"Hello",\N,\N +\N,\N,"[1,2,3]"' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "select * from test"; + +echo 15 +echo '"x","y","z" +"Hello",\N,\N +\N,"World",\N' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "desc test"; + +echo '"x","y","z" +"Hello",\N,\N +\N,"World",\N' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "select * from test"; + +echo 16 +echo '"a""b","c" +1,2' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "desc test"; + +echo '"a""b","c" +1,2' | $CLICKHOUSE_LOCAL --input-format='CSV' --table='test' -q "select * from test"; + +echo 17 +echo '"a","b","c" +1,2,3' | $CLICKHOUSE_LOCAL --input-format='CSV' --structure='a UInt32, b UInt32' --table='test' -q "select * from test"; + +echo 18 +echo '"a" +1' | $CLICKHOUSE_LOCAL --input-format='CSV' --structure='a UInt32, b UInt32' --table='test' -q "select * from test"; + +echo "TSV" +echo 1 +echo -e 'x\ty\tz +123\tHello\t[1,2,3] +456\tWorld\t[4,5,6]' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "desc test"; + +echo -e 'x\ty\tz +123\tHello\t[1,2,3] +456\tWorld\t[4,5,6]' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "select * from test"; + +echo 2 +echo -e 'x\ty\tz +UInt32\tString\tArray(UInt32) +123\tHello\t[1,2,3] +456\tWorld\t[4,5,6]' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "desc test"; + +echo -e 'x\ty\tz +UInt32\tString\tArray(UInt32) +123\tHello\t[1,2,3] +456\tWorld\t[4,5,6]' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "select * from test"; + +echo 3 +echo -e 'x\ty\tz +Foo\tHello\tWorld +Bar\tWorld\tHello' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "desc test"; + +echo -e 'x\ty\tz +Foo\tHello\tWorld +Bar\tWorld\tHello' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "select * from test"; + +echo 4 +echo -e 'x\ty\tz +UInt32\tString\tArray(UInt32) +Foo\tHello\tWorld +Bar\tWorld\tHello' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "desc test"; + +echo -e 'x\ty\tz +UInt32\tString\tArray(UInt32) +Foo\tHello\tWorld +Bar\tWorld\tHello' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "select * from test"; + +echo "CustomSeparated" + +echo 1 +echo ' +"x""y""z" + +123"Hello""[1,2,3]" + +456"World""[4,5,6]" +' | $CLICKHOUSE_LOCAL --input-format='CustomSeparated' --table='test' -q "desc test" --format_custom_row_before_delimiter='' --format_custom_row_after_delimiter=$'\n' --format_custom_row_between_delimiter=$'\n' --format_custom_result_before_delimiter=$'\n' --format_custom_result_after_delimiter=$'\n' --format_custom_field_delimiter='' --format_custom_escaping_rule='CSV' + + +echo ' +"x""y""z" + +123"Hello""[1,2,3]" + +456"World""[4,5,6]" +' | $CLICKHOUSE_LOCAL --input-format='CustomSeparated' --table='test' -q "select * from test" --format_custom_row_before_delimiter='' --format_custom_row_after_delimiter=$'\n' --format_custom_row_between_delimiter=$'\n' --format_custom_result_before_delimiter=$'\n' --format_custom_result_after_delimiter=$'\n' --format_custom_field_delimiter='' --format_custom_escaping_rule='CSV' + +echo 2 +echo ' +"x""y""z" + +"UInt32""String""Array(UInt32)" + +123"Hello""[1,2,3]" + +456"World""[4,5,6]" +' | $CLICKHOUSE_LOCAL --input-format='CustomSeparated' --table='test' 
-q "desc test" --format_custom_row_before_delimiter='' --format_custom_row_after_delimiter=$'\n' --format_custom_row_between_delimiter=$'\n' --format_custom_result_before_delimiter=$'\n' --format_custom_result_after_delimiter=$'\n' --format_custom_field_delimiter='' --format_custom_escaping_rule='CSV' + +echo ' +"x""y""z" + +"UInt32""String""Array(UInt32)" + +123"Hello""[1,2,3]" + +456"World""[4,5,6]" +' | $CLICKHOUSE_LOCAL --input-format='CustomSeparated' --table='test' -q "select * from test" --format_custom_row_before_delimiter='' --format_custom_row_after_delimiter=$'\n' --format_custom_row_between_delimiter=$'\n' --format_custom_result_before_delimiter=$'\n' --format_custom_result_after_delimiter=$'\n' --format_custom_field_delimiter='' --format_custom_escaping_rule='CSV' + + +echo 3 +echo ' +"x""y""z" + +"Foo""Hello""World" + +"Bar""World""Hello" +' | $CLICKHOUSE_LOCAL --input-format='CustomSeparated' --table='test' -q "desc test" --format_custom_row_before_delimiter='' --format_custom_row_after_delimiter=$'\n' --format_custom_row_between_delimiter=$'\n' --format_custom_result_before_delimiter=$'\n' --format_custom_result_after_delimiter=$'\n' --format_custom_field_delimiter='' --format_custom_escaping_rule='CSV' + + +echo ' +"x""y""z" + +"Foo""Hello""World" + +"Bar""World""Hello" +' | $CLICKHOUSE_LOCAL --input-format='CustomSeparated' --table='test' -q "select * from test" --format_custom_row_before_delimiter='' --format_custom_row_after_delimiter=$'\n' --format_custom_row_between_delimiter=$'\n' --format_custom_result_before_delimiter=$'\n' --format_custom_result_after_delimiter=$'\n' --format_custom_field_delimiter='' --format_custom_escaping_rule='CSV' + + +echo 4 +echo ' +"x""y""z" + +"UInt32""String""Array(UInt32)" + +"Foo""Hello""World" + +"Bar""World""Hello" +' | $CLICKHOUSE_LOCAL --input-format='CustomSeparated' --table='test' -q "desc test" --format_custom_row_before_delimiter='' --format_custom_row_after_delimiter=$'\n' --format_custom_row_between_delimiter=$'\n' --format_custom_result_before_delimiter=$'\n' --format_custom_result_after_delimiter=$'\n' --format_custom_field_delimiter='' --format_custom_escaping_rule='CSV' + + +echo ' +"x""y""z" + +"UInt32""String""Array(UInt32)" + +"Foo""Hello""World" + +"Bar""World""Hello" +' | $CLICKHOUSE_LOCAL --input-format='CustomSeparated' --table='test' -q "select * from test" --format_custom_row_before_delimiter='' --format_custom_row_after_delimiter=$'\n' --format_custom_row_between_delimiter=$'\n' --format_custom_result_before_delimiter=$'\n' --format_custom_result_after_delimiter=$'\n' --format_custom_field_delimiter='' --format_custom_escaping_rule='CSV' + + +echo 5 +echo ' +"x""y""z" +' | $CLICKHOUSE_LOCAL --input-format='CustomSeparated' --table='test' -q "desc test" --format_custom_row_before_delimiter='' --format_custom_row_after_delimiter=$'\n' --format_custom_row_between_delimiter=$'\n' --format_custom_result_before_delimiter=$'\n' --format_custom_result_after_delimiter=$'\n' --format_custom_field_delimiter='' --format_custom_escaping_rule='CSV' + + +echo ' +"x""y""z" +' | $CLICKHOUSE_LOCAL --input-format='CustomSeparated' --table='test' -q "select * from test" --format_custom_row_before_delimiter='' --format_custom_row_after_delimiter=$'\n' --format_custom_row_between_delimiter=$'\n' --format_custom_result_before_delimiter=$'\n' --format_custom_result_after_delimiter=$'\n' --format_custom_field_delimiter='' --format_custom_escaping_rule='CSV' + + +echo 6 +echo ' +"x""y""z" + +"UInt32""String""Array(UInt32)" +' | 
$CLICKHOUSE_LOCAL --input-format='CustomSeparated' --table='test' -q "desc test" --format_custom_row_before_delimiter='' --format_custom_row_after_delimiter=$'\n' --format_custom_row_between_delimiter=$'\n' --format_custom_result_before_delimiter=$'\n' --format_custom_result_after_delimiter=$'\n' --format_custom_field_delimiter='' --format_custom_escaping_rule='CSV' + + +echo ' +"x""y""z" + +"UInt32""String""Array(UInt32)" +' | $CLICKHOUSE_LOCAL --input-format='CustomSeparated' --table='test' -q "select * from test" --format_custom_row_before_delimiter='' --format_custom_row_after_delimiter=$'\n' --format_custom_row_between_delimiter=$'\n' --format_custom_result_before_delimiter=$'\n' --format_custom_result_after_delimiter=$'\n' --format_custom_field_delimiter='' --format_custom_escaping_rule='CSV' + +echo 7 +echo ' +"x""y""z" + +"UInt32""String""Array(UInt32)" + +42"Hello"[1,2,3] +' | $CLICKHOUSE_LOCAL --input-format='CustomSeparated' --table='test' -q "desc test" --format_custom_row_before_delimiter='' --format_custom_row_after_delimiter=$'\n' --format_custom_row_between_delimiter=$'\n' --format_custom_result_before_delimiter=$'\n' --format_custom_result_after_delimiter=$'\n' --format_custom_field_delimiter='' --format_custom_escaping_rule='JSON' + + +echo ' +"x""y""z" + +"UInt32""String""Array(UInt32)" + +42"Hello"[1,2,3] +' | $CLICKHOUSE_LOCAL --input-format='CustomSeparated' --table='test' -q "select * from test" --format_custom_row_before_delimiter='' --format_custom_row_after_delimiter=$'\n' --format_custom_row_between_delimiter=$'\n' --format_custom_result_before_delimiter=$'\n' --format_custom_result_after_delimiter=$'\n' --format_custom_field_delimiter='' --format_custom_escaping_rule='JSON' + + diff --git a/tests/queries/0_stateless/02531_ipv4_arithmetic.reference b/tests/queries/0_stateless/02531_ipv4_arithmetic.reference new file mode 100644 index 00000000000..6f03e4e6903 --- /dev/null +++ b/tests/queries/0_stateless/02531_ipv4_arithmetic.reference @@ -0,0 +1,20 @@ +10 1.2.3.4 0 +11 1.2.3.4 3 +12 1.2.3.4 4 +13 1.2.3.4 12 +14 1.2.3.4 0 +15 1.2.3.4 10 +16 1.2.3.4 4 +17 1.2.3.4 10 +18 1.2.3.4 4 +19 1.2.3.4 10 +20 1.2.3.4 0 +21 1.2.3.4 7 +22 1.2.3.4 14 +23 1.2.3.4 12 +24 1.2.3.4 4 +25 1.2.3.4 10 +26 1.2.3.4 12 +27 1.2.3.4 13 +28 1.2.3.4 0 +29 1.2.3.4 1 diff --git a/tests/queries/0_stateless/02531_ipv4_arithmetic.sql b/tests/queries/0_stateless/02531_ipv4_arithmetic.sql new file mode 100644 index 00000000000..59a99842d61 --- /dev/null +++ b/tests/queries/0_stateless/02531_ipv4_arithmetic.sql @@ -0,0 +1 @@ +SELECT number, ip, ip % number FROM (SELECT number, toIPv4('1.2.3.4') as ip FROM numbers(10, 20)); \ No newline at end of file diff --git a/tests/queries/0_stateless/02534_join_prewhere_bug_44062.reference b/tests/queries/0_stateless/02534_join_prewhere_bug_44062.reference new file mode 100644 index 00000000000..aaef17371d8 --- /dev/null +++ b/tests/queries/0_stateless/02534_join_prewhere_bug_44062.reference @@ -0,0 +1,38 @@ +-- { echoOn } + +SELECT * FROM test1 LEFT JOIN test2 ON test1.col1 = test2.col1 +WHERE test2.col1 IS NULL +ORDER BY test2.col1 +; +12321 -30 \N \N +SELECT * FROM test2 RIGHT JOIN test1 ON test2.col1 = test1.col1 +WHERE test2.col1 IS NULL +ORDER BY test2.col1 +; +\N \N 12321 -30 +SELECT * FROM test1 LEFT JOIN test2 ON test1.col1 = test2.col1 +WHERE test2.col1 IS NOT NULL +ORDER BY test2.col1 +; +123 123 123 5600 +321 -32 321 5601 +SELECT * FROM test2 RIGHT JOIN test1 ON test2.col1 = test1.col1 +WHERE test2.col1 IS NOT NULL +ORDER BY test2.col1 +; +123 5600 
123 123 +321 5601 321 -32 +SELECT test2.col1, test1.* FROM test2 RIGHT JOIN test1 ON test2.col1 = test1.col1 +WHERE test2.col1 IS NOT NULL +ORDER BY test2.col1 +; +123 123 123 +321 321 -32 +SELECT test2.col3, test1.* FROM test2 RIGHT JOIN test1 ON test2.col1 = test1.col1 +WHERE test2.col1 IS NOT NULL +ORDER BY test2.col1 +; +5600 123 123 +5601 321 -32 +DROP TABLE IF EXISTS test1; +DROP TABLE IF EXISTS test2; diff --git a/tests/queries/0_stateless/02534_join_prewhere_bug_44062.sql b/tests/queries/0_stateless/02534_join_prewhere_bug_44062.sql new file mode 100644 index 00000000000..073f81e4ff3 --- /dev/null +++ b/tests/queries/0_stateless/02534_join_prewhere_bug_44062.sql @@ -0,0 +1,46 @@ + +DROP TABLE IF EXISTS test1; +DROP TABLE IF EXISTS test2; + +CREATE TABLE test1 ( `col1` UInt64, `col2` Int8 ) ENGINE = MergeTree ORDER BY col1; +CREATE TABLE test2 ( `col1` UInt64, `col3` Int16 ) ENGINE = MergeTree ORDER BY col1; + +INSERT INTO test1 VALUES (123, 123), (12321, -30), (321, -32); +INSERT INTO test2 VALUES (123, 5600), (321, 5601); + +SET join_use_nulls = 1; + +-- { echoOn } + +SELECT * FROM test1 LEFT JOIN test2 ON test1.col1 = test2.col1 +WHERE test2.col1 IS NULL +ORDER BY test2.col1 +; + +SELECT * FROM test2 RIGHT JOIN test1 ON test2.col1 = test1.col1 +WHERE test2.col1 IS NULL +ORDER BY test2.col1 +; + +SELECT * FROM test1 LEFT JOIN test2 ON test1.col1 = test2.col1 +WHERE test2.col1 IS NOT NULL +ORDER BY test2.col1 +; + +SELECT * FROM test2 RIGHT JOIN test1 ON test2.col1 = test1.col1 +WHERE test2.col1 IS NOT NULL +ORDER BY test2.col1 +; + +SELECT test2.col1, test1.* FROM test2 RIGHT JOIN test1 ON test2.col1 = test1.col1 +WHERE test2.col1 IS NOT NULL +ORDER BY test2.col1 +; + +SELECT test2.col3, test1.* FROM test2 RIGHT JOIN test1 ON test2.col1 = test1.col1 +WHERE test2.col1 IS NOT NULL +ORDER BY test2.col1 +; + +DROP TABLE IF EXISTS test1; +DROP TABLE IF EXISTS test2; diff --git a/tests/queries/0_stateless/02535_ip_parser_not_whole.reference b/tests/queries/0_stateless/02535_ip_parser_not_whole.reference new file mode 100644 index 00000000000..31502960af3 --- /dev/null +++ b/tests/queries/0_stateless/02535_ip_parser_not_whole.reference @@ -0,0 +1,3 @@ +::1 42 +::1 42 +::1 42 diff --git a/tests/queries/0_stateless/02535_ip_parser_not_whole.sql b/tests/queries/0_stateless/02535_ip_parser_not_whole.sql new file mode 100644 index 00000000000..675707d197b --- /dev/null +++ b/tests/queries/0_stateless/02535_ip_parser_not_whole.sql @@ -0,0 +1,3 @@ +SELECT * FROM format(CSVWithNamesAndTypes, 'ip,port\nIPv6,UInt16\n::1,42\n'); +SELECT * FROM format(TSVWithNamesAndTypes, 'ip\tport\nIPv6\tUInt16\n::1\t42\n'); +SELECT * FROM format(JSONCompactEachRowWithNamesAndTypes, '["ip","port"]\n["IPv6","UInt16"]\n["::1",42]\n'); diff --git a/tests/queries/0_stateless/02536_hdfs_cluster_use_structure_from_table.reference b/tests/queries/0_stateless/02536_hdfs_cluster_use_structure_from_table.reference new file mode 100644 index 00000000000..7a930fd0fb3 --- /dev/null +++ b/tests/queries/0_stateless/02536_hdfs_cluster_use_structure_from_table.reference @@ -0,0 +1 @@ +(1,2) diff --git a/tests/queries/0_stateless/02536_hdfs_cluster_use_structure_from_table.sql b/tests/queries/0_stateless/02536_hdfs_cluster_use_structure_from_table.sql new file mode 100644 index 00000000000..6ca79ba30ba --- /dev/null +++ b/tests/queries/0_stateless/02536_hdfs_cluster_use_structure_from_table.sql @@ -0,0 +1,11 @@ +-- Tags: no-fasttest, no-parallel, no-cpu-aarch64 +-- Tag no-fasttest: Depends on Java + +insert into table function 
hdfs('hdfs://localhost:12222/test_02536.jsonl', 'TSV') select '{"x" : {"a" : 1, "b" : 2}}' settings hdfs_truncate_on_insert=1; +drop table if exists test; +create table test (x Tuple(a UInt32, b UInt32)) engine=Memory(); +insert into test select * from hdfsCluster('test_cluster_two_shards_localhost', 'hdfs://localhost:12222/test_02536.jsonl') settings use_structure_from_insertion_table_in_table_functions=0; -- {serverError TYPE_MISMATCH} +insert into test select * from hdfsCluster('test_cluster_two_shards_localhost', 'hdfs://localhost:12222/test_02536.jsonl') settings use_structure_from_insertion_table_in_table_functions=1; +select * from test; +drop table test; + diff --git a/tests/queries/0_stateless/02537_system_formats.reference b/tests/queries/0_stateless/02537_system_formats.reference new file mode 100644 index 00000000000..5987834d9b9 --- /dev/null +++ b/tests/queries/0_stateless/02537_system_formats.reference @@ -0,0 +1,2 @@ +CSV 1 1 1 1 +Native 1 1 0 0 diff --git a/tests/queries/0_stateless/02537_system_formats.sql b/tests/queries/0_stateless/02537_system_formats.sql new file mode 100644 index 00000000000..7a09daf325c --- /dev/null +++ b/tests/queries/0_stateless/02537_system_formats.sql @@ -0,0 +1 @@ +SELECT * FROM system.formats WHERE name IN ('CSV', 'Native') ORDER BY name; diff --git a/tests/queries/0_stateless/02538_analyzer_create_table_as_select.reference b/tests/queries/0_stateless/02538_analyzer_create_table_as_select.reference new file mode 100644 index 00000000000..f6ac79e2047 --- /dev/null +++ b/tests/queries/0_stateless/02538_analyzer_create_table_as_select.reference @@ -0,0 +1 @@ +0 Value diff --git a/tests/queries/0_stateless/02538_analyzer_create_table_as_select.sql b/tests/queries/0_stateless/02538_analyzer_create_table_as_select.sql new file mode 100644 index 00000000000..168066ce2f9 --- /dev/null +++ b/tests/queries/0_stateless/02538_analyzer_create_table_as_select.sql @@ -0,0 +1,18 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS test_table_data; +CREATE TABLE test_table_data +( + id UInt64, + value String +) ENGINE=MergeTree() ORDER BY id; + +INSERT INTO test_table_data VALUES (0, 'Value'); + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table ENGINE=MergeTree() ORDER BY tuple() AS SELECT * FROM test_table_data; + +SELECT * FROM test_table; + +DROP TABLE test_table_data; +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/02517_fuse_bug_44712.reference b/tests/queries/0_stateless/02538_ngram_bf_index_with_null.reference similarity index 100% rename from tests/queries/0_stateless/02517_fuse_bug_44712.reference rename to tests/queries/0_stateless/02538_ngram_bf_index_with_null.reference diff --git a/tests/queries/0_stateless/02538_ngram_bf_index_with_null.sql b/tests/queries/0_stateless/02538_ngram_bf_index_with_null.sql new file mode 100644 index 00000000000..b53c219ff03 --- /dev/null +++ b/tests/queries/0_stateless/02538_ngram_bf_index_with_null.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS 02538_bf_ngrambf_map_values_test; + +CREATE TABLE 02538_bf_ngrambf_map_values_test (`row_id` Int128, `map` Map(String, String), `map_fixed` Map(FixedString(2), String), +INDEX map_values_ngrambf mapKeys(map) TYPE ngrambf_v1(4, 256, 2, 0) GRANULARITY 1, +INDEX map_fixed_values_ngrambf mapKeys(map_fixed) TYPE ngrambf_v1(4, 256, 2, 0) GRANULARITY 1) +ENGINE = MergeTree +ORDER BY row_id +SETTINGS index_granularity = 1; + +INSERT INTO 02538_bf_ngrambf_map_values_test VALUES (1, {'a': 'a'}, {'b': 'b'}); + +SELECT * FROM 
02538_bf_ngrambf_map_values_test PREWHERE (map['']) = 'V2V\0V2V2V2V2V2V2' WHERE (map[NULL]) = 'V2V\0V2V2V2V2V2V2V2V\0V2V2V2V2V2V2V2V\0V2V2V2V2V2V2V2V\0V2V2V2V2V2V2' SETTINGS force_data_skipping_indices = 'map_values_ngrambf'; + +DROP TABLE 02538_bf_ngrambf_map_values_test; diff --git a/tests/queries/1_stateful/00172_hits_joins.reference.j2 b/tests/queries/1_stateful/00172_hits_joins.reference.j2 index c357ede4c2c..1a43f1fb6ef 100644 --- a/tests/queries/1_stateful/00172_hits_joins.reference.j2 +++ b/tests/queries/1_stateful/00172_hits_joins.reference.j2 @@ -1,4 +1,4 @@ -{% for join_algorithm in ['hash', 'parallel_hash', 'full_sorting_merge'] -%} +{% for join_algorithm in ['hash', 'parallel_hash', 'full_sorting_merge', 'grace_hash'] -%} --- {{ join_algorithm }} --- 2014-03-17 1406958 265108 2014-03-19 1405797 261624 diff --git a/tests/queries/1_stateful/00172_hits_joins.sql.j2 b/tests/queries/1_stateful/00172_hits_joins.sql.j2 index 07ea899f536..4599d1d5a5d 100644 --- a/tests/queries/1_stateful/00172_hits_joins.sql.j2 +++ b/tests/queries/1_stateful/00172_hits_joins.sql.j2 @@ -1,6 +1,7 @@ -{% for join_algorithm in ['hash', 'parallel_hash', 'full_sorting_merge'] -%} +{% for join_algorithm in ['hash', 'parallel_hash', 'full_sorting_merge', 'grace_hash'] -%} -SET max_bytes_in_join = '{% if join_algorithm == 'grace_hash' %}20K{% else %}0{% endif %}'; +SET max_rows_in_join = '{% if join_algorithm == 'grace_hash' %}10K{% else %}0{% endif %}'; +SET grace_hash_join_initial_buckets = 4; SELECT '--- {{ join_algorithm }} ---'; diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 0bf8023d698..761034ac8dc 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -104,6 +104,8 @@ NYPD NuRaft ObjectId Ok +OLAP +OLTP OpenSUSE OpenStack OpenTelemetry @@ -125,6 +127,8 @@ PrettySpaceNoEscapesMonoBlock Protobuf ProtobufSingle QTCreator +QueryResultCacheHits +QueryResultCacheMisses RBAC RawBLOB RedHat @@ -490,6 +494,7 @@ tokenization toml toolchain toolset +transactionally tskv tsv tui diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index f11bf7a0c26..e4ff5f57175 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,3 +1,4 @@ +v23.1.1.3077-stable 2023-01-25 v22.12.3.5-stable 2023-01-10 v22.12.2.25-stable 2023-01-06 v22.12.1.1752-stable 2022-12-15 @@ -5,6 +6,7 @@ v22.11.4.3-stable 2023-01-10 v22.11.3.47-stable 2023-01-09 v22.11.2.30-stable 2022-12-02 v22.11.1.1360-stable 2022-11-17 +v22.10.7.13-stable 2023-01-26 v22.10.6.3-stable 2023-01-10 v22.10.5.54-stable 2023-01-09 v22.10.4.23-stable 2022-12-02