diff --git a/.github/ISSUE_TEMPLATE/10_question.md b/.github/ISSUE_TEMPLATE/10_question.md index 5b3d00a3180..0992bf06217 100644 --- a/.github/ISSUE_TEMPLATE/10_question.md +++ b/.github/ISSUE_TEMPLATE/10_question.md @@ -7,6 +7,6 @@ assignees: '' --- -> Make sure to check documentation https://clickhouse.com/docs/en/ first. If the question is concise and probably has a short answer, asking it in Telegram chat https://telegram.me/clickhouse_en is probably the fastest way to find the answer. For more complicated questions, consider asking them on StackOverflow with "clickhouse" tag https://stackoverflow.com/questions/tagged/clickhouse +> Make sure to check documentation https://clickhouse.com/docs/en/ first. If the question is concise and probably has a short answer, asking it in [community Slack](https://join.slack.com/t/clickhousedb/shared_invite/zt-1gh9ds7f4-PgDhJAaF8ad5RbWBAAjzFg) is probably the fastest way to find the answer. For more complicated questions, consider asking them on StackOverflow with "clickhouse" tag https://stackoverflow.com/questions/tagged/clickhouse > If you still prefer GitHub issues, remove all this text and ask your question here. diff --git a/.github/workflows/docs_check.yml b/.github/workflows/docs_check.yml index 7a15e77becb..5c912ebd359 100644 --- a/.github/workflows/docs_check.yml +++ b/.github/workflows/docs_check.yml @@ -16,6 +16,7 @@ on: # yamllint disable-line rule:truthy - 'docker/docs/**' - 'docs/**' - 'website/**' + - 'utils/check-style/aspell-ignore/**' jobs: CheckLabels: runs-on: [self-hosted, style-checker] diff --git a/.github/workflows/docs_release.yml b/.github/workflows/docs_release.yml index da67edd4aa1..1b43138852b 100644 --- a/.github/workflows/docs_release.yml +++ b/.github/workflows/docs_release.yml @@ -17,6 +17,7 @@ concurrency: - 'docs/**' - 'utils/list-versions/version_date.tsv' - 'website/**' + - 'utils/check-style/aspell-ignore/**' workflow_dispatch: jobs: DockerHubPushAarch64: diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index f3d672136ef..3eca97441f5 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -842,7 +842,7 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" - BuilderBinAmd64SSE2: + BuilderBinAmd64Compat: needs: [DockerHubPush] runs-on: [self-hosted, builder] steps: @@ -853,7 +853,7 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - BUILD_NAME=binary_amd64sse2 + BUILD_NAME=binary_amd64_compat EOF - name: Download changed images uses: actions/download-artifact@v2 @@ -1017,7 +1017,7 @@ jobs: - BuilderBinFreeBSD # - BuilderBinGCC - BuilderBinPPC64 - - BuilderBinAmd64SSE2 + - BuilderBinAmd64Compat - BuilderBinAarch64V80Compat - BuilderBinClangTidy - BuilderDebShared diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 857e2c7f604..a81f52a9371 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -16,6 +16,7 @@ on: # yamllint disable-line rule:truthy - 'docker/docs/**' - 'docs/**' - 'website/**' + - 'utils/check-style/aspell-ignore/**' ########################################################################################## ##################################### SMALL CHECKS ####################################### 
########################################################################################## @@ -900,7 +901,7 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" - BuilderBinAmd64SSE2: + BuilderBinAmd64Compat: needs: [DockerHubPush, FastTest, StyleCheck] runs-on: [self-hosted, builder] steps: @@ -911,7 +912,7 @@ jobs: IMAGES_PATH=${{runner.temp}}/images_path REPO_COPY=${{runner.temp}}/build_check/ClickHouse CACHES_PATH=${{runner.temp}}/../ccaches - BUILD_NAME=binary_amd64sse2 + BUILD_NAME=binary_amd64_compat EOF - name: Download changed images uses: actions/download-artifact@v2 @@ -1070,7 +1071,7 @@ jobs: - BuilderBinFreeBSD # - BuilderBinGCC - BuilderBinPPC64 - - BuilderBinAmd64SSE2 + - BuilderBinAmd64Compat - BuilderBinAarch64V80Compat - BuilderBinClangTidy - BuilderDebShared diff --git a/.gitignore b/.gitignore index 6d94cade384..7d915186dcc 100644 --- a/.gitignore +++ b/.gitignore @@ -158,3 +158,9 @@ website/package-lock.json # temporary test files tests/queries/0_stateless/test_* tests/queries/0_stateless/*.binary +tests/queries/0_stateless/*.generated-expect + +# rust +/rust/**/target +# It is autogenerated from *.in +/rust/**/.cargo/config.toml diff --git a/.gitmodules b/.gitmodules index 070109eb32d..0805b6d5492 100644 --- a/.gitmodules +++ b/.gitmodules @@ -269,9 +269,6 @@ [submodule "contrib/vectorscan"] path = contrib/vectorscan url = https://github.com/VectorCamp/vectorscan.git -[submodule "contrib/liburing"] - path = contrib/liburing - url = https://github.com/axboe/liburing.git [submodule "contrib/c-ares"] path = contrib/c-ares url = https://github.com/ClickHouse/c-ares @@ -294,3 +291,6 @@ [submodule "contrib/google-benchmark"] path = contrib/google-benchmark url = https://github.com/google/benchmark.git +[submodule "contrib/libdivide"] + path = contrib/libdivide + url = https://github.com/ridiculousfish/libdivide.git diff --git a/CHANGELOG.md b/CHANGELOG.md index 0e41894b8bd..ddc10c1eb2f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,5 @@ ### Table of Contents +**[ClickHouse release v22.12, 2022-12-15](#2212)**
**[ClickHouse release v22.11, 2022-11-17](#2211)**
**[ClickHouse release v22.10, 2022-10-25](#2210)**
**[ClickHouse release v22.9, 2022-09-22](#229)**
@@ -12,6 +13,124 @@ **[ClickHouse release v22.1, 2022-01-18](#221)**
**[Changelog for 2021](https://clickhouse.com/docs/en/whats-new/changelog/2021/)**
+# 2022 Changelog + +### ClickHouse release 22.12, 2022-12-15 + +#### Backward Incompatible Change +* Add `GROUP BY ALL` syntax: [#37631](https://github.com/ClickHouse/ClickHouse/issues/37631). [#42265](https://github.com/ClickHouse/ClickHouse/pull/42265) ([刘陶峰](https://github.com/taofengliu)). If you have a column or an alias named `all` and doing `GROUP BY all` without the intention to group by all the columns, the query will have a different semantic. To keep the old semantic, put `all` into backticks or double quotes `"all"` to make it an identifier instead of a keyword. + +#### Upgrade Notes +* Fixed backward incompatibility in (de)serialization of states of `min`, `max`, `any*`, `argMin`, `argMax` aggregate functions with `String` argument. The incompatibility affects 22.9, 22.10 and 22.11 branches (fixed since 22.9.6, 22.10.4 and 22.11.2 correspondingly). Some minor releases of 22.3, 22.7 and 22.8 branches are also affected: 22.3.13...22.3.14 (fixed since 22.3.15), 22.8.6...22.8.9 (fixed since 22.8.10), 22.7.6 and newer (will not be fixed in 22.7, we recommend upgrading from 22.7.* to 22.8.10 or newer). This release note does not concern users that have never used affected versions. Incompatible versions append an extra `'\0'` to strings when reading states of the aggregate functions mentioned above. For example, if an older version saved state of `anyState('foobar')` to `state_column` then the incompatible version will print `'foobar\0'` on `anyMerge(state_column)`. Also incompatible versions write states of the aggregate functions without trailing `'\0'`. Newer versions (that have the fix) can correctly read data written by all versions including incompatible versions, except one corner case. If an incompatible version saved a state with a string that actually ends with null character, then newer version will trim trailing `'\0'` when reading state of affected aggregate function. For example, if an incompatible version saved state of `anyState('abrac\0dabra\0')` to `state_column` then newer versions will print `'abrac\0dabra'` on `anyMerge(state_column)`. The issue also affects distributed queries when an incompatible version works in a cluster together with older or newer versions. [#43038](https://github.com/ClickHouse/ClickHouse/pull/43038) ([Alexander Tokmakov](https://github.com/tavplubix), [Raúl Marín](https://github.com/Algunenano)). Note: all the official ClickHouse builds already include the patches. This is not necessarily true for unofficial third-party builds that should be avoided. + +#### New Feature +* Add `BSONEachRow` input/output format. In this format, ClickHouse formats/parses each row as a separate BSON document and each column is formatted/parsed as a single BSON field with the column name as the key. [#42033](https://github.com/ClickHouse/ClickHouse/pull/42033) ([mark-polokhov](https://github.com/mark-polokhov)). +* Add `grace_hash` JOIN algorithm, it can be enabled with `SET join_algorithm = 'grace_hash'`. [#38191](https://github.com/ClickHouse/ClickHouse/pull/38191) ([BigRedEye](https://github.com/BigRedEye), [Vladimir C](https://github.com/vdimir)). +* Allow configuring password complexity rules and checks for creating and changing users. [#43719](https://github.com/ClickHouse/ClickHouse/pull/43719) ([Nikolay Degterinsky](https://github.com/evillique)). +* Mask sensitive information in logs; mask secret parts in the output of queries `SHOW CREATE TABLE` and `SELECT FROM system.tables`. 
Also resolves [#41418](https://github.com/ClickHouse/ClickHouse/issues/41418). [#43227](https://github.com/ClickHouse/ClickHouse/pull/43227) ([Vitaly Baranov](https://github.com/vitlibar)). +* Add `GROUP BY ALL` syntax: [#37631](https://github.com/ClickHouse/ClickHouse/issues/37631). [#42265](https://github.com/ClickHouse/ClickHouse/pull/42265) ([刘陶峰](https://github.com/taofengliu)). +* Add `FROM table SELECT column` syntax. [#41095](https://github.com/ClickHouse/ClickHouse/pull/41095) ([Nikolay Degterinsky](https://github.com/evillique)). +* Added function `concatWithSeparator` and `concat_ws` as an alias for Spark SQL compatibility. A function `concatWithSeparatorAssumeInjective` added as a variant to enable GROUP BY optimization, similarly to `concatAssumeInjective`. [#43749](https://github.com/ClickHouse/ClickHouse/pull/43749) ([李扬](https://github.com/taiyang-li)). +* Added `multiplyDecimal` and `divideDecimal` functions for decimal operations with fixed precision. [#42438](https://github.com/ClickHouse/ClickHouse/pull/42438) ([Andrey Zvonov](https://github.com/zvonand)). +* Added `system.moves` table with list of currently moving parts. [#42660](https://github.com/ClickHouse/ClickHouse/pull/42660) ([Sergei Trifonov](https://github.com/serxa)). +* Add support for embedded Prometheus endpoint for ClickHouse Keeper. [#43087](https://github.com/ClickHouse/ClickHouse/pull/43087) ([Antonio Andelic](https://github.com/antonio2368)). +* Support numeric literals with `_` as the separator, for example, `1_000_000`. [#43925](https://github.com/ClickHouse/ClickHouse/pull/43925) ([jh0x](https://github.com/jh0x)). +* Added possibility to use an array as a second parameter for `cutURLParameter` function. It will cut multiple parameters. Close [#6827](https://github.com/ClickHouse/ClickHouse/issues/6827). [#43788](https://github.com/ClickHouse/ClickHouse/pull/43788) ([Roman Vasin](https://github.com/rvasin)). +* Add a column with the expression of the index in the `system.data_skipping_indices` table. [#43308](https://github.com/ClickHouse/ClickHouse/pull/43308) ([Guillaume Tassery](https://github.com/YiuRULE)). +* Add column `engine_full` to system table `databases` so that users can access the entire engine definition of a database via system tables. [#43468](https://github.com/ClickHouse/ClickHouse/pull/43468) ([凌涛](https://github.com/lingtaolf)). +* New hash function [xxh3](https://github.com/Cyan4973/xxHash) added. Also, the performance of `xxHash32` and `xxHash64` are improved on ARM thanks to a library update. [#43411](https://github.com/ClickHouse/ClickHouse/pull/43411) ([Nikita Taranov](https://github.com/nickitat)). +* Added support to define constraints for merge tree settings. For example you can forbid overriding the `storage_policy` by users. [#43903](https://github.com/ClickHouse/ClickHouse/pull/43903) ([Sergei Trifonov](https://github.com/serxa)). +* Add a new setting `input_format_json_read_objects_as_strings` that allows the parsing of nested JSON objects into Strings in all JSON input formats. This setting is disabled by default. [#44052](https://github.com/ClickHouse/ClickHouse/pull/44052) ([Kruglov Pavel](https://github.com/Avogar)). + +#### Experimental Feature +* Support deduplication for asynchronous inserts. Before this change, async inserts did not support deduplication, because multiple small inserts coexisted in one inserted batch. Closes [#38075](https://github.com/ClickHouse/ClickHouse/issues/38075). 
[#43304](https://github.com/ClickHouse/ClickHouse/pull/43304) ([Han Fei](https://github.com/hanfei1991)).
+* Add support for cosine distance for the experimental Annoy (vector similarity search) index. [#42778](https://github.com/ClickHouse/ClickHouse/pull/42778) ([Filatenkov Artur](https://github.com/FArthur-cmd)).
+* Add `CREATE / ALTER / DROP NAMED COLLECTION` queries. [#43252](https://github.com/ClickHouse/ClickHouse/pull/43252) ([Kseniia Sumarokova](https://github.com/kssenii)). This feature is under development and the queries are not effective as of version 22.12. This changelog entry is added only to avoid confusion. Restrict default access to named collections to the user defined in config. This requires that `show_named_collections = 1` is set to be able to see them. [#43325](https://github.com/ClickHouse/ClickHouse/pull/43325) ([Kseniia Sumarokova](https://github.com/kssenii)). The `system.named_collections` table was introduced in [#43147](https://github.com/ClickHouse/ClickHouse/pull/43147) ([Kseniia Sumarokova](https://github.com/kssenii)).
+
+#### Performance Improvement
+* Add settings `max_streams_for_merge_tree_reading` and `allow_asynchronous_read_from_io_pool_for_merge_tree`. Setting `max_streams_for_merge_tree_reading` limits the number of reading streams for MergeTree tables. Setting `allow_asynchronous_read_from_io_pool_for_merge_tree` enables a background I/O pool to read from `MergeTree` tables. This may increase performance for I/O-bound queries if used together with `max_streams_to_max_threads_ratio` or `max_streams_for_merge_tree_reading`. [#43260](https://github.com/ClickHouse/ClickHouse/pull/43260) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). This improves performance by up to 100 times in the case of high-latency storage, a low number of CPUs, and a high number of data parts.
+* Settings `merge_tree_min_rows_for_concurrent_read_for_remote_filesystem/merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem` did not respect adaptive granularity. Fat rows did not decrease the number of read rows (as was done for `merge_tree_min_rows_for_concurrent_read/merge_tree_min_bytes_for_concurrent_read`), which could lead to high memory usage when using remote filesystems. [#43965](https://github.com/ClickHouse/ClickHouse/pull/43965) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Optimized the number of list requests to ZooKeeper or ClickHouse Keeper when selecting a part to merge. Previously it could produce thousands of requests in some cases. Fixes [#43647](https://github.com/ClickHouse/ClickHouse/issues/43647). [#43675](https://github.com/ClickHouse/ClickHouse/pull/43675) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* The optimization is now skipped if `max_size_to_preallocate_for_aggregation` has too small a value. The default value of this setting was increased to `10^8`. [#43945](https://github.com/ClickHouse/ClickHouse/pull/43945) ([Nikita Taranov](https://github.com/nickitat)).
+* Speed up server shutdown by avoiding cleanup of old data parts, because it is unnecessary after https://github.com/ClickHouse/ClickHouse/pull/41145. [#43760](https://github.com/ClickHouse/ClickHouse/pull/43760) ([Sema Checherinda](https://github.com/CheSema)).
+* Merging on the initiator now uses the same memory-bound approach as merging of local aggregation results if `enable_memory_bound_merging_of_aggregation_results` is set. [#40879](https://github.com/ClickHouse/ClickHouse/pull/40879) ([Nikita Taranov](https://github.com/nickitat)).
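For illustration only, the query-level settings named in the Performance Improvement entries above could be combined per query roughly as follows; the table name and the values are hypothetical, not recommendations.

```sql
-- Sketch: 'events' is a hypothetical MergeTree table; the values are arbitrary.
SELECT user_id, count()
FROM events
GROUP BY user_id
SETTINGS
    max_streams_for_merge_tree_reading = 16,
    allow_asynchronous_read_from_io_pool_for_merge_tree = 1,
    enable_memory_bound_merging_of_aggregation_results = 1;
```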
+* Keeper improvement: try syncing logs to disk in parallel with replication. [#43450](https://github.com/ClickHouse/ClickHouse/pull/43450) ([Antonio Andelic](https://github.com/antonio2368)).
+* Keeper improvement: requests are batched more often. The batching can be controlled with the new setting `max_requests_quick_batch_size`. [#43686](https://github.com/ClickHouse/ClickHouse/pull/43686) ([Antonio Andelic](https://github.com/antonio2368)).
+
+#### Improvement
+* Implement referential dependencies and use them to create tables in the correct order while restoring from a backup. [#43834](https://github.com/ClickHouse/ClickHouse/pull/43834) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Substitute UDFs in the `CREATE` query to avoid failures during loading at startup. Additionally, UDFs can now be used as `DEFAULT` expressions for columns. [#43539](https://github.com/ClickHouse/ClickHouse/pull/43539) ([Antonio Andelic](https://github.com/antonio2368)).
+* Change how the following queries delete parts: TRUNCATE TABLE, ALTER TABLE DROP PART, ALTER TABLE DROP PARTITION. Now, these queries make empty parts which cover the old parts. This makes the TRUNCATE query work without a subsequent exclusive lock, which means concurrent reads aren't blocked. Durability is also achieved in all of those queries: if the request succeeds, then no resurrected parts appear later. Note that atomicity is achieved only with transaction scope. [#41145](https://github.com/ClickHouse/ClickHouse/pull/41145) ([Sema Checherinda](https://github.com/CheSema)).
+* The `SET param_x` query no longer requires manual string serialization for the value of the parameter. For example, the query `SET param_a = '[\'a\', \'b\']'` can now be written as `SET param_a = ['a', 'b']`. [#41874](https://github.com/ClickHouse/ClickHouse/pull/41874) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Show read rows in the progress indication while reading from STDIN in the client. Closes [#43423](https://github.com/ClickHouse/ClickHouse/issues/43423). [#43442](https://github.com/ClickHouse/ClickHouse/pull/43442) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Show a progress bar while reading from the s3 table function / engine. [#43454](https://github.com/ClickHouse/ClickHouse/pull/43454) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* `filesystemAvailable` and related functions support one optional argument with a disk name, and `filesystemFree` was changed to `filesystemUnreserved`. Closes [#35076](https://github.com/ClickHouse/ClickHouse/issues/35076). [#42064](https://github.com/ClickHouse/ClickHouse/pull/42064) ([flynn](https://github.com/ucasfl)).
+* Integration with LDAP: increased the default value of search_limit to 256 and added an LDAP server config option to change it to an arbitrary value. Closes: [#42276](https://github.com/ClickHouse/ClickHouse/issues/42276). [#42461](https://github.com/ClickHouse/ClickHouse/pull/42461) ([Vasily Nemkov](https://github.com/Enmk)).
+* Allow the removal of sensitive information (see `query_masking_rules` in the configuration file) from exception messages as well. Resolves [#41418](https://github.com/ClickHouse/ClickHouse/issues/41418). [#42940](https://github.com/ClickHouse/ClickHouse/pull/42940) ([filimonov](https://github.com/filimonov)).
+* Support queries like `SHOW FULL TABLES ...` for MySQL compatibility. [#43910](https://github.com/ClickHouse/ClickHouse/pull/43910) ([Filatenkov Artur](https://github.com/FArthur-cmd)).
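To make the `SET param_x` and `SHOW FULL TABLES` entries above concrete, here is a hedged sketch; the parameter, table, column, and database names are hypothetical, and the exact set of modifiers accepted by `SHOW FULL TABLES` is not spelled out in the entry.

```sql
-- New literal syntax for query parameters (previously a manually serialized string was required).
SET param_ids = ['a', 'b'];
SELECT * FROM t WHERE id IN {ids:Array(String)};

-- MySQL-compatibility form mentioned above.
SHOW FULL TABLES FROM default;
```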
+* Keeper improvement: Add 4lw command `rqld` which can manually assign a node as leader. [#43026](https://github.com/ClickHouse/ClickHouse/pull/43026) ([JackyWoo](https://github.com/JackyWoo)). +* Apply connection timeout settings for Distributed async INSERT from the query. [#43156](https://github.com/ClickHouse/ClickHouse/pull/43156) ([Azat Khuzhin](https://github.com/azat)). +* The `unhex` function now supports `FixedString` arguments. [issue42369](https://github.com/ClickHouse/ClickHouse/issues/42369). [#43207](https://github.com/ClickHouse/ClickHouse/pull/43207) ([DR](https://github.com/freedomDR)). +* Priority is given to deleting completely expired parts according to the TTL rules, see [#42869](https://github.com/ClickHouse/ClickHouse/issues/42869). [#43222](https://github.com/ClickHouse/ClickHouse/pull/43222) ([zhongyuankai](https://github.com/zhongyuankai)). +* More precise and reactive CPU load indication in clickhouse-client. [#43307](https://github.com/ClickHouse/ClickHouse/pull/43307) ([Sergei Trifonov](https://github.com/serxa)). +* Support reading of subcolumns of nested types from storage `S3` and table function `s3` with formats `Parquet`, `Arrow` and `ORC`. [#43329](https://github.com/ClickHouse/ClickHouse/pull/43329) ([chen](https://github.com/xiedeyantu)). +* Add `table_uuid` column to the `system.parts` table. [#43404](https://github.com/ClickHouse/ClickHouse/pull/43404) ([Azat Khuzhin](https://github.com/azat)). +* Added client option to display the number of locally processed rows in non-interactive mode (`--print-num-processed-rows`). [#43407](https://github.com/ClickHouse/ClickHouse/pull/43407) ([jh0x](https://github.com/jh0x)). +* Implement `aggregation-in-order` optimization on top of a query plan. It is enabled by default (but works only together with `optimize_aggregation_in_order`, which is disabled by default). Set `query_plan_aggregation_in_order = 0` to use the previous AST-based version. [#43592](https://github.com/ClickHouse/ClickHouse/pull/43592) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Allow to collect profile events with `trace_type = 'ProfileEvent'` to `system.trace_log` on each increment with current stack, profile event name and value of the increment. It can be enabled by the setting `trace_profile_events` and used to investigate performance of queries. [#43639](https://github.com/ClickHouse/ClickHouse/pull/43639) ([Anton Popov](https://github.com/CurtizJ)). +* Add a new setting `input_format_max_binary_string_size` to limit string size in RowBinary format. [#43842](https://github.com/ClickHouse/ClickHouse/pull/43842) ([Kruglov Pavel](https://github.com/Avogar)). +* When ClickHouse requests a remote HTTP server, and it returns an error, the numeric HTTP code was not displayed correctly in the exception message. Closes [#43919](https://github.com/ClickHouse/ClickHouse/issues/43919). [#43920](https://github.com/ClickHouse/ClickHouse/pull/43920) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Correctly report errors in queries even when multiple JOINs optimization is taking place. [#43583](https://github.com/ClickHouse/ClickHouse/pull/43583) ([Salvatore](https://github.com/tbsal)). + +#### Build/Testing/Packaging Improvement + +* Systemd integration now correctly notifies systemd that the service is really started and is ready to serve requests. [#43400](https://github.com/ClickHouse/ClickHouse/pull/43400) ([Коренберг Марк](https://github.com/socketpair)). 
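Referring back to the `trace_profile_events` entry above, a minimal sketch of how the collected increments might be inspected. It assumes that `system.trace_log` exposes the profile event name and increment value as `event` and `increment` columns (the entry only says the name and increment are recorded); the workload query is arbitrary.

```sql
SET trace_profile_events = 1;
SELECT count() FROM numbers(10000000);  -- arbitrary workload to profile

SYSTEM FLUSH LOGS;  -- make sure the trace entries are written out

SELECT event, sum(increment) AS total
FROM system.trace_log
WHERE trace_type = 'ProfileEvent'
GROUP BY event
ORDER BY total DESC
LIMIT 10;
```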
+* Added the option to build ClickHouse with OpenSSL using the [OpenSSL FIPS Module](https://www.openssl.org/docs/man3.0/man7/fips_module.html). This build type has not been tested to validate security and is not supported. [#43991](https://github.com/ClickHouse/ClickHouse/pull/43991) ([Boris Kuschel](https://github.com/bkuschel)).
+* Upgrade to the new `DeflateQpl` compression codec which has been implemented in a previous PR (details: https://github.com/ClickHouse/ClickHouse/pull/39494). This patch improves the codec in the following aspects: 1. Upgrade from QPL v0.2.0 to QPL v0.3.0 ([Intel® Query Processing Library (QPL)](https://github.com/intel/qpl)). 2. Improve the CMake file to fix QPL build issues for QPL v0.3.0. 3. Link the QPL library with libaccel-config at build time instead of loading it at runtime (dlopen) as in QPL v0.2.0. 4. Fix a log print issue in CompressionCodecDeflateQpl.cpp. [#44024](https://github.com/ClickHouse/ClickHouse/pull/44024) ([jasperzhu](https://github.com/jinjunzh)).
+
+#### Bug Fix (user-visible misbehavior in official stable or prestable release)
+
+* Fixed a bug which could lead to a deadlock while using asynchronous inserts. [#43233](https://github.com/ClickHouse/ClickHouse/pull/43233) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix some incorrect logic in the AST-level optimization `optimize_normalize_count_variants`. [#43873](https://github.com/ClickHouse/ClickHouse/pull/43873) ([Duc Canh Le](https://github.com/canhld94)).
+* Fix a case when mutations were not making progress when checksums did not match between replicas (e.g. caused by a change in data format on an upgrade). [#36877](https://github.com/ClickHouse/ClickHouse/pull/36877) ([nvartolomei](https://github.com/nvartolomei)).
+* Fix the `skip_unavailable_shards` optimization which did not work with the `hdfsCluster` table function. [#43236](https://github.com/ClickHouse/ClickHouse/pull/43236) ([chen](https://github.com/xiedeyantu)).
+* Fix `s3` support for the `?` wildcard. Closes [#42731](https://github.com/ClickHouse/ClickHouse/issues/42731). [#43253](https://github.com/ClickHouse/ClickHouse/pull/43253) ([chen](https://github.com/xiedeyantu)).
+* Fix functions `arrayFirstOrNull` and `arrayLastOrNull` when the array contains `Nullable` elements. [#43274](https://github.com/ClickHouse/ClickHouse/pull/43274) ([Duc Canh Le](https://github.com/canhld94)).
+* Fix incorrect `UserTimeMicroseconds`/`SystemTimeMicroseconds` accounting related to Kafka tables. [#42791](https://github.com/ClickHouse/ClickHouse/pull/42791) ([Azat Khuzhin](https://github.com/azat)).
+* Do not suppress exceptions in `web` disks. Fix retries for the `web` disk. [#42800](https://github.com/ClickHouse/ClickHouse/pull/42800) ([Azat Khuzhin](https://github.com/azat)).
+* Fixed a (logical) race condition between inserts and dropping materialized views. The race condition happened when a materialized view was dropped at the same time as an INSERT: the MVs were present as a dependency of the insert at the beginning of the execution, but the table had been dropped by the time the insert chain tried to access it, producing either an `UNKNOWN_TABLE` or a `TABLE_IS_DROPPED` exception and stopping the insertion. After this change, we avoid these exceptions and just continue with the insert if the dependency is gone. [#43161](https://github.com/ClickHouse/ClickHouse/pull/43161) ([AlfVII](https://github.com/AlfVII)).
+* Fix undefined behavior in the `quantiles` function, which might lead to uninitialized memory. Found by fuzzer.
This closes [#44066](https://github.com/ClickHouse/ClickHouse/issues/44066). [#44067](https://github.com/ClickHouse/ClickHouse/pull/44067) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Additional check on zero uncompressed size is added to `CompressionCodecDelta`. [#43255](https://github.com/ClickHouse/ClickHouse/pull/43255) ([Nikita Taranov](https://github.com/nickitat)). +* Flatten arrays from Parquet to avoid an issue with inconsistent data in arrays. These incorrect files can be generated by Apache Iceberg. [#43297](https://github.com/ClickHouse/ClickHouse/pull/43297) ([Arthur Passos](https://github.com/arthurpassos)). +* Fix bad cast from `LowCardinality` column when using short circuit function execution. [#43311](https://github.com/ClickHouse/ClickHouse/pull/43311) ([Kruglov Pavel](https://github.com/Avogar)). +* Fixed queries with `SAMPLE BY` with prewhere optimization on tables using `Merge` engine. [#43315](https://github.com/ClickHouse/ClickHouse/pull/43315) ([Antonio Andelic](https://github.com/antonio2368)). +* Check and compare the content of the `format_version` file in `MergeTreeData` so that tables can be loaded even if the storage policy was changed. [#43328](https://github.com/ClickHouse/ClickHouse/pull/43328) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix possible (very unlikely) "No column to rollback" logical error during INSERT into `Buffer` tables. [#43336](https://github.com/ClickHouse/ClickHouse/pull/43336) ([Azat Khuzhin](https://github.com/azat)). +* Fix a bug that allowed the parser to parse an unlimited amount of round brackets into one function if `allow_function_parameters` is set. [#43350](https://github.com/ClickHouse/ClickHouse/pull/43350) ([Nikolay Degterinsky](https://github.com/evillique)). +* `MaterializeMySQL` (experimental feature) support DDL: `drop table t1, t2` and compatible with most of MySQL DROP DDL. [#43366](https://github.com/ClickHouse/ClickHouse/pull/43366) ([zzsmdfj](https://github.com/zzsmdfj)). +* `session_log` (experimental feature): Fixed the inability to log in (because of failure to create the session_log entry) in a very rare case of messed up setting profiles. [#42641](https://github.com/ClickHouse/ClickHouse/pull/42641) ([Vasily Nemkov](https://github.com/Enmk)). +* Fix possible `Cannot create non-empty column with type Nothing` in functions `if`/`multiIf`. Closes [#43356](https://github.com/ClickHouse/ClickHouse/issues/43356). [#43368](https://github.com/ClickHouse/ClickHouse/pull/43368) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix a bug when a row level filter uses the default value of a column. [#43387](https://github.com/ClickHouse/ClickHouse/pull/43387) ([Alexander Gololobov](https://github.com/davenger)). +* Query with `DISTINCT` + `LIMIT BY` + `LIMIT` can return fewer rows than expected. Fixes [#43377](https://github.com/ClickHouse/ClickHouse/issues/43377). [#43410](https://github.com/ClickHouse/ClickHouse/pull/43410) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix `sumMap` for `Nullable(Decimal(...))`. [#43414](https://github.com/ClickHouse/ClickHouse/pull/43414) ([Azat Khuzhin](https://github.com/azat)). +* Fix `date_diff` for hour/minute on macOS. Close [#42742](https://github.com/ClickHouse/ClickHouse/issues/42742). [#43466](https://github.com/ClickHouse/ClickHouse/pull/43466) ([zzsmdfj](https://github.com/zzsmdfj)). +* Fix incorrect memory accounting because of merges/mutations. 
[#43516](https://github.com/ClickHouse/ClickHouse/pull/43516) ([Azat Khuzhin](https://github.com/azat)). +* Fixed primary key analysis with conditions involving `toString(enum)`. [#43596](https://github.com/ClickHouse/ClickHouse/pull/43596) ([Nikita Taranov](https://github.com/nickitat)). This error has been found by @tisonkun. +* Ensure consistency when `clickhouse-copier` updates status and `attach_is_done` in Keeper after partition attach is done. [#43602](https://github.com/ClickHouse/ClickHouse/pull/43602) ([lzydmxy](https://github.com/lzydmxy)). +* During the recovery of a lost replica of a `Replicated` database (experimental feature), there could a situation where we need to atomically swap two table names (use EXCHANGE). Previously we tried to use two RENAME queries, which was obviously failing and moreover, failed the whole recovery process of the database replica. [#43628](https://github.com/ClickHouse/ClickHouse/pull/43628) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix the case when the `s3Cluster` function throws `NOT_FOUND_COLUMN_IN_BLOCK` error. Closes [#43534](https://github.com/ClickHouse/ClickHouse/issues/43534). [#43629](https://github.com/ClickHouse/ClickHouse/pull/43629) ([chen](https://github.com/xiedeyantu)). +* Fix possible logical error `Array sizes mismatched` while parsing JSON object with arrays with same key names but with different nesting level. Closes [#43569](https://github.com/ClickHouse/ClickHouse/issues/43569). [#43693](https://github.com/ClickHouse/ClickHouse/pull/43693) ([Kruglov Pavel](https://github.com/Avogar)). +* Fixed possible exception in the case of distributed `GROUP BY` with an `ALIAS` column among aggregation keys. [#43709](https://github.com/ClickHouse/ClickHouse/pull/43709) ([Nikita Taranov](https://github.com/nickitat)). +* Fix bug which can lead to broken projections if zero-copy replication (experimental feature) is enabled and used. [#43764](https://github.com/ClickHouse/ClickHouse/pull/43764) ([alesapin](https://github.com/alesapin)). +* Fix using multipart upload for very large S3 objects in AWS S3. [#43824](https://github.com/ClickHouse/ClickHouse/pull/43824) ([ianton-ru](https://github.com/ianton-ru)). +* Fixed `ALTER ... RESET SETTING` with `ON CLUSTER`. It could have been applied to one replica only. Fixes [#43843](https://github.com/ClickHouse/ClickHouse/issues/43843). [#43848](https://github.com/ClickHouse/ClickHouse/pull/43848) ([Elena Torró](https://github.com/elenatorro)). +* Fix a logical error in JOIN with `Join` table engine at right hand side, if `USING` is being used. [#43963](https://github.com/ClickHouse/ClickHouse/pull/43963) ([Vladimir C](https://github.com/vdimir)). Fix a bug with wrong order of keys in `Join` table engine. [#44012](https://github.com/ClickHouse/ClickHouse/pull/44012) ([Vladimir C](https://github.com/vdimir)). +* Keeper fix: throw if the interserver port for Raft is already in use. [#43984](https://github.com/ClickHouse/ClickHouse/pull/43984) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix ORDER BY positional argument (example: `ORDER BY 1, 2`) in case of unneeded columns pruning from subqueries. Closes [#43964](https://github.com/ClickHouse/ClickHouse/issues/43964). [#43987](https://github.com/ClickHouse/ClickHouse/pull/43987) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fixed exception when a subquery contains HAVING but doesn't contain an actual aggregation. 
[#44051](https://github.com/ClickHouse/ClickHouse/pull/44051) ([Nikita Taranov](https://github.com/nickitat)). +* Fix race in s3 multipart upload. This race could cause the error `Part number must be an integer between 1 and 10000, inclusive. (S3_ERROR)` while restoring from a backup. [#44065](https://github.com/ClickHouse/ClickHouse/pull/44065) ([Vitaly Baranov](https://github.com/vitlibar)). + + ### ClickHouse release 22.11, 2022-11-17 #### Backward Incompatible Change @@ -534,30 +653,30 @@ * Add counters (ProfileEvents) for cases when query complexity limitation has been set and has reached (a separate counter for `overflow_mode` = `break` and `throw`). For example, if you have set up `max_rows_to_read` with `read_overflow_mode = 'break'`, looking at the value of `OverflowBreak` counter will allow distinguishing incomplete results. [#40205](https://github.com/ClickHouse/ClickHouse/pull/40205) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Fix memory accounting in case of "Memory limit exceeded" errors (previously [peak] memory usage was takes failed allocations into account). [#40249](https://github.com/ClickHouse/ClickHouse/pull/40249) ([Azat Khuzhin](https://github.com/azat)). * Add metrics for filesystem cache: `FilesystemCacheSize` and `FilesystemCacheElements`. [#40260](https://github.com/ClickHouse/ClickHouse/pull/40260) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Support hadoop secure RPC transfer (hadoop.rpc.protection=privacy and hadoop.rpc.protection=integrity). [#39411](https://github.com/ClickHouse/ClickHouse/pull/39411) ([michael1589](https://github.com/michael1589)). +* Support Hadoop secure RPC transfer (hadoop.rpc.protection=privacy and hadoop.rpc.protection=integrity). [#39411](https://github.com/ClickHouse/ClickHouse/pull/39411) ([michael1589](https://github.com/michael1589)). * Avoid continuously growing memory consumption of pattern cache when using functions multi(Fuzzy)Match(Any|AllIndices|AnyIndex)(). [#40264](https://github.com/ClickHouse/ClickHouse/pull/40264) ([Robert Schulze](https://github.com/rschu1ze)). -* Add cache for schema inference for file/s3/hdfs/url table functions. Now, schema inference will be performed only on the first query to the file, all subsequent queries to the same file will use the schema from cache if data wasn't changed. Add system table system.schema_inference_cache with all current schemas in cache and system queries SYSTEM DROP SCHEMA CACHE [FOR FILE/S3/HDFS/URL] to drop schemas from cache. [#38286](https://github.com/ClickHouse/ClickHouse/pull/38286) ([Kruglov Pavel](https://github.com/Avogar)). +* Add cache for schema inference for file/s3/hdfs/url table functions. Now, schema inference will be performed only on the first query to the file, all subsequent queries to the same file will use the schema from the cache if data has not changed. Add system table system.schema_inference_cache with all current schemas in cache and system queries SYSTEM DROP SCHEMA CACHE [FOR FILE/S3/HDFS/URL] to drop schemas from cache. [#38286](https://github.com/ClickHouse/ClickHouse/pull/38286) ([Kruglov Pavel](https://github.com/Avogar)). * Add support for LARGE_BINARY/LARGE_STRING with Arrow (Closes [#32401](https://github.com/ClickHouse/ClickHouse/issues/32401)). [#40293](https://github.com/ClickHouse/ClickHouse/pull/40293) ([Josh Taylor](https://github.com/joshuataylor)). 
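As an illustration of the schema-inference-cache entry above: the file name is hypothetical, while the system table and the `SYSTEM DROP SCHEMA CACHE` statement are the ones named in the entry.

```sql
-- First call infers the schema and caches it; subsequent calls reuse the cache
-- as long as the data has not changed.
DESCRIBE TABLE file('data.parquet');

SELECT * FROM system.schema_inference_cache;  -- inspect cached schemas
SYSTEM DROP SCHEMA CACHE FOR FILE;            -- drop cached schemas for the file() source
```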
#### Build/Testing/Packaging Improvement * [ClickFiddle](https://fiddle.clickhouse.com/): A new tool for testing ClickHouse versions in read/write mode (**Igor Baliuk**). * ClickHouse binary is made self-extracting [#35775](https://github.com/ClickHouse/ClickHouse/pull/35775) ([Yakov Olkhovskiy, Arthur Filatenkov](https://github.com/yakov-olkhovskiy)). -* Update tzdata to 2022b to support the new timezone changes. See https://github.com/google/cctz/pull/226. Chile's 2022 DST start is delayed from September 4 to September 11. Iran plans to stop observing DST permanently, after it falls back on 2022-09-21. There are corrections of the historical time zone of Asia/Tehran in the year 1977: Iran adopted standard time in 1935, not 1946. In 1977 it observed DST from 03-21 23:00 to 10-20 24:00; its 1978 transitions were on 03-24 and 08-05, not 03-20 and 10-20; and its spring 1979 transition was on 05-27, not 03-21 (https://data.iana.org/time-zones/tzdb/NEWS). ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Former packages used to install systemd.service file to `/etc`. The files there are marked as `conf` and are not cleaned out, and not updated automatically. This PR cleans them out. [#39323](https://github.com/ClickHouse/ClickHouse/pull/39323) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update `tzdata` to 2022b to support the new timezone changes. See https://github.com/google/cctz/pull/226. Chile's 2022 DST start is delayed from September 4 to September 11. Iran plans to stop observing DST permanently after it falls back on 2022-09-21. There are corrections to the historical time zone of Asia/Tehran in the year 1977: Iran adopted standard time in 1935, not 1946. In 1977 it observed DST from 03-21 23:00 to 10-20 24:00; its 1978 transitions were on 03-24 and 08-05, not 03-20 and 10-20; and its spring 1979 transition was on 05-27, not 03-21 (https://data.iana.org/time-zones/tzdb/NEWS). ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Former packages used to install systemd.service file to `/etc`. The files there are marked as `conf` and are not cleaned out, and are not updated automatically. This PR cleans them out. [#39323](https://github.com/ClickHouse/ClickHouse/pull/39323) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Ensure LSan is effective. [#39430](https://github.com/ClickHouse/ClickHouse/pull/39430) ([Azat Khuzhin](https://github.com/azat)). * TSAN has issues with clang-14 (https://github.com/google/sanitizers/issues/1552, https://github.com/google/sanitizers/issues/1540), so here we build the TSAN binaries with clang-15. [#39450](https://github.com/ClickHouse/ClickHouse/pull/39450) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Remove the option to build ClickHouse tools as separate executable programs. This fixes [#37847](https://github.com/ClickHouse/ClickHouse/issues/37847). [#39520](https://github.com/ClickHouse/ClickHouse/pull/39520) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Small preparations for build on s390x (which is big-endian). [#39627](https://github.com/ClickHouse/ClickHouse/pull/39627) ([Harry Lee](https://github.com/HarryLeeIBM)). [#39656](https://github.com/ClickHouse/ClickHouse/pull/39656) ([Harry Lee](https://github.com/HarryLeeIBM)). Fixed Endian issue in BitHelpers for s390x. [#39656](https://github.com/ClickHouse/ClickHouse/pull/39656) ([Harry Lee](https://github.com/HarryLeeIBM)). Implement a piece of code related to SipHash for s390x architecture (which is not supported by ClickHouse). 
[#39732](https://github.com/ClickHouse/ClickHouse/pull/39732) ([Harry Lee](https://github.com/HarryLeeIBM)). Fixed an Endian issue in Coordination snapshot code for s390x architecture (which is not supported by ClickHouse). [#39931](https://github.com/ClickHouse/ClickHouse/pull/39931) ([Harry Lee](https://github.com/HarryLeeIBM)). Fixed Endian issues in Codec code for s390x architecture (which is not supported by ClickHouse). [#40008](https://github.com/ClickHouse/ClickHouse/pull/40008) ([Harry Lee](https://github.com/HarryLeeIBM)). Fixed Endian issues in reading/writing BigEndian binary data in ReadHelpers and WriteHelpers code for s390x architecture (which is not supported by ClickHouse). [#40179](https://github.com/ClickHouse/ClickHouse/pull/40179) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Small preparations for build on s390x (which is big-endian). [#39627](https://github.com/ClickHouse/ClickHouse/pull/39627) ([Harry Lee](https://github.com/HarryLeeIBM)). [#39656](https://github.com/ClickHouse/ClickHouse/pull/39656) ([Harry Lee](https://github.com/HarryLeeIBM)). Fixed Endian issue in BitHelpers for s390x. [#39656](https://github.com/ClickHouse/ClickHouse/pull/39656) ([Harry Lee](https://github.com/HarryLeeIBM)). Implement a piece of code related to SipHash for s390x architecture (which is not supported by ClickHouse). [#39732](https://github.com/ClickHouse/ClickHouse/pull/39732) ([Harry Lee](https://github.com/HarryLeeIBM)). Fixed an Endian issue in the Coordination snapshot code for s390x architecture (which is not supported by ClickHouse). [#39931](https://github.com/ClickHouse/ClickHouse/pull/39931) ([Harry Lee](https://github.com/HarryLeeIBM)). Fixed Endian issues in Codec code for s390x architecture (which is not supported by ClickHouse). [#40008](https://github.com/ClickHouse/ClickHouse/pull/40008) ([Harry Lee](https://github.com/HarryLeeIBM)). Fixed Endian issues in reading/writing BigEndian binary data in ReadHelpers and WriteHelpers code for s390x architecture (which is not supported by ClickHouse). [#40179](https://github.com/ClickHouse/ClickHouse/pull/40179) ([Harry Lee](https://github.com/HarryLeeIBM)). * Support build with `clang-16` (trunk). This closes [#39949](https://github.com/ClickHouse/ClickHouse/issues/39949). [#40181](https://github.com/ClickHouse/ClickHouse/pull/40181) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Prepare RISC-V 64 build to run in CI. This is for [#40141](https://github.com/ClickHouse/ClickHouse/issues/40141). [#40197](https://github.com/ClickHouse/ClickHouse/pull/40197) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Simplified function registration macro interface (`FUNCTION_REGISTER*`) to eliminate the step to add and call an extern function in the registerFunctions.cpp, it also makes incremental builds of a new function faster. [#38615](https://github.com/ClickHouse/ClickHouse/pull/38615) ([Li Yin](https://github.com/liyinsg)). -* Docker: Now entrypoint.sh in docker image creates and executes chown for all folders it found in config for multidisk setup [#17717](https://github.com/ClickHouse/ClickHouse/issues/17717). [#39121](https://github.com/ClickHouse/ClickHouse/pull/39121) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Docker: Now entrypoint.sh in docker image creates and executes chown for all folders it finds in the config for multidisk setup [#17717](https://github.com/ClickHouse/ClickHouse/issues/17717). 
[#39121](https://github.com/ClickHouse/ClickHouse/pull/39121) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). #### Bug Fix -* Fix possible segfault in `CapnProto` input format. This bug was found and send through ClickHouse bug-bounty [program](https://github.com/ClickHouse/ClickHouse/issues/38986) by *kiojj*. [#40241](https://github.com/ClickHouse/ClickHouse/pull/40241) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix a very rare case of incorrect behavior of array subscript operator. This closes [#28720](https://github.com/ClickHouse/ClickHouse/issues/28720). [#40185](https://github.com/ClickHouse/ClickHouse/pull/40185) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix possible segfault in `CapnProto` input format. This bug was found and sent in through the ClickHouse bug-bounty [program](https://github.com/ClickHouse/ClickHouse/issues/38986) by *kiojj*. [#40241](https://github.com/ClickHouse/ClickHouse/pull/40241) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix a very rare case of incorrect behavior of the array subscript operator. This closes [#28720](https://github.com/ClickHouse/ClickHouse/issues/28720). [#40185](https://github.com/ClickHouse/ClickHouse/pull/40185) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Fix insufficient argument check for encryption functions (found by query fuzzer). This closes [#39987](https://github.com/ClickHouse/ClickHouse/issues/39987). [#40194](https://github.com/ClickHouse/ClickHouse/pull/40194) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Fix the case when the order of columns can be incorrect if the `IN` operator is used with a table with `ENGINE = Set` containing multiple columns. This fixes [#13014](https://github.com/ClickHouse/ClickHouse/issues/13014). [#40225](https://github.com/ClickHouse/ClickHouse/pull/40225) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Fix seeking while reading from encrypted disk. This PR fixes [#38381](https://github.com/ClickHouse/ClickHouse/issues/38381). [#39687](https://github.com/ClickHouse/ClickHouse/pull/39687) ([Vitaly Baranov](https://github.com/vitlibar)). diff --git a/CMakeLists.txt b/CMakeLists.txt index f96148567da..99997db96a1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -448,12 +448,7 @@ else() link_libraries(global-group) endif () -if (NOT (OS_LINUX OR OS_DARWIN)) - # Using system libs can cause a lot of warnings in includes (on macro expansion). - option(WERROR "Enable -Werror compiler option" OFF) -else () - option(WERROR "Enable -Werror compiler option" ON) -endif () +option(WERROR "Enable -Werror compiler option" ON) if (WERROR) # Don't pollute CMAKE_CXX_FLAGS with -Werror as it will break some CMake checks. @@ -614,6 +609,8 @@ if (NATIVE_BUILD_TARGETS "-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}" "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}" "-DENABLE_CCACHE=${ENABLE_CCACHE}" + # Avoid overriding .cargo/config.toml with native toolchain. + "-DENABLE_RUST=OFF" "-DENABLE_CLICKHOUSE_SELF_EXTRACTING=${ENABLE_CLICKHOUSE_SELF_EXTRACTING}" ${CMAKE_SOURCE_DIR} WORKING_DIRECTORY "${NATIVE_BUILD_DIR}" diff --git a/README.md b/README.md index 59c9c180c90..35580369fd0 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,6 @@ ClickHouse® is an open-source column-oriented database management system that a * [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any. 
## Upcoming events -* [**v22.12 Release Webinar**](https://clickhouse.com/company/events/v22-12-release-webinar) Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release, provide live demos, and share vision into what is coming in the roadmap. +* [**v22.12 Release Webinar**](https://clickhouse.com/company/events/v22-12-release-webinar) 22.12 is the ClickHouse Christmas release. There are plenty of gifts (a new JOIN algorithm among them) and we adopted something from MongoDB. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release. * [**ClickHouse Meetup at the CHEQ office in Tel Aviv**](https://www.meetup.com/clickhouse-tel-aviv-user-group/events/289599423/) - Jan 16 - We are very excited to be holding our next in-person ClickHouse meetup at the CHEQ office in Tel Aviv! Hear from CHEQ, ServiceNow and Contentsquare, as well as a deep dive presentation from ClickHouse CTO Alexey Milovidov. Join us for a fun evening of talks, food and discussion! -* **ClickHouse Meetup in Seattle* - Keep an eye on this space as we will be announcing a January meetup in Seattle soon! +* [**ClickHouse Meetup at Microsoft Office in Seattle**](https://www.meetup.com/clickhouse-seattle-user-group/events/290310025/) - Jan 18 - Keep an eye on this space as we will be announcing speakers soon! diff --git a/SECURITY.md b/SECURITY.md index a4f431d7552..3dcdc5db009 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -13,9 +13,10 @@ The following versions of ClickHouse server are currently being supported with s | Version | Supported | |:-|:-| +| 22.12 | ✔️ | | 22.11 | ✔️ | | 22.10 | ✔️ | -| 22.9 | ✔️ | +| 22.9 | ❌ | | 22.8 | ✔️ | | 22.7 | ❌ | | 22.6 | ❌ | diff --git a/base/base/CMakeLists.txt b/base/base/CMakeLists.txt index 175a4836e64..d788bd6f092 100644 --- a/base/base/CMakeLists.txt +++ b/base/base/CMakeLists.txt @@ -40,6 +40,11 @@ else () target_compile_definitions(common PUBLIC WITH_COVERAGE=0) endif () +# FIXME: move libraries for line reading out from base +if (TARGET ch_rust::skim) + target_link_libraries(common PUBLIC ch_rust::skim) +endif() + target_include_directories(common PUBLIC .. 
"${CMAKE_CURRENT_BINARY_DIR}/..") if (OS_DARWIN AND NOT USE_STATIC_LIBRARIES) diff --git a/base/base/ReplxxLineReader.cpp b/base/base/ReplxxLineReader.cpp index b86746365b7..9e29f7744fa 100644 --- a/base/base/ReplxxLineReader.cpp +++ b/base/base/ReplxxLineReader.cpp @@ -16,9 +16,11 @@ #include #include #include -#include -#include -#include /// is_any_of +#include "config.h" // USE_SKIM + +#if USE_SKIM +#include +#endif namespace { @@ -39,36 +41,6 @@ std::string getEditor() return editor; } -std::pair getFuzzyFinder() -{ - const char * env_path = std::getenv("PATH"); // NOLINT(concurrency-mt-unsafe) - - if (!env_path || !*env_path) - return {}; - - std::vector paths; - boost::split(paths, env_path, boost::is_any_of(":")); - for (const auto & path_str : paths) - { - std::filesystem::path path(path_str); - std::filesystem::path sk_bin_path = path / "sk"; - if (!access(sk_bin_path.c_str(), X_OK)) - return {sk_bin_path, FUZZY_FINDER_SKIM}; - - std::filesystem::path fzf_bin_path = path / "fzf"; - if (!access(fzf_bin_path.c_str(), X_OK)) - return {fzf_bin_path, FUZZY_FINDER_FZF}; - } - - return {"", FUZZY_FINDER_NONE}; -} - -String escapeShellArgument(std::string arg) -{ - boost::replace_all(arg, "'", "'\\''"); - return fmt::format("'{}'", arg); -} - /// See comments in ShellCommand::executeImpl() /// (for the vfork via dlsym()) int executeCommand(char * const argv[]) @@ -316,8 +288,6 @@ ReplxxLineReader::ReplxxLineReader( using namespace std::placeholders; using Replxx = replxx::Replxx; - std::tie(fuzzy_finder, fuzzy_finder_type) = getFuzzyFinder(); - if (!history_file_path.empty()) { history_file_fd = open(history_file_path.c_str(), O_RDWR); @@ -422,17 +392,48 @@ ReplxxLineReader::ReplxxLineReader( }; rx.bind_key(Replxx::KEY::meta('#'), insert_comment_action); - /// interactive search in history (requires fzf/sk) - if (fuzzy_finder_type != FUZZY_FINDER_NONE) +#if USE_SKIM + auto interactive_history_search = [this](char32_t code) { - auto interactive_history_search = [this](char32_t code) + std::vector words; { - openInteractiveHistorySearch(); - rx.invoke(Replxx::ACTION::CLEAR_SELF, code); - return rx.invoke(Replxx::ACTION::REPAINT, code); - }; - rx.bind_key(Replxx::KEY::control('R'), interactive_history_search); - } + auto hs(rx.history_scan()); + while (hs.next()) + words.push_back(hs.get().text()); + } + + std::string new_query; + try + { + new_query = std::string(skim(words)); + } + catch (const std::exception & e) + { + rx.print("skim failed: %s (consider using Ctrl-T for a regular non-fuzzy reverse search)\n", e.what()); + } + if (!new_query.empty()) + rx.set_state(replxx::Replxx::State(new_query.c_str(), static_cast(new_query.size()))); + + if (bracketed_paste_enabled) + enableBracketedPaste(); + + rx.invoke(Replxx::ACTION::CLEAR_SELF, code); + return rx.invoke(Replxx::ACTION::REPAINT, code); + }; + + rx.bind_key(Replxx::KEY::control('R'), interactive_history_search); + + /// Rebind regular incremental search to C-T. + /// + /// NOTE: C-T by default this is a binding to swap adjustent chars + /// (TRANSPOSE_CHARACTERS), but for SQL it sounds pretty useless. + rx.bind_key(Replxx::KEY::control('T'), [this](char32_t) + { + /// Reverse search is detected by C-R. 
+ uint32_t reverse_search = Replxx::KEY::control('R'); + return rx.invoke(Replxx::ACTION::HISTORY_INCREMENTAL_SEARCH, reverse_search); + }); +#endif } ReplxxLineReader::~ReplxxLineReader() @@ -501,65 +502,6 @@ void ReplxxLineReader::openEditor() enableBracketedPaste(); } -void ReplxxLineReader::openInteractiveHistorySearch() -{ - assert(!fuzzy_finder.empty()); - TemporaryFile history_file("clickhouse_client_history_in_XXXXXX.bin"); - auto hs(rx.history_scan()); - while (hs.next()) - { - history_file.write(hs.get().text()); - history_file.write(std::string(1, '\0')); - } - history_file.close(); - - TemporaryFile output_file("clickhouse_client_history_out_XXXXXX.sql"); - output_file.close(); - - char sh[] = "sh"; - char sh_c[] = "-c"; - /// NOTE: You can use one of the following to configure the behaviour additionally: - /// - SKIM_DEFAULT_OPTIONS - /// - FZF_DEFAULT_OPTS - /// - /// And also note, that fzf and skim is 95% compatible (at least option - /// that is used here) - std::string fuzzy_finder_command = fmt::format("{} --read0 --height=30%", fuzzy_finder); - switch (fuzzy_finder_type) - { - case FUZZY_FINDER_SKIM: - fuzzy_finder_command += " --tac --tiebreak=-score"; - break; - case FUZZY_FINDER_FZF: - fuzzy_finder_command += " --tac --tiebreak=index"; - break; - case FUZZY_FINDER_NONE: - /// assertion for !fuzzy_finder.empty() is enough - break; - } - fuzzy_finder_command += fmt::format(" < {} > {}", - escapeShellArgument(history_file.getPath()), - escapeShellArgument(output_file.getPath())); - char * const argv[] = {sh, sh_c, fuzzy_finder_command.data(), nullptr}; - - try - { - if (executeCommand(argv) == 0) - { - std::string new_query = readFile(output_file.getPath()); - rightTrim(new_query); - rx.set_state(replxx::Replxx::State(new_query.c_str(), static_cast(new_query.size()))); - } - } - catch (const std::runtime_error & e) - { - rx.print(e.what()); - } - - if (bracketed_paste_enabled) - enableBracketedPaste(); -} - void ReplxxLineReader::enableBracketedPaste() { bracketed_paste_enabled = true; diff --git a/base/base/ReplxxLineReader.h b/base/base/ReplxxLineReader.h index 9be3b3aa993..428fbf144c3 100644 --- a/base/base/ReplxxLineReader.h +++ b/base/base/ReplxxLineReader.h @@ -4,15 +4,6 @@ #include -enum FuzzyFinderType -{ - FUZZY_FINDER_NONE, - /// Use https://github.com/junegunn/fzf - FUZZY_FINDER_FZF, - /// Use https://github.com/lotabout/skim - FUZZY_FINDER_SKIM, -}; - class ReplxxLineReader : public LineReader { public: @@ -35,7 +26,6 @@ private: void addToHistory(const String & line) override; int executeEditor(const std::string & path); void openEditor(); - void openInteractiveHistorySearch(); replxx::Replxx rx; replxx::Replxx::highlighter_callback_t highlighter; @@ -45,6 +35,4 @@ private: bool bracketed_paste_enabled = false; std::string editor; - std::string fuzzy_finder; - FuzzyFinderType fuzzy_finder_type = FUZZY_FINDER_NONE; }; diff --git a/base/readpassphrase/readpassphrase.c b/base/readpassphrase/readpassphrase.c index b8707770e6c..a84ec43767c 100644 --- a/base/readpassphrase/readpassphrase.c +++ b/base/readpassphrase/readpassphrase.c @@ -153,7 +153,7 @@ restart: /* Restore old terminal settings and signals. */ if (memcmp(&term, &oterm, sizeof(term)) != 0) { - const int sigttou = signo[SIGTTOU]; + const int sigttou = (int)signo[SIGTTOU]; /* Ignore SIGTTOU generated when we are not the fg pgrp. 
*/ while (tcsetattr(input, TCSAFLUSH|TCSASOFT, &oterm) == -1 && diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index d06d3918612..87b11c46f45 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. -SET(VERSION_REVISION 54469) +SET(VERSION_REVISION 54470) SET(VERSION_MAJOR 22) -SET(VERSION_MINOR 12) +SET(VERSION_MINOR 13) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 0d211ed19849fe44b0e43fdebe2c15d76d560a77) -SET(VERSION_DESCRIBE v22.12.1.1-testing) -SET(VERSION_STRING 22.12.1.1) +SET(VERSION_GITHASH 688e488e930c83eefeac4f87c4cc029cc5b231e3) +SET(VERSION_DESCRIBE v22.13.1.1-testing) +SET(VERSION_STRING 22.13.1.1) # end of autochange diff --git a/cmake/darwin/toolchain-aarch64.cmake b/cmake/darwin/toolchain-aarch64.cmake index 81398111495..569b02bb642 100644 --- a/cmake/darwin/toolchain-aarch64.cmake +++ b/cmake/darwin/toolchain-aarch64.cmake @@ -1,3 +1,6 @@ +# See linux/toolchain-x86_64.cmake for details about multiple load of toolchain file. +include_guard(GLOBAL) + set (CMAKE_SYSTEM_NAME "Darwin") set (CMAKE_SYSTEM_PROCESSOR "aarch64") set (CMAKE_C_COMPILER_TARGET "aarch64-apple-darwin") diff --git a/cmake/darwin/toolchain-x86_64.cmake b/cmake/darwin/toolchain-x86_64.cmake index 0be81dfa753..c4527d2fc0d 100644 --- a/cmake/darwin/toolchain-x86_64.cmake +++ b/cmake/darwin/toolchain-x86_64.cmake @@ -1,3 +1,6 @@ +# See linux/toolchain-x86_64.cmake for details about multiple load of toolchain file. +include_guard(GLOBAL) + set (CMAKE_SYSTEM_NAME "Darwin") set (CMAKE_SYSTEM_PROCESSOR "x86_64") set (CMAKE_C_COMPILER_TARGET "x86_64-apple-darwin") diff --git a/cmake/freebsd/toolchain-aarch64.cmake b/cmake/freebsd/toolchain-aarch64.cmake index eeec635cc06..8a8da00f3be 100644 --- a/cmake/freebsd/toolchain-aarch64.cmake +++ b/cmake/freebsd/toolchain-aarch64.cmake @@ -1,3 +1,6 @@ +# See linux/toolchain-x86_64.cmake for details about multiple load of toolchain file. +include_guard(GLOBAL) + set (CMAKE_SYSTEM_NAME "FreeBSD") set (CMAKE_SYSTEM_PROCESSOR "aarch64") set (CMAKE_C_COMPILER_TARGET "aarch64-unknown-freebsd12") diff --git a/cmake/freebsd/toolchain-ppc64le.cmake b/cmake/freebsd/toolchain-ppc64le.cmake index d6007befb67..c3f6594204d 100644 --- a/cmake/freebsd/toolchain-ppc64le.cmake +++ b/cmake/freebsd/toolchain-ppc64le.cmake @@ -1,3 +1,6 @@ +# See linux/toolchain-x86_64.cmake for details about multiple load of toolchain file. +include_guard(GLOBAL) + set (CMAKE_SYSTEM_NAME "FreeBSD") set (CMAKE_SYSTEM_PROCESSOR "ppc64le") set (CMAKE_C_COMPILER_TARGET "powerpc64le-unknown-freebsd13") diff --git a/cmake/freebsd/toolchain-x86_64.cmake b/cmake/freebsd/toolchain-x86_64.cmake index 80cbeba549f..460de6a7d39 100644 --- a/cmake/freebsd/toolchain-x86_64.cmake +++ b/cmake/freebsd/toolchain-x86_64.cmake @@ -1,3 +1,6 @@ +# See linux/toolchain-x86_64.cmake for details about multiple load of toolchain file. +include_guard(GLOBAL) + set (CMAKE_SYSTEM_NAME "FreeBSD") set (CMAKE_SYSTEM_PROCESSOR "x86_64") set (CMAKE_C_COMPILER_TARGET "x86_64-pc-freebsd11") diff --git a/cmake/linux/toolchain-aarch64.cmake b/cmake/linux/toolchain-aarch64.cmake index 5db71aecf9a..2dedef8859f 100644 --- a/cmake/linux/toolchain-aarch64.cmake +++ b/cmake/linux/toolchain-aarch64.cmake @@ -1,3 +1,6 @@ +# See linux/toolchain-x86_64.cmake for details about multiple load of toolchain file. 
+include_guard(GLOBAL) + set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) set (CMAKE_SYSTEM_NAME "Linux") diff --git a/cmake/linux/toolchain-ppc64le.cmake b/cmake/linux/toolchain-ppc64le.cmake index 345de208234..8eb2aab34e9 100644 --- a/cmake/linux/toolchain-ppc64le.cmake +++ b/cmake/linux/toolchain-ppc64le.cmake @@ -1,3 +1,6 @@ +# See linux/toolchain-x86_64.cmake for details about multiple load of toolchain file. +include_guard(GLOBAL) + set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) set (CMAKE_SYSTEM_NAME "Linux") diff --git a/cmake/linux/toolchain-riscv64.cmake b/cmake/linux/toolchain-riscv64.cmake index 02c3d0c97fc..49a036c2972 100644 --- a/cmake/linux/toolchain-riscv64.cmake +++ b/cmake/linux/toolchain-riscv64.cmake @@ -1,3 +1,6 @@ +# See linux/toolchain-x86_64.cmake for details about multiple load of toolchain file. +include_guard(GLOBAL) + set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) set (CMAKE_SYSTEM_NAME "Linux") diff --git a/cmake/linux/toolchain-x86_64-musl.cmake b/cmake/linux/toolchain-x86_64-musl.cmake index 3eb2077db2b..bc327e5ac25 100644 --- a/cmake/linux/toolchain-x86_64-musl.cmake +++ b/cmake/linux/toolchain-x86_64-musl.cmake @@ -1,3 +1,6 @@ +# See linux/toolchain-x86_64.cmake for details about multiple load of toolchain file. +include_guard(GLOBAL) + set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) set (CMAKE_SYSTEM_NAME "Linux") diff --git a/cmake/linux/toolchain-x86_64.cmake b/cmake/linux/toolchain-x86_64.cmake index bdcfcfa013a..e73d779284a 100644 --- a/cmake/linux/toolchain-x86_64.cmake +++ b/cmake/linux/toolchain-x86_64.cmake @@ -1,18 +1,15 @@ -if (_CLICKHOUSE_TOOLCHAIN_FILE_LOADED) - # During first run of cmake the toolchain file will be loaded twice, - # - /usr/share/cmake-3.23/Modules/CMakeDetermineSystem.cmake - # - /bld/CMakeFiles/3.23.2/CMakeSystem.cmake - # - # But once you already have non-empty cmake cache it will be loaded only - # once: - # - /bld/CMakeFiles/3.23.2/CMakeSystem.cmake - # - # This has no harm except for double load of toolchain will add - # --gcc-toolchain multiple times that will not allow ccache to reuse the - # cache. - return() -endif() -set (_CLICKHOUSE_TOOLCHAIN_FILE_LOADED ON) +# During first run of cmake the toolchain file will be loaded twice, +# - /usr/share/cmake-3.23/Modules/CMakeDetermineSystem.cmake +# - /bld/CMakeFiles/3.23.2/CMakeSystem.cmake +# +# But once you already have non-empty cmake cache it will be loaded only +# once: +# - /bld/CMakeFiles/3.23.2/CMakeSystem.cmake +# +# This has no harm except for double load of toolchain will add +# --gcc-toolchain multiple times that will not allow ccache to reuse the +# cache. 
+include_guard(GLOBAL) set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 2e05b318b8f..6f80059498e 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -65,7 +65,7 @@ add_contrib (dragonbox-cmake dragonbox) add_contrib (vectorscan-cmake vectorscan) add_contrib (jemalloc-cmake jemalloc) add_contrib (libcpuid-cmake libcpuid) -add_contrib (libdivide) +add_contrib (libdivide-cmake) add_contrib (libmetrohash) add_contrib (lz4-cmake lz4) add_contrib (murmurhash) diff --git a/contrib/corrosion-cmake/CMakeLists.txt b/contrib/corrosion-cmake/CMakeLists.txt index ef810182a40..ea8f191564d 100644 --- a/contrib/corrosion-cmake/CMakeLists.txt +++ b/contrib/corrosion-cmake/CMakeLists.txt @@ -10,9 +10,6 @@ else() endif() option(ENABLE_RUST "Enable rust" ${DEFAULT_ENABLE_RUST}) - -message(STATUS ${ENABLE_RUST}) - if(NOT ENABLE_RUST) message(STATUS "Not using rust") return() @@ -42,5 +39,7 @@ endif() message(STATUS "Switched Rust target to ${Rust_CARGO_TARGET}") +# FindRust.cmake +list(APPEND CMAKE_MODULE_PATH "${ClickHouse_SOURCE_DIR}/contrib/corrosion/cmake") # Define function corrosion_import_crate() include ("${ClickHouse_SOURCE_DIR}/contrib/corrosion/cmake/Corrosion.cmake") diff --git a/contrib/libdivide b/contrib/libdivide new file mode 160000 index 00000000000..3bd34388573 --- /dev/null +++ b/contrib/libdivide @@ -0,0 +1 @@ +Subproject commit 3bd34388573681ce563348cdf04fe15d24770d04 diff --git a/contrib/libdivide-cmake/CMakeLists.txt b/contrib/libdivide-cmake/CMakeLists.txt new file mode 100644 index 00000000000..3174808bc23 --- /dev/null +++ b/contrib/libdivide-cmake/CMakeLists.txt @@ -0,0 +1,7 @@ +set(LIBDIVIDE_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libdivide") +add_library (_libdivide INTERFACE) +# for libdivide.h +target_include_directories (_libdivide SYSTEM BEFORE INTERFACE ${LIBDIVIDE_SOURCE_DIR}) +# for libdivide-config.h +target_include_directories (_libdivide SYSTEM BEFORE INTERFACE .) +add_library (ch_contrib::libdivide ALIAS _libdivide) diff --git a/contrib/libdivide-cmake/libdivide-config.h b/contrib/libdivide-cmake/libdivide-config.h new file mode 100644 index 00000000000..8ef001fb97b --- /dev/null +++ b/contrib/libdivide-cmake/libdivide-config.h @@ -0,0 +1,9 @@ +#if defined(__SSE2__) +# define LIBDIVIDE_SSE2 +#elif defined(__AVX512F__) || defined(__AVX512BW__) || defined(__AVX512VL__) +# define LIBDIVIDE_AVX512 +#elif defined(__AVX2__) +# define LIBDIVIDE_AVX2 +#elif defined(__aarch64__) && defined(__ARM_NEON) +# define LIBDIVIDE_NEON +#endif diff --git a/contrib/libdivide/CMakeLists.txt b/contrib/libdivide/CMakeLists.txt deleted file mode 100644 index 45cbc0a584b..00000000000 --- a/contrib/libdivide/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -add_library (_libdivide INTERFACE) -target_include_directories (_libdivide SYSTEM BEFORE INTERFACE .) -add_library (ch_contrib::libdivide ALIAS _libdivide) diff --git a/contrib/libdivide/LICENSE.txt b/contrib/libdivide/LICENSE.txt deleted file mode 100644 index d056b847bba..00000000000 --- a/contrib/libdivide/LICENSE.txt +++ /dev/null @@ -1,20 +0,0 @@ - libdivide - Copyright (C) 2010 ridiculous_fish - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for any damages - arising from the use of this software. 
- - Permission is granted to anyone to use this software for any purpose, - including commercial applications, and to alter it and redistribute it - freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must not - claim that you wrote the original software. If you use this software - in a product, an acknowledgment in the product documentation would be - appreciated but is not required. - 2. Altered source versions must be plainly marked as such, and must not be - misrepresented as being the original software. - 3. This notice may not be removed or altered from any source distribution. - - libdivide@ridiculousfish.com diff --git a/contrib/libdivide/README.txt b/contrib/libdivide/README.txt deleted file mode 100644 index 2d17a68e4c2..00000000000 --- a/contrib/libdivide/README.txt +++ /dev/null @@ -1,2 +0,0 @@ -https://github.com/ridiculousfish/libdivide -http://libdivide.com/ diff --git a/contrib/libdivide/libdivide.h b/contrib/libdivide/libdivide.h deleted file mode 100644 index 33d210310a1..00000000000 --- a/contrib/libdivide/libdivide.h +++ /dev/null @@ -1,2503 +0,0 @@ -// libdivide.h - Optimized integer division -// https://libdivide.com -// -// Copyright (C) 2010 - 2019 ridiculous_fish, -// Copyright (C) 2016 - 2019 Kim Walisch, -// -// libdivide is dual-licensed under the Boost or zlib licenses. -// You may use libdivide under the terms of either of these. -// See LICENSE.txt for more details. - -#ifndef LIBDIVIDE_H -#define LIBDIVIDE_H - -#define LIBDIVIDE_VERSION "3.0" -#define LIBDIVIDE_VERSION_MAJOR 3 -#define LIBDIVIDE_VERSION_MINOR 0 - -#include - -#if defined(__cplusplus) -#include -#include -#include -#else -#include -#include -#endif - -#if defined(LIBDIVIDE_SSE2) -#include -#endif -#if defined(LIBDIVIDE_AVX2) || defined(LIBDIVIDE_AVX512) -#include -#endif -#if defined(LIBDIVIDE_NEON) -#include -#endif - -#if defined(_MSC_VER) -#include -// disable warning C4146: unary minus operator applied -// to unsigned type, result still unsigned -#pragma warning(disable : 4146) -#define LIBDIVIDE_VC -#endif - -#if !defined(__has_builtin) -#define __has_builtin(x) 0 -#endif - -#if defined(__SIZEOF_INT128__) -#define HAS_INT128_T -// clang-cl on Windows does not yet support 128-bit division -#if !(defined(__clang__) && defined(LIBDIVIDE_VC)) -#define HAS_INT128_DIV -#endif -#endif - -#if defined(__x86_64__) || defined(_M_X64) -#define LIBDIVIDE_X86_64 -#endif - -#if defined(__i386__) -#define LIBDIVIDE_i386 -#endif - -#if defined(__GNUC__) || defined(__clang__) -#define LIBDIVIDE_GCC_STYLE_ASM -#endif - -#if defined(__cplusplus) || defined(LIBDIVIDE_VC) -#define LIBDIVIDE_FUNCTION __FUNCTION__ -#else -#define LIBDIVIDE_FUNCTION __func__ -#endif - -#define LIBDIVIDE_ERROR(msg) \ - do { \ - fprintf(stderr, "libdivide.h:%d: %s(): Error: %s\n", __LINE__, LIBDIVIDE_FUNCTION, msg); \ - abort(); \ - } while (0) - -#if defined(LIBDIVIDE_ASSERTIONS_ON) -#define LIBDIVIDE_ASSERT(x) \ - do { \ - if (!(x)) { \ - fprintf(stderr, "libdivide.h:%d: %s(): Assertion failed: %s\n", __LINE__, \ - LIBDIVIDE_FUNCTION, #x); \ - abort(); \ - } \ - } while (0) -#else -#define LIBDIVIDE_ASSERT(x) -#endif - -#ifdef __cplusplus -namespace libdivide { -#endif - -// pack divider structs to prevent compilers from padding. -// This reduces memory usage by up to 43% when using a large -// array of libdivide dividers and improves performance -// by up to 10% because of reduced memory bandwidth. 
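
The padding claim in the comment above is easy to check in isolation. A minimal sketch, assuming nothing beyond a hosted C compiler; the struct names are illustrative, not the library's:

```c
#include <stdint.h>
#include <stdio.h>

/* Same field layout as the 64-bit divider structs: a magic number plus "more". */
struct divider_unpacked {
    uint64_t magic;
    uint8_t more;
};                      /* usually padded to 16 bytes */

#pragma pack(push, 1)
struct divider_packed {
    uint64_t magic;
    uint8_t more;
};                      /* exactly 9 bytes */
#pragma pack(pop)

int main(void) {
    /* On common 64-bit ABIs this prints 16 and 9; the removed padding is where
       the memory and bandwidth savings mentioned above come from. */
    printf("unpacked: %zu, packed: %zu\n",
           sizeof(struct divider_unpacked), sizeof(struct divider_packed));
    return 0;
}
```
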
-#pragma pack(push, 1) - -struct libdivide_u32_t { - uint32_t magic; - uint8_t more; -}; - -struct libdivide_s32_t { - int32_t magic; - uint8_t more; -}; - -struct libdivide_u64_t { - uint64_t magic; - uint8_t more; -}; - -struct libdivide_s64_t { - int64_t magic; - uint8_t more; -}; - -struct libdivide_u32_branchfree_t { - uint32_t magic; - uint8_t more; -}; - -struct libdivide_s32_branchfree_t { - int32_t magic; - uint8_t more; -}; - -struct libdivide_u64_branchfree_t { - uint64_t magic; - uint8_t more; -}; - -struct libdivide_s64_branchfree_t { - int64_t magic; - uint8_t more; -}; - -#pragma pack(pop) - -// Explanation of the "more" field: -// -// * Bits 0-5 is the shift value (for shift path or mult path). -// * Bit 6 is the add indicator for mult path. -// * Bit 7 is set if the divisor is negative. We use bit 7 as the negative -// divisor indicator so that we can efficiently use sign extension to -// create a bitmask with all bits set to 1 (if the divisor is negative) -// or 0 (if the divisor is positive). -// -// u32: [0-4] shift value -// [5] ignored -// [6] add indicator -// magic number of 0 indicates shift path -// -// s32: [0-4] shift value -// [5] ignored -// [6] add indicator -// [7] indicates negative divisor -// magic number of 0 indicates shift path -// -// u64: [0-5] shift value -// [6] add indicator -// magic number of 0 indicates shift path -// -// s64: [0-5] shift value -// [6] add indicator -// [7] indicates negative divisor -// magic number of 0 indicates shift path -// -// In s32 and s64 branchfree modes, the magic number is negated according to -// whether the divisor is negated. In branchfree strategy, it is not negated. - -enum { - LIBDIVIDE_32_SHIFT_MASK = 0x1F, - LIBDIVIDE_64_SHIFT_MASK = 0x3F, - LIBDIVIDE_ADD_MARKER = 0x40, - LIBDIVIDE_NEGATIVE_DIVISOR = 0x80 -}; - -static inline struct libdivide_s32_t libdivide_s32_gen(int32_t d); -static inline struct libdivide_u32_t libdivide_u32_gen(uint32_t d); -static inline struct libdivide_s64_t libdivide_s64_gen(int64_t d); -static inline struct libdivide_u64_t libdivide_u64_gen(uint64_t d); - -static inline struct libdivide_s32_branchfree_t libdivide_s32_branchfree_gen(int32_t d); -static inline struct libdivide_u32_branchfree_t libdivide_u32_branchfree_gen(uint32_t d); -static inline struct libdivide_s64_branchfree_t libdivide_s64_branchfree_gen(int64_t d); -static inline struct libdivide_u64_branchfree_t libdivide_u64_branchfree_gen(uint64_t d); - -static inline int32_t libdivide_s32_do(int32_t numer, const struct libdivide_s32_t *denom); -static inline uint32_t libdivide_u32_do(uint32_t numer, const struct libdivide_u32_t *denom); -static inline int64_t libdivide_s64_do(int64_t numer, const struct libdivide_s64_t *denom); -static inline uint64_t libdivide_u64_do(uint64_t numer, const struct libdivide_u64_t *denom); - -static inline int32_t libdivide_s32_branchfree_do( - int32_t numer, const struct libdivide_s32_branchfree_t *denom); -static inline uint32_t libdivide_u32_branchfree_do( - uint32_t numer, const struct libdivide_u32_branchfree_t *denom); -static inline int64_t libdivide_s64_branchfree_do( - int64_t numer, const struct libdivide_s64_branchfree_t *denom); -static inline uint64_t libdivide_u64_branchfree_do( - uint64_t numer, const struct libdivide_u64_branchfree_t *denom); - -static inline int32_t libdivide_s32_recover(const struct libdivide_s32_t *denom); -static inline uint32_t libdivide_u32_recover(const struct libdivide_u32_t *denom); -static inline int64_t libdivide_s64_recover(const struct 
libdivide_s64_t *denom); -static inline uint64_t libdivide_u64_recover(const struct libdivide_u64_t *denom); - -static inline int32_t libdivide_s32_branchfree_recover( - const struct libdivide_s32_branchfree_t *denom); -static inline uint32_t libdivide_u32_branchfree_recover( - const struct libdivide_u32_branchfree_t *denom); -static inline int64_t libdivide_s64_branchfree_recover( - const struct libdivide_s64_branchfree_t *denom); -static inline uint64_t libdivide_u64_branchfree_recover( - const struct libdivide_u64_branchfree_t *denom); - -//////// Internal Utility Functions - -static inline uint32_t libdivide_mullhi_u32(uint32_t x, uint32_t y) { - uint64_t xl = x, yl = y; - uint64_t rl = xl * yl; - return (uint32_t)(rl >> 32); -} - -static inline int32_t libdivide_mullhi_s32(int32_t x, int32_t y) { - int64_t xl = x, yl = y; - int64_t rl = xl * yl; - // needs to be arithmetic shift - return (int32_t)(rl >> 32); -} - -static inline uint64_t libdivide_mullhi_u64(uint64_t x, uint64_t y) { -#if defined(LIBDIVIDE_VC) && defined(LIBDIVIDE_X86_64) - return __umulh(x, y); -#elif defined(HAS_INT128_T) - __uint128_t xl = x, yl = y; - __uint128_t rl = xl * yl; - return (uint64_t)(rl >> 64); -#else - // full 128 bits are x0 * y0 + (x0 * y1 << 32) + (x1 * y0 << 32) + (x1 * y1 << 64) - uint32_t mask = 0xFFFFFFFF; - uint32_t x0 = (uint32_t)(x & mask); - uint32_t x1 = (uint32_t)(x >> 32); - uint32_t y0 = (uint32_t)(y & mask); - uint32_t y1 = (uint32_t)(y >> 32); - uint32_t x0y0_hi = libdivide_mullhi_u32(x0, y0); - uint64_t x0y1 = x0 * (uint64_t)y1; - uint64_t x1y0 = x1 * (uint64_t)y0; - uint64_t x1y1 = x1 * (uint64_t)y1; - uint64_t temp = x1y0 + x0y0_hi; - uint64_t temp_lo = temp & mask; - uint64_t temp_hi = temp >> 32; - - return x1y1 + temp_hi + ((temp_lo + x0y1) >> 32); -#endif -} - -static inline int64_t libdivide_mullhi_s64(int64_t x, int64_t y) { -#if defined(LIBDIVIDE_VC) && defined(LIBDIVIDE_X86_64) - return __mulh(x, y); -#elif defined(HAS_INT128_T) - __int128_t xl = x, yl = y; - __int128_t rl = xl * yl; - return (int64_t)(rl >> 64); -#else - // full 128 bits are x0 * y0 + (x0 * y1 << 32) + (x1 * y0 << 32) + (x1 * y1 << 64) - uint32_t mask = 0xFFFFFFFF; - uint32_t x0 = (uint32_t)(x & mask); - uint32_t y0 = (uint32_t)(y & mask); - int32_t x1 = (int32_t)(x >> 32); - int32_t y1 = (int32_t)(y >> 32); - uint32_t x0y0_hi = libdivide_mullhi_u32(x0, y0); - int64_t t = x1 * (int64_t)y0 + x0y0_hi; - int64_t w1 = x0 * (int64_t)y1 + (t & mask); - - return x1 * (int64_t)y1 + (t >> 32) + (w1 >> 32); -#endif -} - -static inline int32_t libdivide_count_leading_zeros32(uint32_t val) { -#if defined(__GNUC__) || __has_builtin(__builtin_clz) - // Fast way to count leading zeros - return __builtin_clz(val); -#elif defined(LIBDIVIDE_VC) - unsigned long result; - if (_BitScanReverse(&result, val)) { - return 31 - result; - } - return 0; -#else - if (val == 0) return 32; - int32_t result = 8; - uint32_t hi = 0xFFU << 24; - while ((val & hi) == 0) { - hi >>= 8; - result += 8; - } - while (val & hi) { - result -= 1; - hi <<= 1; - } - return result; -#endif -} - -static inline int32_t libdivide_count_leading_zeros64(uint64_t val) { -#if defined(__GNUC__) || __has_builtin(__builtin_clzll) - // Fast way to count leading zeros - return __builtin_clzll(val); -#elif defined(LIBDIVIDE_VC) && defined(_WIN64) - unsigned long result; - if (_BitScanReverse64(&result, val)) { - return 63 - result; - } - return 0; -#else - uint32_t hi = val >> 32; - uint32_t lo = val & 0xFFFFFFFF; - if (hi != 0) return 
libdivide_count_leading_zeros32(hi); - return 32 + libdivide_count_leading_zeros32(lo); -#endif -} - -// libdivide_64_div_32_to_32: divides a 64-bit uint {u1, u0} by a 32-bit -// uint {v}. The result must fit in 32 bits. -// Returns the quotient directly and the remainder in *r -static inline uint32_t libdivide_64_div_32_to_32( - uint32_t u1, uint32_t u0, uint32_t v, uint32_t *r) { -#if (defined(LIBDIVIDE_i386) || defined(LIBDIVIDE_X86_64)) && defined(LIBDIVIDE_GCC_STYLE_ASM) - uint32_t result; - __asm__("divl %[v]" : "=a"(result), "=d"(*r) : [v] "r"(v), "a"(u0), "d"(u1)); - return result; -#else - uint64_t n = ((uint64_t)u1 << 32) | u0; - uint32_t result = (uint32_t)(n / v); - *r = (uint32_t)(n - result * (uint64_t)v); - return result; -#endif -} - -// libdivide_128_div_64_to_64: divides a 128-bit uint {u1, u0} by a 64-bit -// uint {v}. The result must fit in 64 bits. -// Returns the quotient directly and the remainder in *r -static uint64_t libdivide_128_div_64_to_64(uint64_t u1, uint64_t u0, uint64_t v, uint64_t *r) { - // N.B. resist the temptation to use __uint128_t here. - // In LLVM compiler-rt, it performs a 128/128 -> 128 division which is many times slower than - // necessary. In gcc it's better but still slower than the divlu implementation, perhaps because - // it's not inlined. -#if defined(LIBDIVIDE_X86_64) && defined(LIBDIVIDE_GCC_STYLE_ASM) - uint64_t result; - __asm__("divq %[v]" : "=a"(result), "=d"(*r) : [v] "r"(v), "a"(u0), "d"(u1)); - return result; -#else - // Code taken from Hacker's Delight: - // http://www.hackersdelight.org/HDcode/divlu.c. - // License permits inclusion here per: - // http://www.hackersdelight.org/permissions.htm - - const uint64_t b = (1ULL << 32); // Number base (32 bits) - uint64_t un1, un0; // Norm. dividend LSD's - uint64_t vn1, vn0; // Norm. divisor digits - uint64_t q1, q0; // Quotient digits - uint64_t un64, un21, un10; // Dividend digit pairs - uint64_t rhat; // A remainder - int32_t s; // Shift amount for norm - - // If overflow, set rem. to an impossible value, - // and return the largest possible quotient - if (u1 >= v) { - *r = (uint64_t)-1; - return (uint64_t)-1; - } - - // count leading zeros - s = libdivide_count_leading_zeros64(v); - if (s > 0) { - // Normalize divisor - v = v << s; - un64 = (u1 << s) | (u0 >> (64 - s)); - un10 = u0 << s; // Shift dividend left - } else { - // Avoid undefined behavior of (u0 >> 64). - // The behavior is undefined if the right operand is - // negative, or greater than or equal to the length - // in bits of the promoted left operand. 
- un64 = u1; - un10 = u0; - } - - // Break divisor up into two 32-bit digits - vn1 = v >> 32; - vn0 = v & 0xFFFFFFFF; - - // Break right half of dividend into two digits - un1 = un10 >> 32; - un0 = un10 & 0xFFFFFFFF; - - // Compute the first quotient digit, q1 - q1 = un64 / vn1; - rhat = un64 - q1 * vn1; - - while (q1 >= b || q1 * vn0 > b * rhat + un1) { - q1 = q1 - 1; - rhat = rhat + vn1; - if (rhat >= b) break; - } - - // Multiply and subtract - un21 = un64 * b + un1 - q1 * v; - - // Compute the second quotient digit - q0 = un21 / vn1; - rhat = un21 - q0 * vn1; - - while (q0 >= b || q0 * vn0 > b * rhat + un0) { - q0 = q0 - 1; - rhat = rhat + vn1; - if (rhat >= b) break; - } - - *r = (un21 * b + un0 - q0 * v) >> s; - return q1 * b + q0; -#endif -} - -// Bitshift a u128 in place, left (signed_shift > 0) or right (signed_shift < 0) -static inline void libdivide_u128_shift(uint64_t *u1, uint64_t *u0, int32_t signed_shift) { - if (signed_shift > 0) { - uint32_t shift = signed_shift; - *u1 <<= shift; - *u1 |= *u0 >> (64 - shift); - *u0 <<= shift; - } else if (signed_shift < 0) { - uint32_t shift = -signed_shift; - *u0 >>= shift; - *u0 |= *u1 << (64 - shift); - *u1 >>= shift; - } -} - -// Computes a 128 / 128 -> 64 bit division, with a 128 bit remainder. -static uint64_t libdivide_128_div_128_to_64( - uint64_t u_hi, uint64_t u_lo, uint64_t v_hi, uint64_t v_lo, uint64_t *r_hi, uint64_t *r_lo) { -#if defined(HAS_INT128_T) && defined(HAS_INT128_DIV) - __uint128_t ufull = u_hi; - __uint128_t vfull = v_hi; - ufull = (ufull << 64) | u_lo; - vfull = (vfull << 64) | v_lo; - uint64_t res = (uint64_t)(ufull / vfull); - __uint128_t remainder = ufull - (vfull * res); - *r_lo = (uint64_t)remainder; - *r_hi = (uint64_t)(remainder >> 64); - return res; -#else - // Adapted from "Unsigned Doubleword Division" in Hacker's Delight - // We want to compute u / v - typedef struct { - uint64_t hi; - uint64_t lo; - } u128_t; - u128_t u = {u_hi, u_lo}; - u128_t v = {v_hi, v_lo}; - - if (v.hi == 0) { - // divisor v is a 64 bit value, so we just need one 128/64 division - // Note that we are simpler than Hacker's Delight here, because we know - // the quotient fits in 64 bits whereas Hacker's Delight demands a full - // 128 bit quotient - *r_hi = 0; - return libdivide_128_div_64_to_64(u.hi, u.lo, v.lo, r_lo); - } - // Here v >= 2**64 - // We know that v.hi != 0, so count leading zeros is OK - // We have 0 <= n <= 63 - uint32_t n = libdivide_count_leading_zeros64(v.hi); - - // Normalize the divisor so its MSB is 1 - u128_t v1t = v; - libdivide_u128_shift(&v1t.hi, &v1t.lo, n); - uint64_t v1 = v1t.hi; // i.e. v1 = v1t >> 64 - - // To ensure no overflow - u128_t u1 = u; - libdivide_u128_shift(&u1.hi, &u1.lo, -1); - - // Get quotient from divide unsigned insn. - uint64_t rem_ignored; - uint64_t q1 = libdivide_128_div_64_to_64(u1.hi, u1.lo, v1, &rem_ignored); - - // Undo normalization and division of u by 2. - u128_t q0 = {0, q1}; - libdivide_u128_shift(&q0.hi, &q0.lo, n); - libdivide_u128_shift(&q0.hi, &q0.lo, -63); - - // Make q0 correct or too small by 1 - // Equivalent to `if (q0 != 0) q0 = q0 - 1;` - if (q0.hi != 0 || q0.lo != 0) { - q0.hi -= (q0.lo == 0); // borrow - q0.lo -= 1; - } - - // Now q0 is correct. - // Compute q0 * v as q0v - // = (q0.hi << 64 + q0.lo) * (v.hi << 64 + v.lo) - // = (q0.hi * v.hi << 128) + (q0.hi * v.lo << 64) + - // (q0.lo * v.hi << 64) + q0.lo * v.lo) - // Each term is 128 bit - // High half of full product (upper 128 bits!) 
are dropped - u128_t q0v = {0, 0}; - q0v.hi = q0.hi * v.lo + q0.lo * v.hi + libdivide_mullhi_u64(q0.lo, v.lo); - q0v.lo = q0.lo * v.lo; - - // Compute u - q0v as u_q0v - // This is the remainder - u128_t u_q0v = u; - u_q0v.hi -= q0v.hi + (u.lo < q0v.lo); // second term is borrow - u_q0v.lo -= q0v.lo; - - // Check if u_q0v >= v - // This checks if our remainder is larger than the divisor - if ((u_q0v.hi > v.hi) || (u_q0v.hi == v.hi && u_q0v.lo >= v.lo)) { - // Increment q0 - q0.lo += 1; - q0.hi += (q0.lo == 0); // carry - - // Subtract v from remainder - u_q0v.hi -= v.hi + (u_q0v.lo < v.lo); - u_q0v.lo -= v.lo; - } - - *r_hi = u_q0v.hi; - *r_lo = u_q0v.lo; - - LIBDIVIDE_ASSERT(q0.hi == 0); - return q0.lo; -#endif -} - -////////// UINT32 - -static inline struct libdivide_u32_t libdivide_internal_u32_gen(uint32_t d, int branchfree) { - if (d == 0) { - LIBDIVIDE_ERROR("divider must be != 0"); - } - - struct libdivide_u32_t result; - uint32_t floor_log_2_d = 31 - libdivide_count_leading_zeros32(d); - - // Power of 2 - if ((d & (d - 1)) == 0) { - // We need to subtract 1 from the shift value in case of an unsigned - // branchfree divider because there is a hardcoded right shift by 1 - // in its division algorithm. Because of this we also need to add back - // 1 in its recovery algorithm. - result.magic = 0; - result.more = (uint8_t)(floor_log_2_d - (branchfree != 0)); - } else { - uint8_t more; - uint32_t rem, proposed_m; - proposed_m = libdivide_64_div_32_to_32(1U << floor_log_2_d, 0, d, &rem); - - LIBDIVIDE_ASSERT(rem > 0 && rem < d); - const uint32_t e = d - rem; - - // This power works if e < 2**floor_log_2_d. - if (!branchfree && (e < (1U << floor_log_2_d))) { - // This power works - more = floor_log_2_d; - } else { - // We have to use the general 33-bit algorithm. We need to compute - // (2**power) / d. However, we already have (2**(power-1))/d and - // its remainder. By doubling both, and then correcting the - // remainder, we can compute the larger division. - // don't care about overflow here - in fact, we expect it - proposed_m += proposed_m; - const uint32_t twice_rem = rem + rem; - if (twice_rem >= d || twice_rem < rem) proposed_m += 1; - more = floor_log_2_d | LIBDIVIDE_ADD_MARKER; - } - result.magic = 1 + proposed_m; - result.more = more; - // result.more's shift should in general be ceil_log_2_d. But if we - // used the smaller power, we subtract one from the shift because we're - // using the smaller power. If we're using the larger power, we - // subtract one from the shift because it's taken care of by the add - // indicator. So floor_log_2_d happens to be correct in both cases. - } - return result; -} - -struct libdivide_u32_t libdivide_u32_gen(uint32_t d) { - return libdivide_internal_u32_gen(d, 0); -} - -struct libdivide_u32_branchfree_t libdivide_u32_branchfree_gen(uint32_t d) { - if (d == 1) { - LIBDIVIDE_ERROR("branchfree divider must be != 1"); - } - struct libdivide_u32_t tmp = libdivide_internal_u32_gen(d, 1); - struct libdivide_u32_branchfree_t ret = { - tmp.magic, (uint8_t)(tmp.more & LIBDIVIDE_32_SHIFT_MASK)}; - return ret; -} - -uint32_t libdivide_u32_do(uint32_t numer, const struct libdivide_u32_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { - return numer >> more; - } else { - uint32_t q = libdivide_mullhi_u32(denom->magic, numer); - if (more & LIBDIVIDE_ADD_MARKER) { - uint32_t t = ((numer - q) >> 1) + q; - return t >> (more & LIBDIVIDE_32_SHIFT_MASK); - } else { - // All upper bits are 0, - // don't need to mask them off. 
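
For orientation, this is how the u32 generator/do pair above is consumed by callers: precompute the divider once, then reuse it in the hot loop. A small sketch, assuming only that libdivide.h is on the include path:

```c
#include <stdint.h>
#include <stdio.h>
#include "libdivide.h"

int main(void) {
    /* Precompute the magic number and shift for a divisor known only at runtime. */
    struct libdivide_u32_t fast_d = libdivide_u32_gen(7);

    uint32_t sum_fast = 0, sum_plain = 0;
    for (uint32_t n = 1; n <= 1000; ++n) {
        sum_fast  += libdivide_u32_do(n, &fast_d);   /* multiply-and-shift */
        sum_plain += n / 7;                          /* hardware division */
    }

    /* Both sums agree; only the per-iteration cost differs. */
    printf("%u %u\n", sum_fast, sum_plain);
    return 0;
}
```
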
- return q >> more; - } - } -} - -uint32_t libdivide_u32_branchfree_do( - uint32_t numer, const struct libdivide_u32_branchfree_t *denom) { - uint32_t q = libdivide_mullhi_u32(denom->magic, numer); - uint32_t t = ((numer - q) >> 1) + q; - return t >> denom->more; -} - -uint32_t libdivide_u32_recover(const struct libdivide_u32_t *denom) { - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - - if (!denom->magic) { - return 1U << shift; - } else if (!(more & LIBDIVIDE_ADD_MARKER)) { - // We compute q = n/d = n*m / 2^(32 + shift) - // Therefore we have d = 2^(32 + shift) / m - // We need to ceil it. - // We know d is not a power of 2, so m is not a power of 2, - // so we can just add 1 to the floor - uint32_t hi_dividend = 1U << shift; - uint32_t rem_ignored; - return 1 + libdivide_64_div_32_to_32(hi_dividend, 0, denom->magic, &rem_ignored); - } else { - // Here we wish to compute d = 2^(32+shift+1)/(m+2^32). - // Notice (m + 2^32) is a 33 bit number. Use 64 bit division for now - // Also note that shift may be as high as 31, so shift + 1 will - // overflow. So we have to compute it as 2^(32+shift)/(m+2^32), and - // then double the quotient and remainder. - uint64_t half_n = 1ULL << (32 + shift); - uint64_t d = (1ULL << 32) | denom->magic; - // Note that the quotient is guaranteed <= 32 bits, but the remainder - // may need 33! - uint32_t half_q = (uint32_t)(half_n / d); - uint64_t rem = half_n % d; - // We computed 2^(32+shift)/(m+2^32) - // Need to double it, and then add 1 to the quotient if doubling th - // remainder would increase the quotient. - // Note that rem<<1 cannot overflow, since rem < d and d is 33 bits - uint32_t full_q = half_q + half_q + ((rem << 1) >= d); - - // We rounded down in gen (hence +1) - return full_q + 1; - } -} - -uint32_t libdivide_u32_branchfree_recover(const struct libdivide_u32_branchfree_t *denom) { - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - - if (!denom->magic) { - return 1U << (shift + 1); - } else { - // Here we wish to compute d = 2^(32+shift+1)/(m+2^32). - // Notice (m + 2^32) is a 33 bit number. Use 64 bit division for now - // Also note that shift may be as high as 31, so shift + 1 will - // overflow. So we have to compute it as 2^(32+shift)/(m+2^32), and - // then double the quotient and remainder. - uint64_t half_n = 1ULL << (32 + shift); - uint64_t d = (1ULL << 32) | denom->magic; - // Note that the quotient is guaranteed <= 32 bits, but the remainder - // may need 33! - uint32_t half_q = (uint32_t)(half_n / d); - uint64_t rem = half_n % d; - // We computed 2^(32+shift)/(m+2^32) - // Need to double it, and then add 1 to the quotient if doubling th - // remainder would increase the quotient. - // Note that rem<<1 cannot overflow, since rem < d and d is 33 bits - uint32_t full_q = half_q + half_q + ((rem << 1) >= d); - - // We rounded down in gen (hence +1) - return full_q + 1; - } -} - -/////////// UINT64 - -static inline struct libdivide_u64_t libdivide_internal_u64_gen(uint64_t d, int branchfree) { - if (d == 0) { - LIBDIVIDE_ERROR("divider must be != 0"); - } - - struct libdivide_u64_t result; - uint32_t floor_log_2_d = 63 - libdivide_count_leading_zeros64(d); - - // Power of 2 - if ((d & (d - 1)) == 0) { - // We need to subtract 1 from the shift value in case of an unsigned - // branchfree divider because there is a hardcoded right shift by 1 - // in its division algorithm. Because of this we also need to add back - // 1 in its recovery algorithm. 
- result.magic = 0; - result.more = (uint8_t)(floor_log_2_d - (branchfree != 0)); - } else { - uint64_t proposed_m, rem; - uint8_t more; - // (1 << (64 + floor_log_2_d)) / d - proposed_m = libdivide_128_div_64_to_64(1ULL << floor_log_2_d, 0, d, &rem); - - LIBDIVIDE_ASSERT(rem > 0 && rem < d); - const uint64_t e = d - rem; - - // This power works if e < 2**floor_log_2_d. - if (!branchfree && e < (1ULL << floor_log_2_d)) { - // This power works - more = floor_log_2_d; - } else { - // We have to use the general 65-bit algorithm. We need to compute - // (2**power) / d. However, we already have (2**(power-1))/d and - // its remainder. By doubling both, and then correcting the - // remainder, we can compute the larger division. - // don't care about overflow here - in fact, we expect it - proposed_m += proposed_m; - const uint64_t twice_rem = rem + rem; - if (twice_rem >= d || twice_rem < rem) proposed_m += 1; - more = floor_log_2_d | LIBDIVIDE_ADD_MARKER; - } - result.magic = 1 + proposed_m; - result.more = more; - // result.more's shift should in general be ceil_log_2_d. But if we - // used the smaller power, we subtract one from the shift because we're - // using the smaller power. If we're using the larger power, we - // subtract one from the shift because it's taken care of by the add - // indicator. So floor_log_2_d happens to be correct in both cases, - // which is why we do it outside of the if statement. - } - return result; -} - -struct libdivide_u64_t libdivide_u64_gen(uint64_t d) { - return libdivide_internal_u64_gen(d, 0); -} - -struct libdivide_u64_branchfree_t libdivide_u64_branchfree_gen(uint64_t d) { - if (d == 1) { - LIBDIVIDE_ERROR("branchfree divider must be != 1"); - } - struct libdivide_u64_t tmp = libdivide_internal_u64_gen(d, 1); - struct libdivide_u64_branchfree_t ret = { - tmp.magic, (uint8_t)(tmp.more & LIBDIVIDE_64_SHIFT_MASK)}; - return ret; -} - -uint64_t libdivide_u64_do(uint64_t numer, const struct libdivide_u64_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { - return numer >> more; - } else { - uint64_t q = libdivide_mullhi_u64(denom->magic, numer); - if (more & LIBDIVIDE_ADD_MARKER) { - uint64_t t = ((numer - q) >> 1) + q; - return t >> (more & LIBDIVIDE_64_SHIFT_MASK); - } else { - // All upper bits are 0, - // don't need to mask them off. - return q >> more; - } - } -} - -uint64_t libdivide_u64_branchfree_do( - uint64_t numer, const struct libdivide_u64_branchfree_t *denom) { - uint64_t q = libdivide_mullhi_u64(denom->magic, numer); - uint64_t t = ((numer - q) >> 1) + q; - return t >> denom->more; -} - -uint64_t libdivide_u64_recover(const struct libdivide_u64_t *denom) { - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - - if (!denom->magic) { - return 1ULL << shift; - } else if (!(more & LIBDIVIDE_ADD_MARKER)) { - // We compute q = n/d = n*m / 2^(64 + shift) - // Therefore we have d = 2^(64 + shift) / m - // We need to ceil it. - // We know d is not a power of 2, so m is not a power of 2, - // so we can just add 1 to the floor - uint64_t hi_dividend = 1ULL << shift; - uint64_t rem_ignored; - return 1 + libdivide_128_div_64_to_64(hi_dividend, 0, denom->magic, &rem_ignored); - } else { - // Here we wish to compute d = 2^(64+shift+1)/(m+2^64). - // Notice (m + 2^64) is a 65 bit number. This gets hairy. See - // libdivide_u32_recover for more on what we do here. 
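
As a sanity check on the recover path being derived here: gen followed by recover should hand back the original divisor, since {magic, more} fully determine it. A brief sketch under the same assumption that libdivide.h is available:

```c
#include <assert.h>
#include <stdint.h>
#include "libdivide.h"

int main(void) {
    /* recover() reconstructs the divisor from the {magic, more} pair alone,
       covering both the shift path (powers of two) and the multiply path. */
    for (uint64_t d = 1; d <= 1000; ++d) {
        struct libdivide_u64_t den = libdivide_u64_gen(d);
        assert(libdivide_u64_recover(&den) == d);
    }
    return 0;
}
```
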
- // TODO: do something better than 128 bit math - - // Full n is a (potentially) 129 bit value - // half_n is a 128 bit value - // Compute the hi half of half_n. Low half is 0. - uint64_t half_n_hi = 1ULL << shift, half_n_lo = 0; - // d is a 65 bit value. The high bit is always set to 1. - const uint64_t d_hi = 1, d_lo = denom->magic; - // Note that the quotient is guaranteed <= 64 bits, - // but the remainder may need 65! - uint64_t r_hi, r_lo; - uint64_t half_q = - libdivide_128_div_128_to_64(half_n_hi, half_n_lo, d_hi, d_lo, &r_hi, &r_lo); - // We computed 2^(64+shift)/(m+2^64) - // Double the remainder ('dr') and check if that is larger than d - // Note that d is a 65 bit value, so r1 is small and so r1 + r1 - // cannot overflow - uint64_t dr_lo = r_lo + r_lo; - uint64_t dr_hi = r_hi + r_hi + (dr_lo < r_lo); // last term is carry - int dr_exceeds_d = (dr_hi > d_hi) || (dr_hi == d_hi && dr_lo >= d_lo); - uint64_t full_q = half_q + half_q + (dr_exceeds_d ? 1 : 0); - return full_q + 1; - } -} - -uint64_t libdivide_u64_branchfree_recover(const struct libdivide_u64_branchfree_t *denom) { - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - - if (!denom->magic) { - return 1ULL << (shift + 1); - } else { - // Here we wish to compute d = 2^(64+shift+1)/(m+2^64). - // Notice (m + 2^64) is a 65 bit number. This gets hairy. See - // libdivide_u32_recover for more on what we do here. - // TODO: do something better than 128 bit math - - // Full n is a (potentially) 129 bit value - // half_n is a 128 bit value - // Compute the hi half of half_n. Low half is 0. - uint64_t half_n_hi = 1ULL << shift, half_n_lo = 0; - // d is a 65 bit value. The high bit is always set to 1. - const uint64_t d_hi = 1, d_lo = denom->magic; - // Note that the quotient is guaranteed <= 64 bits, - // but the remainder may need 65! - uint64_t r_hi, r_lo; - uint64_t half_q = - libdivide_128_div_128_to_64(half_n_hi, half_n_lo, d_hi, d_lo, &r_hi, &r_lo); - // We computed 2^(64+shift)/(m+2^64) - // Double the remainder ('dr') and check if that is larger than d - // Note that d is a 65 bit value, so r1 is small and so r1 + r1 - // cannot overflow - uint64_t dr_lo = r_lo + r_lo; - uint64_t dr_hi = r_hi + r_hi + (dr_lo < r_lo); // last term is carry - int dr_exceeds_d = (dr_hi > d_hi) || (dr_hi == d_hi && dr_lo >= d_lo); - uint64_t full_q = half_q + half_q + (dr_exceeds_d ? 1 : 0); - return full_q + 1; - } -} - -/////////// SINT32 - -static inline struct libdivide_s32_t libdivide_internal_s32_gen(int32_t d, int branchfree) { - if (d == 0) { - LIBDIVIDE_ERROR("divider must be != 0"); - } - - struct libdivide_s32_t result; - - // If d is a power of 2, or negative a power of 2, we have to use a shift. - // This is especially important because the magic algorithm fails for -1. - // To check if d is a power of 2 or its inverse, it suffices to check - // whether its absolute value has exactly one bit set. This works even for - // INT_MIN, because abs(INT_MIN) == INT_MIN, and INT_MIN has one bit set - // and is a power of 2. - uint32_t ud = (uint32_t)d; - uint32_t absD = (d < 0) ? -ud : ud; - uint32_t floor_log_2_d = 31 - libdivide_count_leading_zeros32(absD); - // check if exactly one bit is set, - // don't care if absD is 0 since that's divide by zero - if ((absD & (absD - 1)) == 0) { - // Branchfree and normal paths are exactly the same - result.magic = 0; - result.more = floor_log_2_d | (d < 0 ? 
LIBDIVIDE_NEGATIVE_DIVISOR : 0); - } else { - LIBDIVIDE_ASSERT(floor_log_2_d >= 1); - - uint8_t more; - // the dividend here is 2**(floor_log_2_d + 31), so the low 32 bit word - // is 0 and the high word is floor_log_2_d - 1 - uint32_t rem, proposed_m; - proposed_m = libdivide_64_div_32_to_32(1U << (floor_log_2_d - 1), 0, absD, &rem); - const uint32_t e = absD - rem; - - // We are going to start with a power of floor_log_2_d - 1. - // This works if works if e < 2**floor_log_2_d. - if (!branchfree && e < (1U << floor_log_2_d)) { - // This power works - more = floor_log_2_d - 1; - } else { - // We need to go one higher. This should not make proposed_m - // overflow, but it will make it negative when interpreted as an - // int32_t. - proposed_m += proposed_m; - const uint32_t twice_rem = rem + rem; - if (twice_rem >= absD || twice_rem < rem) proposed_m += 1; - more = floor_log_2_d | LIBDIVIDE_ADD_MARKER; - } - - proposed_m += 1; - int32_t magic = (int32_t)proposed_m; - - // Mark if we are negative. Note we only negate the magic number in the - // branchfull case. - if (d < 0) { - more |= LIBDIVIDE_NEGATIVE_DIVISOR; - if (!branchfree) { - magic = -magic; - } - } - - result.more = more; - result.magic = magic; - } - return result; -} - -struct libdivide_s32_t libdivide_s32_gen(int32_t d) { - return libdivide_internal_s32_gen(d, 0); -} - -struct libdivide_s32_branchfree_t libdivide_s32_branchfree_gen(int32_t d) { - struct libdivide_s32_t tmp = libdivide_internal_s32_gen(d, 1); - struct libdivide_s32_branchfree_t result = {tmp.magic, tmp.more}; - return result; -} - -int32_t libdivide_s32_do(int32_t numer, const struct libdivide_s32_t *denom) { - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - - if (!denom->magic) { - uint32_t sign = (int8_t)more >> 7; - uint32_t mask = (1U << shift) - 1; - uint32_t uq = numer + ((numer >> 31) & mask); - int32_t q = (int32_t)uq; - q >>= shift; - q = (q ^ sign) - sign; - return q; - } else { - uint32_t uq = (uint32_t)libdivide_mullhi_s32(denom->magic, numer); - if (more & LIBDIVIDE_ADD_MARKER) { - // must be arithmetic shift and then sign extend - int32_t sign = (int8_t)more >> 7; - // q += (more < 0 ? 
-numer : numer) - // cast required to avoid UB - uq += ((uint32_t)numer ^ sign) - sign; - } - int32_t q = (int32_t)uq; - q >>= shift; - q += (q < 0); - return q; - } -} - -int32_t libdivide_s32_branchfree_do(int32_t numer, const struct libdivide_s32_branchfree_t *denom) { - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - // must be arithmetic shift and then sign extend - int32_t sign = (int8_t)more >> 7; - int32_t magic = denom->magic; - int32_t q = libdivide_mullhi_s32(magic, numer); - q += numer; - - // If q is non-negative, we have nothing to do - // If q is negative, we want to add either (2**shift)-1 if d is a power of - // 2, or (2**shift) if it is not a power of 2 - uint32_t is_power_of_2 = (magic == 0); - uint32_t q_sign = (uint32_t)(q >> 31); - q += q_sign & ((1U << shift) - is_power_of_2); - - // Now arithmetic right shift - q >>= shift; - // Negate if needed - q = (q ^ sign) - sign; - - return q; -} - -int32_t libdivide_s32_recover(const struct libdivide_s32_t *denom) { - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - if (!denom->magic) { - uint32_t absD = 1U << shift; - if (more & LIBDIVIDE_NEGATIVE_DIVISOR) { - absD = -absD; - } - return (int32_t)absD; - } else { - // Unsigned math is much easier - // We negate the magic number only in the branchfull case, and we don't - // know which case we're in. However we have enough information to - // determine the correct sign of the magic number. The divisor was - // negative if LIBDIVIDE_NEGATIVE_DIVISOR is set. If ADD_MARKER is set, - // the magic number's sign is opposite that of the divisor. - // We want to compute the positive magic number. - int negative_divisor = (more & LIBDIVIDE_NEGATIVE_DIVISOR); - int magic_was_negated = (more & LIBDIVIDE_ADD_MARKER) ? denom->magic > 0 : denom->magic < 0; - - // Handle the power of 2 case (including branchfree) - if (denom->magic == 0) { - int32_t result = 1U << shift; - return negative_divisor ? -result : result; - } - - uint32_t d = (uint32_t)(magic_was_negated ? -denom->magic : denom->magic); - uint64_t n = 1ULL << (32 + shift); // this shift cannot exceed 30 - uint32_t q = (uint32_t)(n / d); - int32_t result = (int32_t)q; - result += 1; - return negative_divisor ? -result : result; - } -} - -int32_t libdivide_s32_branchfree_recover(const struct libdivide_s32_branchfree_t *denom) { - return libdivide_s32_recover((const struct libdivide_s32_t *)denom); -} - -///////////// SINT64 - -static inline struct libdivide_s64_t libdivide_internal_s64_gen(int64_t d, int branchfree) { - if (d == 0) { - LIBDIVIDE_ERROR("divider must be != 0"); - } - - struct libdivide_s64_t result; - - // If d is a power of 2, or negative a power of 2, we have to use a shift. - // This is especially important because the magic algorithm fails for -1. - // To check if d is a power of 2 or its inverse, it suffices to check - // whether its absolute value has exactly one bit set. This works even for - // INT_MIN, because abs(INT_MIN) == INT_MIN, and INT_MIN has one bit set - // and is a power of 2. - uint64_t ud = (uint64_t)d; - uint64_t absD = (d < 0) ? -ud : ud; - uint32_t floor_log_2_d = 63 - libdivide_count_leading_zeros64(absD); - // check if exactly one bit is set, - // don't care if absD is 0 since that's divide by zero - if ((absD & (absD - 1)) == 0) { - // Branchfree and non-branchfree cases are the same - result.magic = 0; - result.more = floor_log_2_d | (d < 0 ? 
LIBDIVIDE_NEGATIVE_DIVISOR : 0); - } else { - // the dividend here is 2**(floor_log_2_d + 63), so the low 64 bit word - // is 0 and the high word is floor_log_2_d - 1 - uint8_t more; - uint64_t rem, proposed_m; - proposed_m = libdivide_128_div_64_to_64(1ULL << (floor_log_2_d - 1), 0, absD, &rem); - const uint64_t e = absD - rem; - - // We are going to start with a power of floor_log_2_d - 1. - // This works if works if e < 2**floor_log_2_d. - if (!branchfree && e < (1ULL << floor_log_2_d)) { - // This power works - more = floor_log_2_d - 1; - } else { - // We need to go one higher. This should not make proposed_m - // overflow, but it will make it negative when interpreted as an - // int32_t. - proposed_m += proposed_m; - const uint64_t twice_rem = rem + rem; - if (twice_rem >= absD || twice_rem < rem) proposed_m += 1; - // note that we only set the LIBDIVIDE_NEGATIVE_DIVISOR bit if we - // also set ADD_MARKER this is an annoying optimization that - // enables algorithm #4 to avoid the mask. However we always set it - // in the branchfree case - more = floor_log_2_d | LIBDIVIDE_ADD_MARKER; - } - proposed_m += 1; - int64_t magic = (int64_t)proposed_m; - - // Mark if we are negative - if (d < 0) { - more |= LIBDIVIDE_NEGATIVE_DIVISOR; - if (!branchfree) { - magic = -magic; - } - } - - result.more = more; - result.magic = magic; - } - return result; -} - -struct libdivide_s64_t libdivide_s64_gen(int64_t d) { - return libdivide_internal_s64_gen(d, 0); -} - -struct libdivide_s64_branchfree_t libdivide_s64_branchfree_gen(int64_t d) { - struct libdivide_s64_t tmp = libdivide_internal_s64_gen(d, 1); - struct libdivide_s64_branchfree_t ret = {tmp.magic, tmp.more}; - return ret; -} - -int64_t libdivide_s64_do(int64_t numer, const struct libdivide_s64_t *denom) { - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - - if (!denom->magic) { // shift path - uint64_t mask = (1ULL << shift) - 1; - uint64_t uq = numer + ((numer >> 63) & mask); - int64_t q = (int64_t)uq; - q >>= shift; - // must be arithmetic shift and then sign-extend - int64_t sign = (int8_t)more >> 7; - q = (q ^ sign) - sign; - return q; - } else { - uint64_t uq = (uint64_t)libdivide_mullhi_s64(denom->magic, numer); - if (more & LIBDIVIDE_ADD_MARKER) { - // must be arithmetic shift and then sign extend - int64_t sign = (int8_t)more >> 7; - // q += (more < 0 ? -numer : numer) - // cast required to avoid UB - uq += ((uint64_t)numer ^ sign) - sign; - } - int64_t q = (int64_t)uq; - q >>= shift; - q += (q < 0); - return q; - } -} - -int64_t libdivide_s64_branchfree_do(int64_t numer, const struct libdivide_s64_branchfree_t *denom) { - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - // must be arithmetic shift and then sign extend - int64_t sign = (int8_t)more >> 7; - int64_t magic = denom->magic; - int64_t q = libdivide_mullhi_s64(magic, numer); - q += numer; - - // If q is non-negative, we have nothing to do. - // If q is negative, we want to add either (2**shift)-1 if d is a power of - // 2, or (2**shift) if it is not a power of 2. 
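
The signed scalar paths above are meant to match C's truncate-toward-zero division, including negative numerators and divisors. A quick check, again assuming libdivide.h is available:

```c
#include <assert.h>
#include <stdint.h>
#include "libdivide.h"

int main(void) {
    struct libdivide_s64_t den = libdivide_s64_gen(-7);

    /* C division truncates toward zero: -20 / -7 == 2 and 20 / -7 == -2. */
    assert(libdivide_s64_do(-20, &den) ==  2);
    assert(libdivide_s64_do( 20, &den) == -2);
    assert(libdivide_s64_do(  0, &den) ==  0);

    for (int64_t n = -100; n <= 100; ++n)
        assert(libdivide_s64_do(n, &den) == n / -7);
    return 0;
}
```
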
- uint64_t is_power_of_2 = (magic == 0); - uint64_t q_sign = (uint64_t)(q >> 63); - q += q_sign & ((1ULL << shift) - is_power_of_2); - - // Arithmetic right shift - q >>= shift; - // Negate if needed - q = (q ^ sign) - sign; - - return q; -} - -int64_t libdivide_s64_recover(const struct libdivide_s64_t *denom) { - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - if (denom->magic == 0) { // shift path - uint64_t absD = 1ULL << shift; - if (more & LIBDIVIDE_NEGATIVE_DIVISOR) { - absD = -absD; - } - return (int64_t)absD; - } else { - // Unsigned math is much easier - int negative_divisor = (more & LIBDIVIDE_NEGATIVE_DIVISOR); - int magic_was_negated = (more & LIBDIVIDE_ADD_MARKER) ? denom->magic > 0 : denom->magic < 0; - - uint64_t d = (uint64_t)(magic_was_negated ? -denom->magic : denom->magic); - uint64_t n_hi = 1ULL << shift, n_lo = 0; - uint64_t rem_ignored; - uint64_t q = libdivide_128_div_64_to_64(n_hi, n_lo, d, &rem_ignored); - int64_t result = (int64_t)(q + 1); - if (negative_divisor) { - result = -result; - } - return result; - } -} - -int64_t libdivide_s64_branchfree_recover(const struct libdivide_s64_branchfree_t *denom) { - return libdivide_s64_recover((const struct libdivide_s64_t *)denom); -} - -#if defined(LIBDIVIDE_NEON) - -static inline uint32x4_t libdivide_u32_do_vec128( - uint32x4_t numers, const struct libdivide_u32_t *denom); -static inline int32x4_t libdivide_s32_do_vec128( - int32x4_t numers, const struct libdivide_s32_t *denom); -static inline uint64x2_t libdivide_u64_do_vec128( - uint64x2_t numers, const struct libdivide_u64_t *denom); -static inline int64x2_t libdivide_s64_do_vec128( - int64x2_t numers, const struct libdivide_s64_t *denom); - -static inline uint32x4_t libdivide_u32_branchfree_do_vec128( - uint32x4_t numers, const struct libdivide_u32_branchfree_t *denom); -static inline int32x4_t libdivide_s32_branchfree_do_vec128( - int32x4_t numers, const struct libdivide_s32_branchfree_t *denom); -static inline uint64x2_t libdivide_u64_branchfree_do_vec128( - uint64x2_t numers, const struct libdivide_u64_branchfree_t *denom); -static inline int64x2_t libdivide_s64_branchfree_do_vec128( - int64x2_t numers, const struct libdivide_s64_branchfree_t *denom); - -//////// Internal Utility Functions - -// Logical right shift by runtime value. -// NEON implements right shift as left shits by negative values. -static inline uint32x4_t libdivide_u32_neon_srl(uint32x4_t v, uint8_t amt) { - int32_t wamt = static_cast(amt); - return vshlq_u32(v, vdupq_n_s32(-wamt)); -} - -static inline uint64x2_t libdivide_u64_neon_srl(uint64x2_t v, uint8_t amt) { - int64_t wamt = static_cast(amt); - return vshlq_u64(v, vdupq_n_s64(-wamt)); -} - -// Arithmetic right shift by runtime value. 
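
The two logical-shift helpers above lean on the fact that NEON's variable shift (USHL/vshlq) shifts right when given a negative per-lane amount. A hedged sketch of that behaviour, compilable only on an AArch64/NEON target:

```c
#include <arm_neon.h>
#include <assert.h>
#include <stdint.h>

int main(void) {
    uint32_t in[4] = {0x80000000u, 0xFFFFFFFFu, 0x12345678u, 7u};
    uint32x4_t v = vld1q_u32(in);

    /* vshlq_u32 with a negative shift count performs a logical right shift,
       which is exactly what libdivide_u32_neon_srl relies on. */
    uint32x4_t r = vshlq_u32(v, vdupq_n_s32(-4));

    uint32_t out[4];
    vst1q_u32(out, r);
    for (int i = 0; i < 4; ++i)
        assert(out[i] == in[i] >> 4);
    return 0;
}
```
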
-static inline int32x4_t libdivide_s32_neon_sra(int32x4_t v, uint8_t amt) { - int32_t wamt = static_cast(amt); - return vshlq_s32(v, vdupq_n_s32(-wamt)); -} - -static inline int64x2_t libdivide_s64_neon_sra(int64x2_t v, uint8_t amt) { - int64_t wamt = static_cast(amt); - return vshlq_s64(v, vdupq_n_s64(-wamt)); -} - -static inline int64x2_t libdivide_s64_signbits(int64x2_t v) { return vshrq_n_s64(v, 63); } - -static inline uint32x4_t libdivide_mullhi_u32_vec128(uint32x4_t a, uint32_t b) { - // Desire is [x0, x1, x2, x3] - uint32x4_t w1 = vreinterpretq_u32_u64(vmull_n_u32(vget_low_u32(a), b)); // [_, x0, _, x1] - uint32x4_t w2 = vreinterpretq_u32_u64(vmull_high_n_u32(a, b)); //[_, x2, _, x3] - return vuzp2q_u32(w1, w2); // [x0, x1, x2, x3] -} - -static inline int32x4_t libdivide_mullhi_s32_vec128(int32x4_t a, int32_t b) { - int32x4_t w1 = vreinterpretq_s32_s64(vmull_n_s32(vget_low_s32(a), b)); // [_, x0, _, x1] - int32x4_t w2 = vreinterpretq_s32_s64(vmull_high_n_s32(a, b)); //[_, x2, _, x3] - return vuzp2q_s32(w1, w2); // [x0, x1, x2, x3] -} - -static inline uint64x2_t libdivide_mullhi_u64_vec128(uint64x2_t x, uint64_t sy) { - // full 128 bits product is: - // x0*y0 + (x0*y1 << 32) + (x1*y0 << 32) + (x1*y1 << 64) - // Note x0,y0,x1,y1 are all conceptually uint32, products are 32x32->64. - - // Get low and high words. x0 contains low 32 bits, x1 is high 32 bits. - uint64x2_t y = vdupq_n_u64(sy); - uint32x2_t x0 = vmovn_u64(x); - uint32x2_t y0 = vmovn_u64(y); - uint32x2_t x1 = vshrn_n_u64(x, 32); - uint32x2_t y1 = vshrn_n_u64(y, 32); - - // Compute x0*y0. - uint64x2_t x0y0 = vmull_u32(x0, y0); - uint64x2_t x0y0_hi = vshrq_n_u64(x0y0, 32); - - // Compute other intermediate products. - uint64x2_t temp = vmlal_u32(x0y0_hi, x1, y0); // temp = x0y0_hi + x1*y0; - // We want to split temp into its low 32 bits and high 32 bits, both - // in the low half of 64 bit registers. - // Use shifts to avoid needing a reg for the mask. - uint64x2_t temp_lo = vshrq_n_u64(vshlq_n_u64(temp, 32), 32); // temp_lo = temp & 0xFFFFFFFF; - uint64x2_t temp_hi = vshrq_n_u64(temp, 32); // temp_hi = temp >> 32; - - temp_lo = vmlal_u32(temp_lo, x0, y1); // temp_lo += x0*y0 - temp_lo = vshrq_n_u64(temp_lo, 32); // temp_lo >>= 32 - temp_hi = vmlal_u32(temp_hi, x1, y1); // temp_hi += x1*y1 - uint64x2_t result = vaddq_u64(temp_hi, temp_lo); - return result; -} - -static inline int64x2_t libdivide_mullhi_s64_vec128(int64x2_t x, int64_t sy) { - int64x2_t p = vreinterpretq_s64_u64( - libdivide_mullhi_u64_vec128(vreinterpretq_u64_s64(x), static_cast(sy))); - int64x2_t y = vdupq_n_s64(sy); - int64x2_t t1 = vandq_s64(libdivide_s64_signbits(x), y); - int64x2_t t2 = vandq_s64(libdivide_s64_signbits(y), x); - p = vsubq_s64(p, t1); - p = vsubq_s64(p, t2); - return p; -} - -////////// UINT32 - -uint32x4_t libdivide_u32_do_vec128(uint32x4_t numers, const struct libdivide_u32_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { - return libdivide_u32_neon_srl(numers, more); - } else { - uint32x4_t q = libdivide_mullhi_u32_vec128(numers, denom->magic); - if (more & LIBDIVIDE_ADD_MARKER) { - // uint32_t t = ((numer - q) >> 1) + q; - // return t >> denom->shift; - // Note we can use halving-subtract to avoid the shift. 
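
The `t = ((numer - q) >> 1) + q` step quoted in the comment above is an overflow-safe way of computing `(numer + q) / 2` (valid because `q <= numer` here), which is why a single halving-subtract can replace the subtract-and-shift. A scalar sketch of the identity:

```c
#include <assert.h>
#include <stdint.h>

/* Overflow-safe floor((n + q) / 2) for the case q <= n, as in the branchfree path. */
static uint32_t avg_no_overflow(uint32_t n, uint32_t q) {
    return ((n - q) >> 1) + q;
}

int main(void) {
    uint32_t cases[][2] = {{4000000000u, 123u}, {7u, 7u}, {100u, 3u}, {1u, 0u}};
    for (int i = 0; i < 4; ++i) {
        uint32_t n = cases[i][0], q = cases[i][1];
        /* Compare against a widened average that cannot overflow. */
        assert(avg_no_overflow(n, q) == (uint32_t)(((uint64_t)n + q) >> 1));
    }
    return 0;
}
```
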
- uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - uint32x4_t t = vaddq_u32(vhsubq_u32(numers, q), q); - return libdivide_u32_neon_srl(t, shift); - } else { - return libdivide_u32_neon_srl(q, more); - } - } -} - -uint32x4_t libdivide_u32_branchfree_do_vec128( - uint32x4_t numers, const struct libdivide_u32_branchfree_t *denom) { - uint32x4_t q = libdivide_mullhi_u32_vec128(numers, denom->magic); - uint32x4_t t = vaddq_u32(vhsubq_u32(numers, q), q); - return libdivide_u32_neon_srl(t, denom->more); -} - -////////// UINT64 - -uint64x2_t libdivide_u64_do_vec128(uint64x2_t numers, const struct libdivide_u64_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { - return libdivide_u64_neon_srl(numers, more); - } else { - uint64x2_t q = libdivide_mullhi_u64_vec128(numers, denom->magic); - if (more & LIBDIVIDE_ADD_MARKER) { - // uint32_t t = ((numer - q) >> 1) + q; - // return t >> denom->shift; - // No 64-bit halving subtracts in NEON :( - uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - uint64x2_t t = vaddq_u64(vshrq_n_u64(vsubq_u64(numers, q), 1), q); - return libdivide_u64_neon_srl(t, shift); - } else { - return libdivide_u64_neon_srl(q, more); - } - } -} - -uint64x2_t libdivide_u64_branchfree_do_vec128( - uint64x2_t numers, const struct libdivide_u64_branchfree_t *denom) { - uint64x2_t q = libdivide_mullhi_u64_vec128(numers, denom->magic); - uint64x2_t t = vaddq_u64(vshrq_n_u64(vsubq_u64(numers, q), 1), q); - return libdivide_u64_neon_srl(t, denom->more); -} - -////////// SINT32 - -int32x4_t libdivide_s32_do_vec128(int32x4_t numers, const struct libdivide_s32_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { - uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - uint32_t mask = (1U << shift) - 1; - int32x4_t roundToZeroTweak = vdupq_n_s32((int)mask); - // q = numer + ((numer >> 31) & roundToZeroTweak); - int32x4_t q = vaddq_s32(numers, vandq_s32(vshrq_n_s32(numers, 31), roundToZeroTweak)); - q = libdivide_s32_neon_sra(q, shift); - int32x4_t sign = vdupq_n_s32((int8_t)more >> 7); - // q = (q ^ sign) - sign; - q = vsubq_s32(veorq_s32(q, sign), sign); - return q; - } else { - int32x4_t q = libdivide_mullhi_s32_vec128(numers, denom->magic); - if (more & LIBDIVIDE_ADD_MARKER) { - // must be arithmetic shift - int32x4_t sign = vdupq_n_s32((int8_t)more >> 7); - // q += ((numer ^ sign) - sign); - q = vaddq_s32(q, vsubq_s32(veorq_s32(numers, sign), sign)); - } - // q >>= shift - q = libdivide_s32_neon_sra(q, more & LIBDIVIDE_32_SHIFT_MASK); - q = vaddq_s32( - q, vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(q), 31))); // q += (q < 0) - return q; - } -} - -int32x4_t libdivide_s32_branchfree_do_vec128( - int32x4_t numers, const struct libdivide_s32_branchfree_t *denom) { - int32_t magic = denom->magic; - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - // must be arithmetic shift - int32x4_t sign = vdupq_n_s32((int8_t)more >> 7); - int32x4_t q = libdivide_mullhi_s32_vec128(numers, magic); - q = vaddq_s32(q, numers); // q += numers - - // If q is non-negative, we have nothing to do - // If q is negative, we want to add either (2**shift)-1 if d is - // a power of 2, or (2**shift) if it is not a power of 2 - uint32_t is_power_of_2 = (magic == 0); - int32x4_t q_sign = vshrq_n_s32(q, 31); // q_sign = q >> 31 - int32x4_t mask = vdupq_n_s32((1U << shift) - is_power_of_2); - q = vaddq_s32(q, vandq_s32(q_sign, mask)); // q = q + (q_sign & mask) - q = libdivide_s32_neon_sra(q, shift); // q >>= shift - q = vsubq_s32(veorq_s32(q, sign), sign); // q = 
(q ^ sign) - sign - return q; -} - -////////// SINT64 - -int64x2_t libdivide_s64_do_vec128(int64x2_t numers, const struct libdivide_s64_t *denom) { - uint8_t more = denom->more; - int64_t magic = denom->magic; - if (magic == 0) { // shift path - uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - uint64_t mask = (1ULL << shift) - 1; - int64x2_t roundToZeroTweak = vdupq_n_s64(mask); // TODO: no need to sign extend - // q = numer + ((numer >> 63) & roundToZeroTweak); - int64x2_t q = - vaddq_s64(numers, vandq_s64(libdivide_s64_signbits(numers), roundToZeroTweak)); - q = libdivide_s64_neon_sra(q, shift); - // q = (q ^ sign) - sign; - int64x2_t sign = vreinterpretq_s64_s8(vdupq_n_s8((int8_t)more >> 7)); - q = vsubq_s64(veorq_s64(q, sign), sign); - return q; - } else { - int64x2_t q = libdivide_mullhi_s64_vec128(numers, magic); - if (more & LIBDIVIDE_ADD_MARKER) { - // must be arithmetic shift - int64x2_t sign = vdupq_n_s64((int8_t)more >> 7); // TODO: no need to widen - // q += ((numer ^ sign) - sign); - q = vaddq_s64(q, vsubq_s64(veorq_s64(numers, sign), sign)); - } - // q >>= denom->mult_path.shift - q = libdivide_s64_neon_sra(q, more & LIBDIVIDE_64_SHIFT_MASK); - q = vaddq_s64( - q, vreinterpretq_s64_u64(vshrq_n_u64(vreinterpretq_u64_s64(q), 63))); // q += (q < 0) - return q; - } -} - -int64x2_t libdivide_s64_branchfree_do_vec128( - int64x2_t numers, const struct libdivide_s64_branchfree_t *denom) { - int64_t magic = denom->magic; - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - // must be arithmetic shift - int64x2_t sign = vdupq_n_s64((int8_t)more >> 7); // TODO: avoid sign extend - - // libdivide_mullhi_s64(numers, magic); - int64x2_t q = libdivide_mullhi_s64_vec128(numers, magic); - q = vaddq_s64(q, numers); // q += numers - - // If q is non-negative, we have nothing to do. - // If q is negative, we want to add either (2**shift)-1 if d is - // a power of 2, or (2**shift) if it is not a power of 2. 
- uint32_t is_power_of_2 = (magic == 0); - int64x2_t q_sign = libdivide_s64_signbits(q); // q_sign = q >> 63 - int64x2_t mask = vdupq_n_s64((1ULL << shift) - is_power_of_2); - q = vaddq_s64(q, vandq_s64(q_sign, mask)); // q = q + (q_sign & mask) - q = libdivide_s64_neon_sra(q, shift); // q >>= shift - q = vsubq_s64(veorq_s64(q, sign), sign); // q = (q ^ sign) - sign - return q; -} - -#endif - -#if defined(LIBDIVIDE_AVX512) - -static inline __m512i libdivide_u32_do_vec512(__m512i numers, const struct libdivide_u32_t *denom); -static inline __m512i libdivide_s32_do_vec512(__m512i numers, const struct libdivide_s32_t *denom); -static inline __m512i libdivide_u64_do_vec512(__m512i numers, const struct libdivide_u64_t *denom); -static inline __m512i libdivide_s64_do_vec512(__m512i numers, const struct libdivide_s64_t *denom); - -static inline __m512i libdivide_u32_branchfree_do_vec512( - __m512i numers, const struct libdivide_u32_branchfree_t *denom); -static inline __m512i libdivide_s32_branchfree_do_vec512( - __m512i numers, const struct libdivide_s32_branchfree_t *denom); -static inline __m512i libdivide_u64_branchfree_do_vec512( - __m512i numers, const struct libdivide_u64_branchfree_t *denom); -static inline __m512i libdivide_s64_branchfree_do_vec512( - __m512i numers, const struct libdivide_s64_branchfree_t *denom); - -//////// Internal Utility Functions - -static inline __m512i libdivide_s64_signbits(__m512i v) { - ; - return _mm512_srai_epi64(v, 63); -} - -static inline __m512i libdivide_s64_shift_right_vec512(__m512i v, int amt) { - return _mm512_srai_epi64(v, amt); -} - -// Here, b is assumed to contain one 32-bit value repeated. -static inline __m512i libdivide_mullhi_u32_vec512(__m512i a, __m512i b) { - __m512i hi_product_0Z2Z = _mm512_srli_epi64(_mm512_mul_epu32(a, b), 32); - __m512i a1X3X = _mm512_srli_epi64(a, 32); - __m512i mask = _mm512_set_epi32(-1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0); - __m512i hi_product_Z1Z3 = _mm512_and_si512(_mm512_mul_epu32(a1X3X, b), mask); - return _mm512_or_si512(hi_product_0Z2Z, hi_product_Z1Z3); -} - -// b is one 32-bit value repeated. -static inline __m512i libdivide_mullhi_s32_vec512(__m512i a, __m512i b) { - __m512i hi_product_0Z2Z = _mm512_srli_epi64(_mm512_mul_epi32(a, b), 32); - __m512i a1X3X = _mm512_srli_epi64(a, 32); - __m512i mask = _mm512_set_epi32(-1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0, -1, 0); - __m512i hi_product_Z1Z3 = _mm512_and_si512(_mm512_mul_epi32(a1X3X, b), mask); - return _mm512_or_si512(hi_product_0Z2Z, hi_product_Z1Z3); -} - -// Here, y is assumed to contain one 64-bit value repeated. -static inline __m512i libdivide_mullhi_u64_vec512(__m512i x, __m512i y) { - // see m128i variant for comments. - __m512i x0y0 = _mm512_mul_epu32(x, y); - __m512i x0y0_hi = _mm512_srli_epi64(x0y0, 32); - - __m512i x1 = _mm512_shuffle_epi32(x, (_MM_PERM_ENUM)_MM_SHUFFLE(3, 3, 1, 1)); - __m512i y1 = _mm512_shuffle_epi32(y, (_MM_PERM_ENUM)_MM_SHUFFLE(3, 3, 1, 1)); - - __m512i x0y1 = _mm512_mul_epu32(x, y1); - __m512i x1y0 = _mm512_mul_epu32(x1, y); - __m512i x1y1 = _mm512_mul_epu32(x1, y1); - - __m512i mask = _mm512_set1_epi64(0xFFFFFFFF); - __m512i temp = _mm512_add_epi64(x1y0, x0y0_hi); - __m512i temp_lo = _mm512_and_si512(temp, mask); - __m512i temp_hi = _mm512_srli_epi64(temp, 32); - - temp_lo = _mm512_srli_epi64(_mm512_add_epi64(temp_lo, x0y1), 32); - temp_hi = _mm512_add_epi64(x1y1, temp_hi); - return _mm512_add_epi64(temp_lo, temp_hi); -} - -// y is one 64-bit value repeated. 
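
The signed high-multiply that follows (like its NEON counterpart earlier) is derived from the unsigned one by subtracting `y` wherever `x` is negative and `x` wherever `y` is negative. A scalar sketch of that correction, using the same `__uint128_t` extension the header already relies on as a reference oracle; as in the library itself, it assumes arithmetic right shift of signed values:

```c
#include <assert.h>
#include <stdint.h>

/* Reference high halves of the full 128-bit products (GCC/Clang extension). */
static uint64_t mulhi_u64(uint64_t x, uint64_t y) {
    return (uint64_t)(((__uint128_t)x * y) >> 64);
}
static int64_t mulhi_s64(int64_t x, int64_t y) {
    return (int64_t)(((__int128_t)x * y) >> 64);
}

/* Signed high-multiply rebuilt from the unsigned one with signbit masks,
   mirroring the t1/t2 subtraction in the vector code. */
static int64_t mulhi_s64_from_u64(int64_t x, int64_t y) {
    uint64_t p  = mulhi_u64((uint64_t)x, (uint64_t)y);
    uint64_t t1 = (uint64_t)(x >> 63) & (uint64_t)y;  /* y if x < 0, else 0 */
    uint64_t t2 = (uint64_t)(y >> 63) & (uint64_t)x;  /* x if y < 0, else 0 */
    return (int64_t)(p - t1 - t2);
}

int main(void) {
    int64_t samples[] = {-1, 1, -123456789012345678LL, 987654321987654321LL};
    for (int i = 0; i < 4; ++i)
        for (int j = 0; j < 4; ++j)
            assert(mulhi_s64_from_u64(samples[i], samples[j]) ==
                   mulhi_s64(samples[i], samples[j]));
    return 0;
}
```
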
-static inline __m512i libdivide_mullhi_s64_vec512(__m512i x, __m512i y) { - __m512i p = libdivide_mullhi_u64_vec512(x, y); - __m512i t1 = _mm512_and_si512(libdivide_s64_signbits(x), y); - __m512i t2 = _mm512_and_si512(libdivide_s64_signbits(y), x); - p = _mm512_sub_epi64(p, t1); - p = _mm512_sub_epi64(p, t2); - return p; -} - -////////// UINT32 - -__m512i libdivide_u32_do_vec512(__m512i numers, const struct libdivide_u32_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { - return _mm512_srli_epi32(numers, more); - } else { - __m512i q = libdivide_mullhi_u32_vec512(numers, _mm512_set1_epi32(denom->magic)); - if (more & LIBDIVIDE_ADD_MARKER) { - // uint32_t t = ((numer - q) >> 1) + q; - // return t >> denom->shift; - uint32_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - __m512i t = _mm512_add_epi32(_mm512_srli_epi32(_mm512_sub_epi32(numers, q), 1), q); - return _mm512_srli_epi32(t, shift); - } else { - return _mm512_srli_epi32(q, more); - } - } -} - -__m512i libdivide_u32_branchfree_do_vec512( - __m512i numers, const struct libdivide_u32_branchfree_t *denom) { - __m512i q = libdivide_mullhi_u32_vec512(numers, _mm512_set1_epi32(denom->magic)); - __m512i t = _mm512_add_epi32(_mm512_srli_epi32(_mm512_sub_epi32(numers, q), 1), q); - return _mm512_srli_epi32(t, denom->more); -} - -////////// UINT64 - -__m512i libdivide_u64_do_vec512(__m512i numers, const struct libdivide_u64_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { - return _mm512_srli_epi64(numers, more); - } else { - __m512i q = libdivide_mullhi_u64_vec512(numers, _mm512_set1_epi64(denom->magic)); - if (more & LIBDIVIDE_ADD_MARKER) { - // uint32_t t = ((numer - q) >> 1) + q; - // return t >> denom->shift; - uint32_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - __m512i t = _mm512_add_epi64(_mm512_srli_epi64(_mm512_sub_epi64(numers, q), 1), q); - return _mm512_srli_epi64(t, shift); - } else { - return _mm512_srli_epi64(q, more); - } - } -} - -__m512i libdivide_u64_branchfree_do_vec512( - __m512i numers, const struct libdivide_u64_branchfree_t *denom) { - __m512i q = libdivide_mullhi_u64_vec512(numers, _mm512_set1_epi64(denom->magic)); - __m512i t = _mm512_add_epi64(_mm512_srli_epi64(_mm512_sub_epi64(numers, q), 1), q); - return _mm512_srli_epi64(t, denom->more); -} - -////////// SINT32 - -__m512i libdivide_s32_do_vec512(__m512i numers, const struct libdivide_s32_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { - uint32_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - uint32_t mask = (1U << shift) - 1; - __m512i roundToZeroTweak = _mm512_set1_epi32(mask); - // q = numer + ((numer >> 31) & roundToZeroTweak); - __m512i q = _mm512_add_epi32( - numers, _mm512_and_si512(_mm512_srai_epi32(numers, 31), roundToZeroTweak)); - q = _mm512_srai_epi32(q, shift); - __m512i sign = _mm512_set1_epi32((int8_t)more >> 7); - // q = (q ^ sign) - sign; - q = _mm512_sub_epi32(_mm512_xor_si512(q, sign), sign); - return q; - } else { - __m512i q = libdivide_mullhi_s32_vec512(numers, _mm512_set1_epi32(denom->magic)); - if (more & LIBDIVIDE_ADD_MARKER) { - // must be arithmetic shift - __m512i sign = _mm512_set1_epi32((int8_t)more >> 7); - // q += ((numer ^ sign) - sign); - q = _mm512_add_epi32(q, _mm512_sub_epi32(_mm512_xor_si512(numers, sign), sign)); - } - // q >>= shift - q = _mm512_srai_epi32(q, more & LIBDIVIDE_32_SHIFT_MASK); - q = _mm512_add_epi32(q, _mm512_srli_epi32(q, 31)); // q += (q < 0) - return q; - } -} - -__m512i libdivide_s32_branchfree_do_vec512( - __m512i numers, const struct libdivide_s32_branchfree_t *denom) 
{ - int32_t magic = denom->magic; - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - // must be arithmetic shift - __m512i sign = _mm512_set1_epi32((int8_t)more >> 7); - __m512i q = libdivide_mullhi_s32_vec512(numers, _mm512_set1_epi32(magic)); - q = _mm512_add_epi32(q, numers); // q += numers - - // If q is non-negative, we have nothing to do - // If q is negative, we want to add either (2**shift)-1 if d is - // a power of 2, or (2**shift) if it is not a power of 2 - uint32_t is_power_of_2 = (magic == 0); - __m512i q_sign = _mm512_srai_epi32(q, 31); // q_sign = q >> 31 - __m512i mask = _mm512_set1_epi32((1U << shift) - is_power_of_2); - q = _mm512_add_epi32(q, _mm512_and_si512(q_sign, mask)); // q = q + (q_sign & mask) - q = _mm512_srai_epi32(q, shift); // q >>= shift - q = _mm512_sub_epi32(_mm512_xor_si512(q, sign), sign); // q = (q ^ sign) - sign - return q; -} - -////////// SINT64 - -__m512i libdivide_s64_do_vec512(__m512i numers, const struct libdivide_s64_t *denom) { - uint8_t more = denom->more; - int64_t magic = denom->magic; - if (magic == 0) { // shift path - uint32_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - uint64_t mask = (1ULL << shift) - 1; - __m512i roundToZeroTweak = _mm512_set1_epi64(mask); - // q = numer + ((numer >> 63) & roundToZeroTweak); - __m512i q = _mm512_add_epi64( - numers, _mm512_and_si512(libdivide_s64_signbits(numers), roundToZeroTweak)); - q = libdivide_s64_shift_right_vec512(q, shift); - __m512i sign = _mm512_set1_epi32((int8_t)more >> 7); - // q = (q ^ sign) - sign; - q = _mm512_sub_epi64(_mm512_xor_si512(q, sign), sign); - return q; - } else { - __m512i q = libdivide_mullhi_s64_vec512(numers, _mm512_set1_epi64(magic)); - if (more & LIBDIVIDE_ADD_MARKER) { - // must be arithmetic shift - __m512i sign = _mm512_set1_epi32((int8_t)more >> 7); - // q += ((numer ^ sign) - sign); - q = _mm512_add_epi64(q, _mm512_sub_epi64(_mm512_xor_si512(numers, sign), sign)); - } - // q >>= denom->mult_path.shift - q = libdivide_s64_shift_right_vec512(q, more & LIBDIVIDE_64_SHIFT_MASK); - q = _mm512_add_epi64(q, _mm512_srli_epi64(q, 63)); // q += (q < 0) - return q; - } -} - -__m512i libdivide_s64_branchfree_do_vec512( - __m512i numers, const struct libdivide_s64_branchfree_t *denom) { - int64_t magic = denom->magic; - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - // must be arithmetic shift - __m512i sign = _mm512_set1_epi32((int8_t)more >> 7); - - // libdivide_mullhi_s64(numers, magic); - __m512i q = libdivide_mullhi_s64_vec512(numers, _mm512_set1_epi64(magic)); - q = _mm512_add_epi64(q, numers); // q += numers - - // If q is non-negative, we have nothing to do. - // If q is negative, we want to add either (2**shift)-1 if d is - // a power of 2, or (2**shift) if it is not a power of 2. 
- uint32_t is_power_of_2 = (magic == 0); - __m512i q_sign = libdivide_s64_signbits(q); // q_sign = q >> 63 - __m512i mask = _mm512_set1_epi64((1ULL << shift) - is_power_of_2); - q = _mm512_add_epi64(q, _mm512_and_si512(q_sign, mask)); // q = q + (q_sign & mask) - q = libdivide_s64_shift_right_vec512(q, shift); // q >>= shift - q = _mm512_sub_epi64(_mm512_xor_si512(q, sign), sign); // q = (q ^ sign) - sign - return q; -} - -#endif - -#if defined(LIBDIVIDE_AVX2) - -static inline __m256i libdivide_u32_do_vec256(__m256i numers, const struct libdivide_u32_t *denom); -static inline __m256i libdivide_s32_do_vec256(__m256i numers, const struct libdivide_s32_t *denom); -static inline __m256i libdivide_u64_do_vec256(__m256i numers, const struct libdivide_u64_t *denom); -static inline __m256i libdivide_s64_do_vec256(__m256i numers, const struct libdivide_s64_t *denom); - -static inline __m256i libdivide_u32_branchfree_do_vec256( - __m256i numers, const struct libdivide_u32_branchfree_t *denom); -static inline __m256i libdivide_s32_branchfree_do_vec256( - __m256i numers, const struct libdivide_s32_branchfree_t *denom); -static inline __m256i libdivide_u64_branchfree_do_vec256( - __m256i numers, const struct libdivide_u64_branchfree_t *denom); -static inline __m256i libdivide_s64_branchfree_do_vec256( - __m256i numers, const struct libdivide_s64_branchfree_t *denom); - -//////// Internal Utility Functions - -// Implementation of _mm256_srai_epi64(v, 63) (from AVX512). -static inline __m256i libdivide_s64_signbits(__m256i v) { - __m256i hiBitsDuped = _mm256_shuffle_epi32(v, _MM_SHUFFLE(3, 3, 1, 1)); - __m256i signBits = _mm256_srai_epi32(hiBitsDuped, 31); - return signBits; -} - -// Implementation of _mm256_srai_epi64 (from AVX512). -static inline __m256i libdivide_s64_shift_right_vec256(__m256i v, int amt) { - const int b = 64 - amt; - __m256i m = _mm256_set1_epi64x(1ULL << (b - 1)); - __m256i x = _mm256_srli_epi64(v, amt); - __m256i result = _mm256_sub_epi64(_mm256_xor_si256(x, m), m); - return result; -} - -// Here, b is assumed to contain one 32-bit value repeated. -static inline __m256i libdivide_mullhi_u32_vec256(__m256i a, __m256i b) { - __m256i hi_product_0Z2Z = _mm256_srli_epi64(_mm256_mul_epu32(a, b), 32); - __m256i a1X3X = _mm256_srli_epi64(a, 32); - __m256i mask = _mm256_set_epi32(-1, 0, -1, 0, -1, 0, -1, 0); - __m256i hi_product_Z1Z3 = _mm256_and_si256(_mm256_mul_epu32(a1X3X, b), mask); - return _mm256_or_si256(hi_product_0Z2Z, hi_product_Z1Z3); -} - -// b is one 32-bit value repeated. -static inline __m256i libdivide_mullhi_s32_vec256(__m256i a, __m256i b) { - __m256i hi_product_0Z2Z = _mm256_srli_epi64(_mm256_mul_epi32(a, b), 32); - __m256i a1X3X = _mm256_srli_epi64(a, 32); - __m256i mask = _mm256_set_epi32(-1, 0, -1, 0, -1, 0, -1, 0); - __m256i hi_product_Z1Z3 = _mm256_and_si256(_mm256_mul_epi32(a1X3X, b), mask); - return _mm256_or_si256(hi_product_0Z2Z, hi_product_Z1Z3); -} - -// Here, y is assumed to contain one 64-bit value repeated. -static inline __m256i libdivide_mullhi_u64_vec256(__m256i x, __m256i y) { - // see m128i variant for comments. 
- __m256i x0y0 = _mm256_mul_epu32(x, y); - __m256i x0y0_hi = _mm256_srli_epi64(x0y0, 32); - - __m256i x1 = _mm256_shuffle_epi32(x, _MM_SHUFFLE(3, 3, 1, 1)); - __m256i y1 = _mm256_shuffle_epi32(y, _MM_SHUFFLE(3, 3, 1, 1)); - - __m256i x0y1 = _mm256_mul_epu32(x, y1); - __m256i x1y0 = _mm256_mul_epu32(x1, y); - __m256i x1y1 = _mm256_mul_epu32(x1, y1); - - __m256i mask = _mm256_set1_epi64x(0xFFFFFFFF); - __m256i temp = _mm256_add_epi64(x1y0, x0y0_hi); - __m256i temp_lo = _mm256_and_si256(temp, mask); - __m256i temp_hi = _mm256_srli_epi64(temp, 32); - - temp_lo = _mm256_srli_epi64(_mm256_add_epi64(temp_lo, x0y1), 32); - temp_hi = _mm256_add_epi64(x1y1, temp_hi); - return _mm256_add_epi64(temp_lo, temp_hi); -} - -// y is one 64-bit value repeated. -static inline __m256i libdivide_mullhi_s64_vec256(__m256i x, __m256i y) { - __m256i p = libdivide_mullhi_u64_vec256(x, y); - __m256i t1 = _mm256_and_si256(libdivide_s64_signbits(x), y); - __m256i t2 = _mm256_and_si256(libdivide_s64_signbits(y), x); - p = _mm256_sub_epi64(p, t1); - p = _mm256_sub_epi64(p, t2); - return p; -} - -////////// UINT32 - -__m256i libdivide_u32_do_vec256(__m256i numers, const struct libdivide_u32_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { - return _mm256_srli_epi32(numers, more); - } else { - __m256i q = libdivide_mullhi_u32_vec256(numers, _mm256_set1_epi32(denom->magic)); - if (more & LIBDIVIDE_ADD_MARKER) { - // uint32_t t = ((numer - q) >> 1) + q; - // return t >> denom->shift; - uint32_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - __m256i t = _mm256_add_epi32(_mm256_srli_epi32(_mm256_sub_epi32(numers, q), 1), q); - return _mm256_srli_epi32(t, shift); - } else { - return _mm256_srli_epi32(q, more); - } - } -} - -__m256i libdivide_u32_branchfree_do_vec256( - __m256i numers, const struct libdivide_u32_branchfree_t *denom) { - __m256i q = libdivide_mullhi_u32_vec256(numers, _mm256_set1_epi32(denom->magic)); - __m256i t = _mm256_add_epi32(_mm256_srli_epi32(_mm256_sub_epi32(numers, q), 1), q); - return _mm256_srli_epi32(t, denom->more); -} - -////////// UINT64 - -__m256i libdivide_u64_do_vec256(__m256i numers, const struct libdivide_u64_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { - return _mm256_srli_epi64(numers, more); - } else { - __m256i q = libdivide_mullhi_u64_vec256(numers, _mm256_set1_epi64x(denom->magic)); - if (more & LIBDIVIDE_ADD_MARKER) { - // uint32_t t = ((numer - q) >> 1) + q; - // return t >> denom->shift; - uint32_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - __m256i t = _mm256_add_epi64(_mm256_srli_epi64(_mm256_sub_epi64(numers, q), 1), q); - return _mm256_srli_epi64(t, shift); - } else { - return _mm256_srli_epi64(q, more); - } - } -} - -__m256i libdivide_u64_branchfree_do_vec256( - __m256i numers, const struct libdivide_u64_branchfree_t *denom) { - __m256i q = libdivide_mullhi_u64_vec256(numers, _mm256_set1_epi64x(denom->magic)); - __m256i t = _mm256_add_epi64(_mm256_srli_epi64(_mm256_sub_epi64(numers, q), 1), q); - return _mm256_srli_epi64(t, denom->more); -} - -////////// SINT32 - -__m256i libdivide_s32_do_vec256(__m256i numers, const struct libdivide_s32_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { - uint32_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - uint32_t mask = (1U << shift) - 1; - __m256i roundToZeroTweak = _mm256_set1_epi32(mask); - // q = numer + ((numer >> 31) & roundToZeroTweak); - __m256i q = _mm256_add_epi32( - numers, _mm256_and_si256(_mm256_srai_epi32(numers, 31), roundToZeroTweak)); - q = _mm256_srai_epi32(q, shift); - __m256i sign 
= _mm256_set1_epi32((int8_t)more >> 7); - // q = (q ^ sign) - sign; - q = _mm256_sub_epi32(_mm256_xor_si256(q, sign), sign); - return q; - } else { - __m256i q = libdivide_mullhi_s32_vec256(numers, _mm256_set1_epi32(denom->magic)); - if (more & LIBDIVIDE_ADD_MARKER) { - // must be arithmetic shift - __m256i sign = _mm256_set1_epi32((int8_t)more >> 7); - // q += ((numer ^ sign) - sign); - q = _mm256_add_epi32(q, _mm256_sub_epi32(_mm256_xor_si256(numers, sign), sign)); - } - // q >>= shift - q = _mm256_srai_epi32(q, more & LIBDIVIDE_32_SHIFT_MASK); - q = _mm256_add_epi32(q, _mm256_srli_epi32(q, 31)); // q += (q < 0) - return q; - } -} - -__m256i libdivide_s32_branchfree_do_vec256( - __m256i numers, const struct libdivide_s32_branchfree_t *denom) { - int32_t magic = denom->magic; - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - // must be arithmetic shift - __m256i sign = _mm256_set1_epi32((int8_t)more >> 7); - __m256i q = libdivide_mullhi_s32_vec256(numers, _mm256_set1_epi32(magic)); - q = _mm256_add_epi32(q, numers); // q += numers - - // If q is non-negative, we have nothing to do - // If q is negative, we want to add either (2**shift)-1 if d is - // a power of 2, or (2**shift) if it is not a power of 2 - uint32_t is_power_of_2 = (magic == 0); - __m256i q_sign = _mm256_srai_epi32(q, 31); // q_sign = q >> 31 - __m256i mask = _mm256_set1_epi32((1U << shift) - is_power_of_2); - q = _mm256_add_epi32(q, _mm256_and_si256(q_sign, mask)); // q = q + (q_sign & mask) - q = _mm256_srai_epi32(q, shift); // q >>= shift - q = _mm256_sub_epi32(_mm256_xor_si256(q, sign), sign); // q = (q ^ sign) - sign - return q; -} - -////////// SINT64 - -__m256i libdivide_s64_do_vec256(__m256i numers, const struct libdivide_s64_t *denom) { - uint8_t more = denom->more; - int64_t magic = denom->magic; - if (magic == 0) { // shift path - uint32_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - uint64_t mask = (1ULL << shift) - 1; - __m256i roundToZeroTweak = _mm256_set1_epi64x(mask); - // q = numer + ((numer >> 63) & roundToZeroTweak); - __m256i q = _mm256_add_epi64( - numers, _mm256_and_si256(libdivide_s64_signbits(numers), roundToZeroTweak)); - q = libdivide_s64_shift_right_vec256(q, shift); - __m256i sign = _mm256_set1_epi32((int8_t)more >> 7); - // q = (q ^ sign) - sign; - q = _mm256_sub_epi64(_mm256_xor_si256(q, sign), sign); - return q; - } else { - __m256i q = libdivide_mullhi_s64_vec256(numers, _mm256_set1_epi64x(magic)); - if (more & LIBDIVIDE_ADD_MARKER) { - // must be arithmetic shift - __m256i sign = _mm256_set1_epi32((int8_t)more >> 7); - // q += ((numer ^ sign) - sign); - q = _mm256_add_epi64(q, _mm256_sub_epi64(_mm256_xor_si256(numers, sign), sign)); - } - // q >>= denom->mult_path.shift - q = libdivide_s64_shift_right_vec256(q, more & LIBDIVIDE_64_SHIFT_MASK); - q = _mm256_add_epi64(q, _mm256_srli_epi64(q, 63)); // q += (q < 0) - return q; - } -} - -__m256i libdivide_s64_branchfree_do_vec256( - __m256i numers, const struct libdivide_s64_branchfree_t *denom) { - int64_t magic = denom->magic; - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - // must be arithmetic shift - __m256i sign = _mm256_set1_epi32((int8_t)more >> 7); - - // libdivide_mullhi_s64(numers, magic); - __m256i q = libdivide_mullhi_s64_vec256(numers, _mm256_set1_epi64x(magic)); - q = _mm256_add_epi64(q, numers); // q += numers - - // If q is non-negative, we have nothing to do. 
- // If q is negative, we want to add either (2**shift)-1 if d is - // a power of 2, or (2**shift) if it is not a power of 2. - uint32_t is_power_of_2 = (magic == 0); - __m256i q_sign = libdivide_s64_signbits(q); // q_sign = q >> 63 - __m256i mask = _mm256_set1_epi64x((1ULL << shift) - is_power_of_2); - q = _mm256_add_epi64(q, _mm256_and_si256(q_sign, mask)); // q = q + (q_sign & mask) - q = libdivide_s64_shift_right_vec256(q, shift); // q >>= shift - q = _mm256_sub_epi64(_mm256_xor_si256(q, sign), sign); // q = (q ^ sign) - sign - return q; -} - -#endif - -#if defined(LIBDIVIDE_SSE2) - -static inline __m128i libdivide_u32_do_vec128(__m128i numers, const struct libdivide_u32_t *denom); -static inline __m128i libdivide_s32_do_vec128(__m128i numers, const struct libdivide_s32_t *denom); -static inline __m128i libdivide_u64_do_vec128(__m128i numers, const struct libdivide_u64_t *denom); -static inline __m128i libdivide_s64_do_vec128(__m128i numers, const struct libdivide_s64_t *denom); - -static inline __m128i libdivide_u32_branchfree_do_vec128( - __m128i numers, const struct libdivide_u32_branchfree_t *denom); -static inline __m128i libdivide_s32_branchfree_do_vec128( - __m128i numers, const struct libdivide_s32_branchfree_t *denom); -static inline __m128i libdivide_u64_branchfree_do_vec128( - __m128i numers, const struct libdivide_u64_branchfree_t *denom); -static inline __m128i libdivide_s64_branchfree_do_vec128( - __m128i numers, const struct libdivide_s64_branchfree_t *denom); - -//////// Internal Utility Functions - -// Implementation of _mm_srai_epi64(v, 63) (from AVX512). -static inline __m128i libdivide_s64_signbits(__m128i v) { - __m128i hiBitsDuped = _mm_shuffle_epi32(v, _MM_SHUFFLE(3, 3, 1, 1)); - __m128i signBits = _mm_srai_epi32(hiBitsDuped, 31); - return signBits; -} - -// Implementation of _mm_srai_epi64 (from AVX512). -static inline __m128i libdivide_s64_shift_right_vec128(__m128i v, int amt) { - const int b = 64 - amt; - __m128i m = _mm_set1_epi64x(1ULL << (b - 1)); - __m128i x = _mm_srli_epi64(v, amt); - __m128i result = _mm_sub_epi64(_mm_xor_si128(x, m), m); - return result; -} - -// Here, b is assumed to contain one 32-bit value repeated. -static inline __m128i libdivide_mullhi_u32_vec128(__m128i a, __m128i b) { - __m128i hi_product_0Z2Z = _mm_srli_epi64(_mm_mul_epu32(a, b), 32); - __m128i a1X3X = _mm_srli_epi64(a, 32); - __m128i mask = _mm_set_epi32(-1, 0, -1, 0); - __m128i hi_product_Z1Z3 = _mm_and_si128(_mm_mul_epu32(a1X3X, b), mask); - return _mm_or_si128(hi_product_0Z2Z, hi_product_Z1Z3); -} - -// SSE2 does not have a signed multiplication instruction, but we can convert -// unsigned to signed pretty efficiently. Again, b is just a 32 bit value -// repeated four times. -static inline __m128i libdivide_mullhi_s32_vec128(__m128i a, __m128i b) { - __m128i p = libdivide_mullhi_u32_vec128(a, b); - // t1 = (a >> 31) & y, arithmetic shift - __m128i t1 = _mm_and_si128(_mm_srai_epi32(a, 31), b); - __m128i t2 = _mm_and_si128(_mm_srai_epi32(b, 31), a); - p = _mm_sub_epi32(p, t1); - p = _mm_sub_epi32(p, t2); - return p; -} - -// Here, y is assumed to contain one 64-bit value repeated. -static inline __m128i libdivide_mullhi_u64_vec128(__m128i x, __m128i y) { - // full 128 bits product is: - // x0*y0 + (x0*y1 << 32) + (x1*y0 << 32) + (x1*y1 << 64) - // Note x0,y0,x1,y1 are all conceptually uint32, products are 32x32->64. - - // Compute x0*y0. - // Note x1, y1 are ignored by mul_epu32. 
- __m128i x0y0 = _mm_mul_epu32(x, y); - __m128i x0y0_hi = _mm_srli_epi64(x0y0, 32); - - // Get x1, y1 in the low bits. - // We could shuffle or right shift. Shuffles are preferred as they preserve - // the source register for the next computation. - __m128i x1 = _mm_shuffle_epi32(x, _MM_SHUFFLE(3, 3, 1, 1)); - __m128i y1 = _mm_shuffle_epi32(y, _MM_SHUFFLE(3, 3, 1, 1)); - - // No need to mask off top 32 bits for mul_epu32. - __m128i x0y1 = _mm_mul_epu32(x, y1); - __m128i x1y0 = _mm_mul_epu32(x1, y); - __m128i x1y1 = _mm_mul_epu32(x1, y1); - - // Mask here selects low bits only. - __m128i mask = _mm_set1_epi64x(0xFFFFFFFF); - __m128i temp = _mm_add_epi64(x1y0, x0y0_hi); - __m128i temp_lo = _mm_and_si128(temp, mask); - __m128i temp_hi = _mm_srli_epi64(temp, 32); - - temp_lo = _mm_srli_epi64(_mm_add_epi64(temp_lo, x0y1), 32); - temp_hi = _mm_add_epi64(x1y1, temp_hi); - return _mm_add_epi64(temp_lo, temp_hi); -} - -// y is one 64-bit value repeated. -static inline __m128i libdivide_mullhi_s64_vec128(__m128i x, __m128i y) { - __m128i p = libdivide_mullhi_u64_vec128(x, y); - __m128i t1 = _mm_and_si128(libdivide_s64_signbits(x), y); - __m128i t2 = _mm_and_si128(libdivide_s64_signbits(y), x); - p = _mm_sub_epi64(p, t1); - p = _mm_sub_epi64(p, t2); - return p; -} - -////////// UINT32 - -__m128i libdivide_u32_do_vec128(__m128i numers, const struct libdivide_u32_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { - return _mm_srli_epi32(numers, more); - } else { - __m128i q = libdivide_mullhi_u32_vec128(numers, _mm_set1_epi32(denom->magic)); - if (more & LIBDIVIDE_ADD_MARKER) { - // uint32_t t = ((numer - q) >> 1) + q; - // return t >> denom->shift; - uint32_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - __m128i t = _mm_add_epi32(_mm_srli_epi32(_mm_sub_epi32(numers, q), 1), q); - return _mm_srli_epi32(t, shift); - } else { - return _mm_srli_epi32(q, more); - } - } -} - -__m128i libdivide_u32_branchfree_do_vec128( - __m128i numers, const struct libdivide_u32_branchfree_t *denom) { - __m128i q = libdivide_mullhi_u32_vec128(numers, _mm_set1_epi32(denom->magic)); - __m128i t = _mm_add_epi32(_mm_srli_epi32(_mm_sub_epi32(numers, q), 1), q); - return _mm_srli_epi32(t, denom->more); -} - -////////// UINT64 - -__m128i libdivide_u64_do_vec128(__m128i numers, const struct libdivide_u64_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { - return _mm_srli_epi64(numers, more); - } else { - __m128i q = libdivide_mullhi_u64_vec128(numers, _mm_set1_epi64x(denom->magic)); - if (more & LIBDIVIDE_ADD_MARKER) { - // uint32_t t = ((numer - q) >> 1) + q; - // return t >> denom->shift; - uint32_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - __m128i t = _mm_add_epi64(_mm_srli_epi64(_mm_sub_epi64(numers, q), 1), q); - return _mm_srli_epi64(t, shift); - } else { - return _mm_srli_epi64(q, more); - } - } -} - -__m128i libdivide_u64_branchfree_do_vec128( - __m128i numers, const struct libdivide_u64_branchfree_t *denom) { - __m128i q = libdivide_mullhi_u64_vec128(numers, _mm_set1_epi64x(denom->magic)); - __m128i t = _mm_add_epi64(_mm_srli_epi64(_mm_sub_epi64(numers, q), 1), q); - return _mm_srli_epi64(t, denom->more); -} - -////////// SINT32 - -__m128i libdivide_s32_do_vec128(__m128i numers, const struct libdivide_s32_t *denom) { - uint8_t more = denom->more; - if (!denom->magic) { - uint32_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - uint32_t mask = (1U << shift) - 1; - __m128i roundToZeroTweak = _mm_set1_epi32(mask); - // q = numer + ((numer >> 31) & roundToZeroTweak); - __m128i q = - _mm_add_epi32(numers, 
_mm_and_si128(_mm_srai_epi32(numers, 31), roundToZeroTweak)); - q = _mm_srai_epi32(q, shift); - __m128i sign = _mm_set1_epi32((int8_t)more >> 7); - // q = (q ^ sign) - sign; - q = _mm_sub_epi32(_mm_xor_si128(q, sign), sign); - return q; - } else { - __m128i q = libdivide_mullhi_s32_vec128(numers, _mm_set1_epi32(denom->magic)); - if (more & LIBDIVIDE_ADD_MARKER) { - // must be arithmetic shift - __m128i sign = _mm_set1_epi32((int8_t)more >> 7); - // q += ((numer ^ sign) - sign); - q = _mm_add_epi32(q, _mm_sub_epi32(_mm_xor_si128(numers, sign), sign)); - } - // q >>= shift - q = _mm_srai_epi32(q, more & LIBDIVIDE_32_SHIFT_MASK); - q = _mm_add_epi32(q, _mm_srli_epi32(q, 31)); // q += (q < 0) - return q; - } -} - -__m128i libdivide_s32_branchfree_do_vec128( - __m128i numers, const struct libdivide_s32_branchfree_t *denom) { - int32_t magic = denom->magic; - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_32_SHIFT_MASK; - // must be arithmetic shift - __m128i sign = _mm_set1_epi32((int8_t)more >> 7); - __m128i q = libdivide_mullhi_s32_vec128(numers, _mm_set1_epi32(magic)); - q = _mm_add_epi32(q, numers); // q += numers - - // If q is non-negative, we have nothing to do - // If q is negative, we want to add either (2**shift)-1 if d is - // a power of 2, or (2**shift) if it is not a power of 2 - uint32_t is_power_of_2 = (magic == 0); - __m128i q_sign = _mm_srai_epi32(q, 31); // q_sign = q >> 31 - __m128i mask = _mm_set1_epi32((1U << shift) - is_power_of_2); - q = _mm_add_epi32(q, _mm_and_si128(q_sign, mask)); // q = q + (q_sign & mask) - q = _mm_srai_epi32(q, shift); // q >>= shift - q = _mm_sub_epi32(_mm_xor_si128(q, sign), sign); // q = (q ^ sign) - sign - return q; -} - -////////// SINT64 - -__m128i libdivide_s64_do_vec128(__m128i numers, const struct libdivide_s64_t *denom) { - uint8_t more = denom->more; - int64_t magic = denom->magic; - if (magic == 0) { // shift path - uint32_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - uint64_t mask = (1ULL << shift) - 1; - __m128i roundToZeroTweak = _mm_set1_epi64x(mask); - // q = numer + ((numer >> 63) & roundToZeroTweak); - __m128i q = - _mm_add_epi64(numers, _mm_and_si128(libdivide_s64_signbits(numers), roundToZeroTweak)); - q = libdivide_s64_shift_right_vec128(q, shift); - __m128i sign = _mm_set1_epi32((int8_t)more >> 7); - // q = (q ^ sign) - sign; - q = _mm_sub_epi64(_mm_xor_si128(q, sign), sign); - return q; - } else { - __m128i q = libdivide_mullhi_s64_vec128(numers, _mm_set1_epi64x(magic)); - if (more & LIBDIVIDE_ADD_MARKER) { - // must be arithmetic shift - __m128i sign = _mm_set1_epi32((int8_t)more >> 7); - // q += ((numer ^ sign) - sign); - q = _mm_add_epi64(q, _mm_sub_epi64(_mm_xor_si128(numers, sign), sign)); - } - // q >>= denom->mult_path.shift - q = libdivide_s64_shift_right_vec128(q, more & LIBDIVIDE_64_SHIFT_MASK); - q = _mm_add_epi64(q, _mm_srli_epi64(q, 63)); // q += (q < 0) - return q; - } -} - -__m128i libdivide_s64_branchfree_do_vec128( - __m128i numers, const struct libdivide_s64_branchfree_t *denom) { - int64_t magic = denom->magic; - uint8_t more = denom->more; - uint8_t shift = more & LIBDIVIDE_64_SHIFT_MASK; - // must be arithmetic shift - __m128i sign = _mm_set1_epi32((int8_t)more >> 7); - - // libdivide_mullhi_s64(numers, magic); - __m128i q = libdivide_mullhi_s64_vec128(numers, _mm_set1_epi64x(magic)); - q = _mm_add_epi64(q, numers); // q += numers - - // If q is non-negative, we have nothing to do. 
- // If q is negative, we want to add either (2**shift)-1 if d is - // a power of 2, or (2**shift) if it is not a power of 2. - uint32_t is_power_of_2 = (magic == 0); - __m128i q_sign = libdivide_s64_signbits(q); // q_sign = q >> 63 - __m128i mask = _mm_set1_epi64x((1ULL << shift) - is_power_of_2); - q = _mm_add_epi64(q, _mm_and_si128(q_sign, mask)); // q = q + (q_sign & mask) - q = libdivide_s64_shift_right_vec128(q, shift); // q >>= shift - q = _mm_sub_epi64(_mm_xor_si128(q, sign), sign); // q = (q ^ sign) - sign - return q; -} - -#endif - -/////////// C++ stuff - -#ifdef __cplusplus - -enum Branching { - BRANCHFULL, // use branching algorithms - BRANCHFREE // use branchfree algorithms -}; - -#if defined(LIBDIVIDE_NEON) -// Helper to deduce NEON vector type for integral type. -template -struct NeonVecFor {}; - -template <> -struct NeonVecFor { - typedef uint32x4_t type; -}; - -template <> -struct NeonVecFor { - typedef int32x4_t type; -}; - -template <> -struct NeonVecFor { - typedef uint64x2_t type; -}; - -template <> -struct NeonVecFor { - typedef int64x2_t type; -}; -#endif - -// Versions of our algorithms for SIMD. -#if defined(LIBDIVIDE_NEON) -#define LIBDIVIDE_DIVIDE_NEON(ALGO, INT_TYPE) \ - typename NeonVecFor::type divide(typename NeonVecFor::type n) const { \ - return libdivide_##ALGO##_do_vec128(n, &denom); \ - } -#else -#define LIBDIVIDE_DIVIDE_NEON(ALGO, INT_TYPE) -#endif -#if defined(LIBDIVIDE_SSE2) -#define LIBDIVIDE_DIVIDE_SSE2(ALGO) \ - __m128i divide(__m128i n) const { return libdivide_##ALGO##_do_vec128(n, &denom); } -#else -#define LIBDIVIDE_DIVIDE_SSE2(ALGO) -#endif - -#if defined(LIBDIVIDE_AVX2) -#define LIBDIVIDE_DIVIDE_AVX2(ALGO) \ - __m256i divide(__m256i n) const { return libdivide_##ALGO##_do_vec256(n, &denom); } -#else -#define LIBDIVIDE_DIVIDE_AVX2(ALGO) -#endif - -#if defined(LIBDIVIDE_AVX512) -#define LIBDIVIDE_DIVIDE_AVX512(ALGO) \ - __m512i divide(__m512i n) const { return libdivide_##ALGO##_do_vec512(n, &denom); } -#else -#define LIBDIVIDE_DIVIDE_AVX512(ALGO) -#endif - -// The DISPATCHER_GEN() macro generates C++ methods (for the given integer -// and algorithm types) that redirect to libdivide's C API. -#define DISPATCHER_GEN(T, ALGO) \ - libdivide_##ALGO##_t denom; \ - dispatcher() {} \ - dispatcher(T d) : denom(libdivide_##ALGO##_gen(d)) {} \ - T divide(T n) const { return libdivide_##ALGO##_do(n, &denom); } \ - T recover() const { return libdivide_##ALGO##_recover(&denom); } \ - LIBDIVIDE_DIVIDE_NEON(ALGO, T) \ - LIBDIVIDE_DIVIDE_SSE2(ALGO) \ - LIBDIVIDE_DIVIDE_AVX2(ALGO) \ - LIBDIVIDE_DIVIDE_AVX512(ALGO) - -// The dispatcher selects a specific division algorithm for a given -// type and ALGO using partial template specialization. -template -struct dispatcher {}; - -template <> -struct dispatcher { - DISPATCHER_GEN(int32_t, s32) -}; -template <> -struct dispatcher { - DISPATCHER_GEN(int32_t, s32_branchfree) -}; -template <> -struct dispatcher { - DISPATCHER_GEN(uint32_t, u32) -}; -template <> -struct dispatcher { - DISPATCHER_GEN(uint32_t, u32_branchfree) -}; -template <> -struct dispatcher { - DISPATCHER_GEN(int64_t, s64) -}; -template <> -struct dispatcher { - DISPATCHER_GEN(int64_t, s64_branchfree) -}; -template <> -struct dispatcher { - DISPATCHER_GEN(uint64_t, u64) -}; -template <> -struct dispatcher { - DISPATCHER_GEN(uint64_t, u64_branchfree) -}; - -// This is the main divider class for use by the user (C++ API). -// The actual division algorithm is selected using the dispatcher struct -// based on the integer and algorithm template parameters. 
-template -class divider { - public: - // We leave the default constructor empty so that creating - // an array of dividers and then initializing them - // later doesn't slow us down. - divider() {} - - // Constructor that takes the divisor as a parameter - divider(T d) : div(d) {} - - // Divides n by the divisor - T divide(T n) const { return div.divide(n); } - - // Recovers the divisor, returns the value that was - // used to initialize this divider object. - T recover() const { return div.recover(); } - - bool operator==(const divider &other) const { - return div.denom.magic == other.denom.magic && div.denom.more == other.denom.more; - } - - bool operator!=(const divider &other) const { return !(*this == other); } - - // Vector variants treat the input as packed integer values with the same type as the divider - // (e.g. s32, u32, s64, u64) and divides each of them by the divider, returning the packed - // quotients. -#if defined(LIBDIVIDE_SSE2) - __m128i divide(__m128i n) const { return div.divide(n); } -#endif -#if defined(LIBDIVIDE_AVX2) - __m256i divide(__m256i n) const { return div.divide(n); } -#endif -#if defined(LIBDIVIDE_AVX512) - __m512i divide(__m512i n) const { return div.divide(n); } -#endif -#if defined(LIBDIVIDE_NEON) - typename NeonVecFor::type divide(typename NeonVecFor::type n) const { - return div.divide(n); - } -#endif - - private: - // Storage for the actual divisor - dispatcher::value, std::is_signed::value, sizeof(T), ALGO> div; -}; - -// Overload of operator / for scalar division -template -T operator/(T n, const divider &div) { - return div.divide(n); -} - -// Overload of operator /= for scalar division -template -T &operator/=(T &n, const divider &div) { - n = div.divide(n); - return n; -} - -// Overloads for vector types. 
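An illustrative sketch of how this header is consumed (assuming `libdivide.h` is available on the include path, e.g. via the contrib/libdivide submodule cloned below, and that `LIBDIVIDE_SSE2` is defined for the guarded part): a `divider` precomputes the magic constant and shift once, the scalar `operator/` above reuses them per element, and the vector overloads that follow apply the same division to packed lanes.

```cpp
// Minimal sketch, assuming "libdivide.h" is on the include path and SSE2 was detected.
#include <cstdint>
#include <cstdio>
#include "libdivide.h"

int main() {
    libdivide::divider<uint64_t> fast_d(7);   // precomputes magic number and shift once
    uint64_t sum = 0;
    for (uint64_t n = 1; n <= 100; ++n)
        sum += n / fast_d;                    // scalar operator/ overload defined above
    // recover() returns the divisor the divider was constructed with, i.e. 7.
    std::printf("sum=%llu divisor=%llu\n", (unsigned long long)sum,
                (unsigned long long)fast_d.recover());

#if defined(LIBDIVIDE_SSE2)
    // The vector overloads below divide packed lanes by the same divider.
    libdivide::divider<uint32_t> d32(7);
    __m128i lanes = _mm_set_epi32(28, 21, 14, 7);
    lanes = lanes / d32;                                      // each lane divided by 7
    std::printf("low lane=%d\n", _mm_cvtsi128_si32(lanes));   // prints 1
#endif
    return 0;
}
```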
-#if defined(LIBDIVIDE_SSE2) -template -__m128i operator/(__m128i n, const divider &div) { - return div.divide(n); -} - -template -__m128i operator/=(__m128i &n, const divider &div) { - n = div.divide(n); - return n; -} -#endif -#if defined(LIBDIVIDE_AVX2) -template -__m256i operator/(__m256i n, const divider &div) { - return div.divide(n); -} - -template -__m256i operator/=(__m256i &n, const divider &div) { - n = div.divide(n); - return n; -} -#endif -#if defined(LIBDIVIDE_AVX512) -template -__m512i operator/(__m512i n, const divider &div) { - return div.divide(n); -} - -template -__m512i operator/=(__m512i &n, const divider &div) { - n = div.divide(n); - return n; -} -#endif - -#if defined(LIBDIVIDE_NEON) -template -uint32x4_t operator/(uint32x4_t n, const divider &div) { - return div.divide(n); -} - -template -int32x4_t operator/(int32x4_t n, const divider &div) { - return div.divide(n); -} - -template -uint64x2_t operator/(uint64x2_t n, const divider &div) { - return div.divide(n); -} - -template -int64x2_t operator/(int64x2_t n, const divider &div) { - return div.divide(n); -} - -template -uint32x4_t operator/=(uint32x4_t &n, const divider &div) { - n = div.divide(n); - return n; -} - -template -int32x4_t operator/=(int32x4_t &n, const divider &div) { - n = div.divide(n); - return n; -} - -template -uint64x2_t operator/=(uint64x2_t &n, const divider &div) { - n = div.divide(n); - return n; -} - -template -int64x2_t operator/=(int64x2_t &n, const divider &div) { - n = div.divide(n); - return n; -} -#endif - -#if __cplusplus >= 201103L || (defined(_MSC_VER) && _MSC_VER >= 1900) -// libdivide::branchfree_divider -template -using branchfree_divider = divider; -#endif - -} // namespace libdivide - -#endif // __cplusplus - -#endif // LIBDIVIDE_H diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index c2de0e33d82..436fcbe921c 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -55,7 +55,8 @@ ccache --zero-stats ||: if [ "$BUILD_MUSL_KEEPER" == "1" ] then # build keeper with musl separately - cmake --debug-trycompile -DBUILD_STANDALONE_KEEPER=1 -DENABLE_CLICKHOUSE_KEEPER=1 -DCMAKE_VERBOSE_MAKEFILE=1 -DUSE_MUSL=1 -LA -DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-x86_64-musl.cmake "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" .. + # and without rust bindings + cmake --debug-trycompile -DENABLE_RUST=OFF -DBUILD_STANDALONE_KEEPER=1 -DENABLE_CLICKHOUSE_KEEPER=1 -DCMAKE_VERBOSE_MAKEFILE=1 -DUSE_MUSL=1 -LA -DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-x86_64-musl.cmake "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" .. # shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty. 
ninja $NINJA_FLAGS clickhouse-keeper diff --git a/docker/packager/packager b/docker/packager/packager index 7f6bd8818fb..716071fcac6 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -131,7 +131,7 @@ def parse_env_variables( ARM_V80COMPAT_SUFFIX = "-aarch64-v80compat" FREEBSD_SUFFIX = "-freebsd" PPC_SUFFIX = "-ppc64le" - AMD64_SSE2_SUFFIX = "-amd64sse2" + AMD64_COMPAT_SUFFIX = "-amd64-compat" result = [] result.append("OUTPUT_DIR=/output") @@ -144,7 +144,7 @@ def parse_env_variables( is_cross_arm_v80compat = compiler.endswith(ARM_V80COMPAT_SUFFIX) is_cross_ppc = compiler.endswith(PPC_SUFFIX) is_cross_freebsd = compiler.endswith(FREEBSD_SUFFIX) - is_amd64_sse2 = compiler.endswith(AMD64_SSE2_SUFFIX) + is_amd64_compat = compiler.endswith(AMD64_COMPAT_SUFFIX) if is_cross_darwin: cc = compiler[: -len(DARWIN_SUFFIX)] @@ -197,8 +197,8 @@ def parse_env_variables( cmake_flags.append( "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-ppc64le.cmake" ) - elif is_amd64_sse2: - cc = compiler[: -len(AMD64_SSE2_SUFFIX)] + elif is_amd64_compat: + cc = compiler[: -len(AMD64_COMPAT_SUFFIX)] result.append("DEB_ARCH=amd64") cmake_flags.append("-DNO_SSE3_OR_HIGHER=1") else: @@ -358,7 +358,7 @@ if __name__ == "__main__": "clang-15-aarch64", "clang-15-aarch64-v80compat", "clang-15-ppc64le", - "clang-15-amd64sse2", + "clang-15-amd64-compat", "clang-15-freebsd", "gcc-11", ), diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index 305fc279414..22d6282d71c 100644 --- a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="22.11.2.30" +ARG VERSION="22.12.1.1752" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index f1c4dd097aa..3135ec508de 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -21,7 +21,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="22.11.2.30" +ARG VERSION="22.12.1.1752" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 7359e0a9402..bf6f9bc9e49 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -116,6 +116,7 @@ function clone_submodules contrib/base64 contrib/cctz contrib/libcpuid + contrib/libdivide contrib/double-conversion contrib/llvm-project contrib/lz4 diff --git a/docker/test/integration/runner/dockerd-entrypoint.sh b/docker/test/integration/runner/dockerd-entrypoint.sh index 5ae880ddf36..c16b2bf1087 100755 --- a/docker/test/integration/runner/dockerd-entrypoint.sh +++ b/docker/test/integration/runner/dockerd-entrypoint.sh @@ -12,6 +12,10 @@ echo '{ "registry-mirrors" : ["http://dockerhub-proxy.dockerhub-proxy-zone:5000"] }' | dd of=/etc/docker/daemon.json 2>/dev/null +# In case of test hung it is convenient to use pytest --pdb to debug it, +# and on hung you can simply press Ctrl-C and it will spawn a python pdb, +# but on SIGINT dockerd will exit, so ignore it to preserve the daemon. 
+trap '' INT dockerd --host=unix:///var/run/docker.sock --host=tcp://0.0.0.0:2375 --default-address-pool base=172.17.0.0/12,size=24 &>/ClickHouse/tests/integration/dockerd.log & set +e diff --git a/docker/test/stateful/Dockerfile b/docker/test/stateful/Dockerfile index 234d0861f8b..b67a638188c 100644 --- a/docker/test/stateful/Dockerfile +++ b/docker/test/stateful/Dockerfile @@ -17,6 +17,7 @@ ENV S3_URL="https://clickhouse-datasets.s3.amazonaws.com" ENV DATASETS="hits visits" RUN npm install -g azurite +RUN npm install tslib COPY run.sh / CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index a497d3443b0..40109255a7e 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -80,6 +80,7 @@ ENV MINIO_ROOT_PASSWORD="clickhouse" ENV EXPORT_S3_STORAGE_POLICIES=1 RUN npm install -g azurite +RUN npm install tslib COPY run.sh / COPY setup_minio.sh / diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 5cb27d90b62..01e0f5b4897 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -131,19 +131,20 @@ function stop() # Preserve the pid, since the server can hung after the PID will be deleted. pid="$(cat /var/run/clickhouse-server/clickhouse-server.pid)" - # --max-tries is supported only since 22.12 - if dpkg --compare-versions "$(clickhouse local -q 'select version()')" ge "22.12"; then - # Increase default waiting timeout for sanitizers and debug builds - clickhouse stop --max-tries 180 --do-not-kill && return - else - clickhouse stop --do-not-kill && return + clickhouse stop $max_tries --do-not-kill && return + + if [ -n "$1" ] + then + # temporarily disable it in BC check + clickhouse stop --force + return fi # We failed to stop the server with SIGTERM. Maybe it hang, let's collect stacktraces. 
kill -TERM "$(pidof gdb)" ||: sleep 5 echo "thread apply all backtrace (on stop)" >> /test_output/gdb.log - gdb -batch -ex 'thread apply all backtrace' -p "$pid" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log + timeout 30m gdb -batch -ex 'thread apply all backtrace' -p "$pid" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log clickhouse stop --force } @@ -431,7 +432,7 @@ else clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables" - stop + stop 1 mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.stress.log # Start new server @@ -497,6 +498,7 @@ else -e "Coordination::Exception: Connection loss" \ -e "MutateFromLogEntryTask" \ -e "No connection to ZooKeeper, cannot get shared table ID" \ + -e "Session expired" \ /var/log/clickhouse-server/clickhouse-server.backward.clean.log | zgrep -Fa "" > /test_output/bc_check_error_messages.txt \ && echo -e 'Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)\tFAIL' >> /test_output/test_results.tsv \ || echo -e 'Backward compatibility check: No Error messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv diff --git a/docker/test/style/process_style_check_result.py b/docker/test/style/process_style_check_result.py index 6dc3d05d051..2edf6ba3591 100755 --- a/docker/test/style/process_style_check_result.py +++ b/docker/test/style/process_style_check_result.py @@ -19,6 +19,7 @@ def process_result(result_folder): "typos", "whitespaces", "workflows", + "submodules", "docs spelling", ) diff --git a/docker/test/style/run.sh b/docker/test/style/run.sh index 80911bf8627..315efb9e6c4 100755 --- a/docker/test/style/run.sh +++ b/docker/test/style/run.sh @@ -10,7 +10,7 @@ echo "Check style" | ts echo "Check python formatting with black" | ts ./check-black -n |& tee /test_output/black_output.txt echo "Check python type hinting with mypy" | ts -./check-mypy -n |& tee /test_output/mypy_output.txt +./check-mypy -n |& tee /test_output/mypy_output.txt echo "Check typos" | ts ./check-typos |& tee /test_output/typos_output.txt echo "Check docs spelling" | ts @@ -19,6 +19,8 @@ echo "Check whitespaces" | ts ./check-whitespaces -n |& tee /test_output/whitespaces_output.txt echo "Check workflows" | ts ./check-workflows |& tee /test_output/workflows_output.txt +echo "Check submodules" | ts +./check-submodules |& tee /test_output/submodules_output.txt echo "Check shell scripts with shellcheck" | ts ./shellcheck-run.sh |& tee /test_output/shellcheck_output.txt /process_style_check_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv diff --git a/docker/test/testflows/runner/dockerd-entrypoint.sh b/docker/test/testflows/runner/dockerd-entrypoint.sh index 0e15396082a..d310ee583bf 100755 --- a/docker/test/testflows/runner/dockerd-entrypoint.sh +++ b/docker/test/testflows/runner/dockerd-entrypoint.sh @@ -10,6 +10,10 @@ cat > /etc/docker/daemon.json << EOF } EOF +# In case of test hung it is convenient to use pytest --pdb to debug it, +# and on hung you can simply press Ctrl-C and it will spawn a python pdb, +# but on SIGINT dockerd will exit, so ignore it to preserve the daemon. 
+trap '' INT dockerd --host=unix:///var/run/docker.sock --host=tcp://0.0.0.0:2375 &>/var/log/somefile & set +e diff --git a/docs/_includes/install/universal.sh b/docs/_includes/install/universal.sh index 30766cb6052..de34897a6f6 100755 --- a/docs/_includes/install/universal.sh +++ b/docs/_includes/install/universal.sh @@ -9,14 +9,22 @@ if [ "${OS}" = "Linux" ] then if [ "${ARCH}" = "x86_64" -o "${ARCH}" = "amd64" ] then - DIR="amd64" + # Require at least x86-64 + SSE4.2 (introduced in 2006). On older hardware fall back to plain x86-64 (introduced in 1999) which + # guarantees at least SSE2. The caveat is that plain x86-64 builds are much less tested than SSE 4.2 builds. + HAS_SSE42=$(grep sse4_2 /proc/cpuinfo) + if [ "${HAS_SSE42}" ] + then + DIR="amd64" + else + DIR="amd64compat" + fi elif [ "${ARCH}" = "aarch64" -o "${ARCH}" = "arm64" ] then # If the system has >=ARMv8.2 (https://en.wikipedia.org/wiki/AArch64), choose the corresponding build, else fall back to a v8.0 # compat build. Unfortunately, the ARM ISA level cannot be read directly, we need to guess from the "features" in /proc/cpuinfo. # Also, the flags in /proc/cpuinfo are named differently than the flags passed to the compiler (cmake/cpu_features.cmake). - ARMV82=$(grep -m 1 'Features' /proc/cpuinfo | awk '/asimd/ && /sha1/ && /aes/ && /atomics/ && /lrcpc/') - if [ "${ARMV82}" ] + HAS_ARMV82=$(grep -m 1 'Features' /proc/cpuinfo | awk '/asimd/ && /sha1/ && /aes/ && /atomics/ && /lrcpc/') + if [ "${HAS_ARMV82}" ] then DIR="aarch64" else diff --git a/docs/changelogs/v22.12.1.1752-stable.md b/docs/changelogs/v22.12.1.1752-stable.md new file mode 100644 index 00000000000..9b3d2379277 --- /dev/null +++ b/docs/changelogs/v22.12.1.1752-stable.md @@ -0,0 +1,320 @@ +--- +sidebar_position: 1 +sidebar_label: 2022 +--- + +# 2022 Changelog + +### ClickHouse release v22.12.1.1752-stable (688e488e930) FIXME as compared to v22.11.1.1360-stable (0d211ed1984) + +#### Backward Incompatible Change +* Fixed backward incompatibility in (de)serialization of states of `min`, `max`, `any*`, `argMin`, `argMax` aggregate functions with `String` argument. The incompatibility was introduced in https://github.com/ClickHouse/ClickHouse/pull/41431 and affects 22.9, 22.10 and 22.11 branches (fixed since 22.9.6, 22.10.4 and 22.11.2 correspondingly). Some minor releases of 22.3, 22.7 and 22.8 branches are also affected: 22.3.13...22.3.14 (fixed since 22.3.15), 22.8.6...22.8.9 (fixed since 22.8.10), 22.7.6 and newer (will not be fixed in 22.7, we recommend to upgrade from 22.7.* to 22.8.10 or newer). This release note does not concern users that have never used affected versions. Incompatible versions append extra `'\0'` to strings when reading states of the aggregate functions mentioned above. For example, if an older version saved state of `anyState('foobar')` to `state_column` then incompatible version will print `'foobar\0'` on `anyMerge(state_column)`. Also incompatible versions write states of the aggregate functions without trailing `'\0'`. Newer versions (that have the fix) can correctly read data written by all versions including incompatible versions, except one corner case. If an incompatible version saved a state with a string that actually ends with null character, then newer version will trim trailing `'\0'` when reading state of affected aggregate function. For example, if an incompatible version saved state of `anyState('abrac\0dabra\0')` to `state_column` then newer versions will print `'abrac\0dabra'` on `anyMerge(state_column)`. 
The issue also affects distributed queries when an incompatible version works in a cluster together with older or newer versions. [#43038](https://github.com/ClickHouse/ClickHouse/pull/43038) ([Raúl Marín](https://github.com/Algunenano)).
+
+#### New Feature
+* Add the "grace_hash" join_algorithm. [#38191](https://github.com/ClickHouse/ClickHouse/pull/38191) ([BigRedEye](https://github.com/BigRedEye)).
+* Merging on initiator now uses the same memory-bound approach as merging of local aggregation results if `enable_memory_bound_merging_of_aggregation_results` is set. [#40879](https://github.com/ClickHouse/ClickHouse/pull/40879) ([Nikita Taranov](https://github.com/nickitat)).
+* Add BSONEachRow input/output format. In this format, ClickHouse formats/parses each row as a separate BSON document and each column is formatted/parsed as a single BSON field with the column name as a key. [#42033](https://github.com/ClickHouse/ClickHouse/pull/42033) ([mark-polokhov](https://github.com/mark-polokhov)).
+* Closes: [#37631](https://github.com/ClickHouse/ClickHouse/issues/37631). [#42265](https://github.com/ClickHouse/ClickHouse/pull/42265) ([刘陶峰](https://github.com/taofengliu)).
+* Added `multiplyDecimal` and `divideDecimal` functions for decimal operations with fixed precision. [#42438](https://github.com/ClickHouse/ClickHouse/pull/42438) ([Andrey Zvonov](https://github.com/zvonand)).
+* Added `system.moves` table with a list of currently moving parts. [#42660](https://github.com/ClickHouse/ClickHouse/pull/42660) ([Sergei Trifonov](https://github.com/serxa)).
+* Keeper feature: add support for an embedded Prometheus endpoint. [#43087](https://github.com/ClickHouse/ClickHouse/pull/43087) ([Antonio Andelic](https://github.com/antonio2368)).
+* Added the `age` function to calculate the difference between two dates or dates-with-time values, expressed as a number of full units. Closes [#41115](https://github.com/ClickHouse/ClickHouse/issues/41115). [#43123](https://github.com/ClickHouse/ClickHouse/pull/43123) ([Roman Vasin](https://github.com/rvasin)).
+* Add settings `max_streams_for_merge_tree_reading` and `allow_asynchronous_read_from_io_pool_for_merge_tree`. Setting `max_streams_for_merge_tree_reading` limits the number of reading streams for MergeTree tables. Setting `allow_asynchronous_read_from_io_pool_for_merge_tree` enables a background I/O pool to read from `MergeTree` tables. This may increase performance for I/O-bound queries if used together with `max_streams_to_max_threads_ratio` or `max_streams_for_merge_tree_reading`. [#43260](https://github.com/ClickHouse/ClickHouse/pull/43260) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Add the expression of the index to the `data_skipping_indices` system table. [#43308](https://github.com/ClickHouse/ClickHouse/pull/43308) ([Guillaume Tassery](https://github.com/YiuRULE)).
+* New hash function [xxh3](https://github.com/Cyan4973/xxHash) added. Also performance of `xxHash32` and `xxHash64` improved on ARM thanks to a library update. [#43411](https://github.com/ClickHouse/ClickHouse/pull/43411) ([Nikita Taranov](https://github.com/nickitat)).
+* Temporary data (for external sorting, aggregation, and JOINs) can share storage with the filesystem cache for remote disks and evict it, closes [#42158](https://github.com/ClickHouse/ClickHouse/issues/42158). [#43457](https://github.com/ClickHouse/ClickHouse/pull/43457) ([Vladimir C](https://github.com/vdimir)).
+* Add column `engine_full` to system table `databases` so that users can access the whole engine definition of a database via system tables. [#43468](https://github.com/ClickHouse/ClickHouse/pull/43468) ([凌涛](https://github.com/lingtaolf)).
+* Add password complexity rules and checks for creating a new user. [#43719](https://github.com/ClickHouse/ClickHouse/pull/43719) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Add function `concatWithSeparator`, like `concat_ws` in Spark. [#43749](https://github.com/ClickHouse/ClickHouse/pull/43749) ([李扬](https://github.com/taiyang-li)).
+* Added constraints for merge tree settings. [#43903](https://github.com/ClickHouse/ClickHouse/pull/43903) ([Sergei Trifonov](https://github.com/serxa)).
+* Support numeric literals with `_` as a separator. [#43925](https://github.com/ClickHouse/ClickHouse/pull/43925) ([jh0x](https://github.com/jh0x)).
+* Add a new setting `input_format_json_read_objects_as_strings` that allows parsing nested JSON objects into Strings in all JSON input formats. This setting is disabled by default. [#44052](https://github.com/ClickHouse/ClickHouse/pull/44052) ([Kruglov Pavel](https://github.com/Avogar)).
+
+#### Performance Improvement
+* The optimisation is now skipped if `max_size_to_preallocate_for_aggregation` has a value that is too small. The default value of this setting was increased to `10^8`. [#43945](https://github.com/ClickHouse/ClickHouse/pull/43945) ([Nikita Taranov](https://github.com/nickitat)).
+
+#### Improvement
+* Support numeric literals with underscores. Closes [#28967](https://github.com/ClickHouse/ClickHouse/issues/28967). [#39129](https://github.com/ClickHouse/ClickHouse/pull/39129) ([unbyte](https://github.com/unbyte)).
+* Add `FROM table SELECT column` syntax. [#41095](https://github.com/ClickHouse/ClickHouse/pull/41095) ([Nikolay Degterinsky](https://github.com/evillique)).
+* This PR changes how the following queries delete parts: truncate table, alter table drop part, alter table drop partition. Now these queries make empty parts which cover the old parts. This makes the truncate query work without an exclusive lock, which means concurrent reads aren't blocked. Durability is also achieved in all those queries: if the request succeeds, no resurrected parts appear later. Note that atomicity is achieved only within transaction scope. [#41145](https://github.com/ClickHouse/ClickHouse/pull/41145) ([Sema Checherinda](https://github.com/CheSema)).
+* `SET param_x` query no longer requires manual string serialization for the value of the parameter. For example, query `SET param_a = '[\'a\', \'b\']'` can now be written like `SET param_a = ['a', 'b']`. [#41874](https://github.com/ClickHouse/ClickHouse/pull/41874) ([Nikolay Degterinsky](https://github.com/evillique)).
+* `filesystemAvailable` and related functions support an optional argument with the disk name, and change `filesystemFree` to `filesystemUnreserved`. Closes [#35076](https://github.com/ClickHouse/ClickHouse/issues/35076). [#42064](https://github.com/ClickHouse/ClickHouse/pull/42064) ([flynn](https://github.com/ucasfl)).
+* Increased the default value of search_limit to 256, and added an LDAP server config option to change that to an arbitrary value. Closes: [#42276](https://github.com/ClickHouse/ClickHouse/issues/42276). [#42461](https://github.com/ClickHouse/ClickHouse/pull/42461) ([Vasily Nemkov](https://github.com/Enmk)).
+* Add cosine distance for Annoy. [#42778](https://github.com/ClickHouse/ClickHouse/pull/42778) ([Filatenkov Artur](https://github.com/FArthur-cmd)).
+* Allow removing sensitive information from exception messages as well. Resolves [#41418](https://github.com/ClickHouse/ClickHouse/issues/41418). [#42940](https://github.com/ClickHouse/ClickHouse/pull/42940) ([filimonov](https://github.com/filimonov)).
+* Keeper improvement: Add the 4lw command `rqld`, which can manually assign a node as leader. [#43026](https://github.com/ClickHouse/ClickHouse/pull/43026) ([JackyWoo](https://github.com/JackyWoo)).
+* Apply connection timeout settings for Distributed async INSERT from the query. [#43156](https://github.com/ClickHouse/ClickHouse/pull/43156) ([Azat Khuzhin](https://github.com/azat)).
+* The `unhex` function supports `FixedString` arguments. [issue42369](https://github.com/ClickHouse/ClickHouse/issues/42369). [#43207](https://github.com/ClickHouse/ClickHouse/pull/43207) ([DR](https://github.com/freedomDR)).
+* Priority is given to deleting completely expired parts, related to [#42869](https://github.com/ClickHouse/ClickHouse/issues/42869). [#43222](https://github.com/ClickHouse/ClickHouse/pull/43222) ([zhongyuankai](https://github.com/zhongyuankai)).
+* Follow-up to https://github.com/ClickHouse/ClickHouse/pull/42484. Mask sensitive information in logs better; mask secret parts in the output of queries `SHOW CREATE TABLE` and `SELECT FROM system.tables`. Also resolves [#41418](https://github.com/ClickHouse/ClickHouse/issues/41418). [#43227](https://github.com/ClickHouse/ClickHouse/pull/43227) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Enable compression of marks and primary key. [#43288](https://github.com/ClickHouse/ClickHouse/pull/43288) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
+* Resolves issue [#38075](https://github.com/ClickHouse/ClickHouse/issues/38075). Right now async insert doesn't support deduplication, because multiple small inserts can coexist in one part, which corresponds to multiple `block_id`s. The solution is straightforward. The change involves: 1. mark offsets for every insert in every chunk; 2. calculate multiple `block_id`s when the sinker receives a chunk; 3. get the block number lock by these `block_id`s; 3.1. if it fails, remove the duplicate insert(s) and duplicate `block_id`(s) from the block and recalculate `offsets` again; 3.2. if it succeeds, commit the `block_id`s and other items into Keeper: a. if that fails, do 3.1; b. if it succeeds, everything succeeds. [#43304](https://github.com/ClickHouse/ClickHouse/pull/43304) ([Han Fei](https://github.com/hanfei1991)).
+* More precise and reactive CPU load indication on the client. [#43307](https://github.com/ClickHouse/ClickHouse/pull/43307) ([Sergei Trifonov](https://github.com/serxa)).
+* Restrict default access to named collections for users defined in the config. A user must have explicit `show_named_collections=1` to be able to see them. [#43325](https://github.com/ClickHouse/ClickHouse/pull/43325) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Support reading of subcolumns of nested types from storage `S3` and table function `s3` with formats `Parquet`, `Arrow` and `ORC`. [#43329](https://github.com/ClickHouse/ClickHouse/pull/43329) ([chen](https://github.com/xiedeyantu)).
+* Systemd integration now correctly notifies systemd that the service has really started and is ready to serve requests. [#43400](https://github.com/ClickHouse/ClickHouse/pull/43400) ([Коренберг Марк](https://github.com/socketpair)).
+* Add `table_uuid` to `system.parts`. [#43404](https://github.com/ClickHouse/ClickHouse/pull/43404) ([Azat Khuzhin](https://github.com/azat)).
+* Added a client option to display the number of locally processed rows in non-interactive mode (`--print-num-processed-rows`). [#43407](https://github.com/ClickHouse/ClickHouse/pull/43407) ([jh0x](https://github.com/jh0x)).
+* Show read rows while reading from stdin in the client. Closes [#43423](https://github.com/ClickHouse/ClickHouse/issues/43423). [#43442](https://github.com/ClickHouse/ClickHouse/pull/43442) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Keeper improvement: try syncing logs to disk in parallel with replication. [#43450](https://github.com/ClickHouse/ClickHouse/pull/43450) ([Antonio Andelic](https://github.com/antonio2368)).
+* Show a progress bar while reading from the s3 table function / engine. [#43454](https://github.com/ClickHouse/ClickHouse/pull/43454) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* The progress bar will show both read and written rows. [#43496](https://github.com/ClickHouse/ClickHouse/pull/43496) ([Ilya Yatsishin](https://github.com/qoega)).
+* Implement `aggregation-in-order` optimization on top of query plan. It is enabled by default (but works only together with `optimize_aggregation_in_order`, which is disabled by default). Set `query_plan_aggregation_in_order = 0` to use the previous AST-based version. [#43592](https://github.com/ClickHouse/ClickHouse/pull/43592) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Allow sending profile events with `trace_type = 'ProfileEvent'` to `system.trace_log` on each increment with the current stack, profile event name and value of the increment. It can be enabled by the setting `trace_profile_events` and used to debug performance of queries. [#43639](https://github.com/ClickHouse/ClickHouse/pull/43639) ([Anton Popov](https://github.com/CurtizJ)).
+* Keeper improvement: requests are batched more often. The batching can be controlled with the new setting `max_requests_quick_batch_size`. [#43686](https://github.com/ClickHouse/ClickHouse/pull/43686) ([Antonio Andelic](https://github.com/antonio2368)).
+* Added the possibility to use an array as the second parameter of the `cutURLParameter` function (see the sketch below). Closes [#6827](https://github.com/ClickHouse/ClickHouse/issues/6827). [#43788](https://github.com/ClickHouse/ClickHouse/pull/43788) ([Roman Vasin](https://github.com/rvasin)).
+* Implement referential dependencies and use them to create tables in the correct order while restoring from a backup. [#43834](https://github.com/ClickHouse/ClickHouse/pull/43834) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Add a new setting `input_format_max_binary_string_size` to limit string size in RowBinary format. [#43842](https://github.com/ClickHouse/ClickHouse/pull/43842) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix some incorrect logic related to AST-level optimizations. [#43873](https://github.com/ClickHouse/ClickHouse/pull/43873) ([Duc Canh Le](https://github.com/canhld94)).
+* Support queries like `SHOW FULL TABLES ...`. [#43910](https://github.com/ClickHouse/ClickHouse/pull/43910) ([Filatenkov Artur](https://github.com/FArthur-cmd)).
+* When ClickHouse requests a remote HTTP server, and it returns an error, the numeric HTTP code was not displayed correctly in the exception message. Closes [#43919](https://github.com/ClickHouse/ClickHouse/issues/43919). [#43920](https://github.com/ClickHouse/ClickHouse/pull/43920) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
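+
+A hedged sketch of the `cutURLParameter` improvement above (the URL and parameter names are illustrative):
+
+```sql
+-- Pass an array as the second argument to strip several URL parameters at once
+SELECT cutURLParameter('http://example.com/?a=1&b=2&c=3', ['a', 'c']) AS url;
+-- expected shape of the result: 'http://example.com/?b=2'
+```
+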
+* Settings `merge_tree_min_rows_for_concurrent_read_for_remote_filesystem/merge_tree_min_bytes_for_concurrent_read_for_remote_filesystem` did not respect adaptive granularity. Fat rows did not decrease the number of read rows (as was done for `merge_tree_min_rows_for_concurrent_read/merge_tree_min_bytes_for_concurrent_read`), which could lead to high memory usage. [#43965](https://github.com/ClickHouse/ClickHouse/pull/43965) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Support `optimize_if_transform_strings_to_enum` in the new analyzer. [#43999](https://github.com/ClickHouse/ClickHouse/pull/43999) ([Antonio Andelic](https://github.com/antonio2368)).
+* Upgrade the new "DeflateQpl" compression codec, which was implemented in a previous PR (details: https://github.com/ClickHouse/ClickHouse/pull/39494). This patch improves the codec in the following aspects: 1. QPL v0.2.0 to QPL v0.3.0 [Intel® Query Processing Library (QPL)](https://github.com/intel/qpl); 2. improve the CMake file to fix QPL build issues for QPL v0.3.0; 3. link the QPL library with libaccel-config at build time instead of loading it at runtime via dlopen as in QPL v0.2.0; 4. fix a log print issue in CompressionCodecDeflateQpl.cpp. [#44024](https://github.com/ClickHouse/ClickHouse/pull/44024) ([jasperzhu](https://github.com/jinjunzh)).
+* Follow-up to https://github.com/ClickHouse/ClickHouse/pull/43834. Fix review issues; dependencies from the `Distributed` table engine and from the `cluster()` function are also considered now, as well as dependencies of a dictionary defined without a host & port specified. [#44158](https://github.com/ClickHouse/ClickHouse/pull/44158) ([Vitaly Baranov](https://github.com/vitlibar)).
+
+#### Bug Fix
+* Fix mutations not making progress when checksums do not match between replicas (e.g. caused by a change in data format on an upgrade). [#36877](https://github.com/ClickHouse/ClickHouse/pull/36877) ([nvartolomei](https://github.com/nvartolomei)).
+* Fix `skip_unavailable_shards` not working with the `hdfsCluster` table function. [#43236](https://github.com/ClickHouse/ClickHouse/pull/43236) ([chen](https://github.com/xiedeyantu)).
+* Fix support of the question mark wildcard in S3. Closes [#42731](https://github.com/ClickHouse/ClickHouse/issues/42731). [#43253](https://github.com/ClickHouse/ClickHouse/pull/43253) ([chen](https://github.com/xiedeyantu)).
+* Fix functions `arrayFirstOrNull` and `arrayLastOrNull` when the array is Nullable. [#43274](https://github.com/ClickHouse/ClickHouse/pull/43274) ([Duc Canh Le](https://github.com/canhld94)).
+* We create a new ZooKeeper path called "async_blocks" for replicated tables in [#43304](https://github.com/ClickHouse/ClickHouse/issues/43304). However, for tables created in older versions, this path does not exist and causes an error when doing partition operations. This PR creates this node when initializing the replicated tree. It also adds a flag `async_insert_deduplicate` with a `false` default value to control whether to use this feature. As mentioned in [#38075](https://github.com/ClickHouse/ClickHouse/issues/38075), this feature is not yet fully finished, so it is turned off by default. [#44223](https://github.com/ClickHouse/ClickHouse/pull/44223) ([Han Fei](https://github.com/hanfei1991)).
+
+#### Build/Testing/Packaging Improvement
+* Add support for FreeBSD/powerpc64le. [#40422](https://github.com/ClickHouse/ClickHouse/pull/40422) ([pkubaj](https://github.com/pkubaj)).
+* Bump Testcontainers for Go to v0.15.0. [#43278](https://github.com/ClickHouse/ClickHouse/pull/43278) ([Manuel de la Peña](https://github.com/mdelapenya)).
+* Enable base64 on s390x. [#43352](https://github.com/ClickHouse/ClickHouse/pull/43352) ([Suzy Wang](https://github.com/SuzyWangIBMer)).
+* Shutdown will be much faster if we do not call `clearOldPartsFromFilesystem`. This is especially true for tests with zero-copy replication due to single-threaded deletion of parts. `clearOldPartsFromFilesystem` is unnecessary after https://github.com/ClickHouse/ClickHouse/pull/41145. [#43760](https://github.com/ClickHouse/ClickHouse/pull/43760) ([Sema Checherinda](https://github.com/CheSema)).
+* Integrate skim into the client/local. [#43922](https://github.com/ClickHouse/ClickHouse/pull/43922) ([Azat Khuzhin](https://github.com/azat)).
+* Allow ClickHouse to use OpenSSL as a dynamic library and in-tree for development purposes. [#43991](https://github.com/ClickHouse/ClickHouse/pull/43991) ([Boris Kuschel](https://github.com/bkuschel)).
+* Closes [#43912](https://github.com/ClickHouse/ClickHouse/issues/43912). [#43992](https://github.com/ClickHouse/ClickHouse/pull/43992) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Bring sha512 sums back to the building step. [#44017](https://github.com/ClickHouse/ClickHouse/pull/44017) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Kill stress tests after 2.5h in case of a hanging process. [#44214](https://github.com/ClickHouse/ClickHouse/pull/44214) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+
+#### Bug Fix (user-visible misbehavior in official stable or prestable release)
+
+* Fixed being unable to log in (because of a failure to create the session_log entry) in a rare case of messed-up settings profiles. [#42641](https://github.com/ClickHouse/ClickHouse/pull/42641) ([Vasily Nemkov](https://github.com/Enmk)).
+* Fix incorrect UserTimeMicroseconds/SystemTimeMicroseconds accounting. [#42791](https://github.com/ClickHouse/ClickHouse/pull/42791) ([Azat Khuzhin](https://github.com/azat)).
+* Do not suppress exceptions in web disk. Fix retries for web disk. [#42800](https://github.com/ClickHouse/ClickHouse/pull/42800) ([Azat Khuzhin](https://github.com/azat)).
+* Fixed race condition between inserts and dropping MVs. [#43161](https://github.com/ClickHouse/ClickHouse/pull/43161) ([AlfVII](https://github.com/AlfVII)).
+* Fixed bug which could lead to deadlock while using asynchronous inserts. [#43233](https://github.com/ClickHouse/ClickHouse/pull/43233) ([Anton Popov](https://github.com/CurtizJ)).
+* Additional check on zero uncompressed size is added to `CompressionCodecDelta`. [#43255](https://github.com/ClickHouse/ClickHouse/pull/43255) ([Nikita Taranov](https://github.com/nickitat)).
+* Fix an issue reported while trying to read a Parquet file from S3 into ClickHouse. [#43297](https://github.com/ClickHouse/ClickHouse/pull/43297) ([Arthur Passos](https://github.com/arthurpassos)).
+* Fix bad cast from LowCardinality column when using short circuit function execution. Proper fix of https://github.com/ClickHouse/ClickHouse/pull/42937. [#43311](https://github.com/ClickHouse/ClickHouse/pull/43311) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fixed queries with `SAMPLE BY` with prewhere optimization on tables using `Merge` engine. [#43315](https://github.com/ClickHouse/ClickHouse/pull/43315) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix `DESCRIBE` for `deltaLake` and `hudi` table functions. [#43323](https://github.com/ClickHouse/ClickHouse/pull/43323) ([Antonio Andelic](https://github.com/antonio2368)).
+* Check and compare the content of the `format_version` file in `MergeTreeData` so tables can be loaded even if the storage policy was changed. [#43328](https://github.com/ClickHouse/ClickHouse/pull/43328) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix possible (very unlikely) "No column to rollback" logical error during INSERT into Buffer. [#43336](https://github.com/ClickHouse/ClickHouse/pull/43336) ([Azat Khuzhin](https://github.com/azat)).
+* Fix a bug that allowed FunctionParser to parse an unlimited amount of round brackets into one function if `allow_function_parameters` is set. [#43350](https://github.com/ClickHouse/ClickHouse/pull/43350) ([Nikolay Degterinsky](https://github.com/evillique)).
+* MaterializeMySQL supports the DDL `drop table t1, t2` and is compatible with most MySQL DROP DDL. [#43366](https://github.com/ClickHouse/ClickHouse/pull/43366) ([zzsmdfj](https://github.com/zzsmdfj)).
+* Fix possible `Cannot create non-empty column with type Nothing` in functions if/multiIf. Closes [#43356](https://github.com/ClickHouse/ClickHouse/issues/43356). [#43368](https://github.com/ClickHouse/ClickHouse/pull/43368) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix a bug when a row-level filter uses the default value of a column. [#43387](https://github.com/ClickHouse/ClickHouse/pull/43387) ([Alexander Gololobov](https://github.com/davenger)).
+* A query with DISTINCT + LIMIT BY + LIMIT could return fewer rows than expected. Fixes [#43377](https://github.com/ClickHouse/ClickHouse/issues/43377). [#43410](https://github.com/ClickHouse/ClickHouse/pull/43410) ([Igor Nikonov](https://github.com/devcrafter)).
+* Fix sumMap() for Nullable(Decimal()). [#43414](https://github.com/ClickHouse/ClickHouse/pull/43414) ([Azat Khuzhin](https://github.com/azat)).
+* Fix date_diff() for hour/minute on macOS. Closes [#42742](https://github.com/ClickHouse/ClickHouse/issues/42742). [#43466](https://github.com/ClickHouse/ClickHouse/pull/43466) ([zzsmdfj](https://github.com/zzsmdfj)).
+* Fix incorrect memory accounting because of merges/mutations. [#43516](https://github.com/ClickHouse/ClickHouse/pull/43516) ([Azat Khuzhin](https://github.com/azat)).
+* Substitute UDFs in the `CREATE` query to avoid failures during loading at startup. Additionally, UDFs can now be used as `DEFAULT` expressions for columns. [#43539](https://github.com/ClickHouse/ClickHouse/pull/43539) ([Antonio Andelic](https://github.com/antonio2368)).
+* Correctly report errors in queries even when the multiple-JOINs optimization is taking place. [#43583](https://github.com/ClickHouse/ClickHouse/pull/43583) ([Salvatore](https://github.com/tbsal)).
+* Fixed primary key analysis with conditions involving `toString(enum)`. [#43596](https://github.com/ClickHouse/ClickHouse/pull/43596) ([Nikita Taranov](https://github.com/nickitat)).
+* Ensure consistency when the copier updates status and `attach_is_done` in Keeper after the partition attach is done. [#43602](https://github.com/ClickHouse/ClickHouse/pull/43602) ([lizhuoyu5](https://github.com/lzydmxy)).
+* During recovery of a lost replica, there could be a situation where we need to atomically swap two table names (use EXCHANGE), but previously we tried to use two RENAME queries instead. This obviously failed and, moreover, failed the whole recovery process of the database replica. [#43628](https://github.com/ClickHouse/ClickHouse/pull/43628) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Fix the `s3Cluster` function returning a NOT_FOUND_COLUMN_IN_BLOCK error. Closes [#43534](https://github.com/ClickHouse/ClickHouse/issues/43534). [#43629](https://github.com/ClickHouse/ClickHouse/pull/43629) ([chen](https://github.com/xiedeyantu)).
+* Optimized the number of List requests to ZooKeeper when selecting a part to merge. Previously it could produce thousands of requests in some cases. Fixes [#43647](https://github.com/ClickHouse/ClickHouse/issues/43647). [#43675](https://github.com/ClickHouse/ClickHouse/pull/43675) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix possible logical error 'Array sizes mismatched' while parsing a JSON object with arrays with the same key names but different nesting levels. Closes [#43569](https://github.com/ClickHouse/ClickHouse/issues/43569). [#43693](https://github.com/ClickHouse/ClickHouse/pull/43693) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fixed possible exception in the case of a distributed GROUP BY with an alias column among the aggregation keys. [#43709](https://github.com/ClickHouse/ClickHouse/pull/43709) ([Nikita Taranov](https://github.com/nickitat)).
+* Fix bug which can lead to broken projections if zero-copy replication is enabled and used. [#43764](https://github.com/ClickHouse/ClickHouse/pull/43764) ([alesapin](https://github.com/alesapin)).
+* Fix using multipart upload for large S3 objects in AWS S3. [#43824](https://github.com/ClickHouse/ClickHouse/pull/43824) ([ianton-ru](https://github.com/ianton-ru)).
+* Fixed `ALTER ... RESET SETTING` with `ON CLUSTER`. It could be applied to one replica only. Fixes [#43843](https://github.com/ClickHouse/ClickHouse/issues/43843). [#43848](https://github.com/ClickHouse/ClickHouse/pull/43848) ([Elena Torró](https://github.com/elenatorro)).
+* Fix logical error in right storage JOIN with USING. [#43963](https://github.com/ClickHouse/ClickHouse/pull/43963) ([Vladimir C](https://github.com/vdimir)).
+* Keeper fix: throw if the interserver port for Raft is already in use. Fix segfault in Prometheus when the Raft server failed to initialize. [#43984](https://github.com/ClickHouse/ClickHouse/pull/43984) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix ORDER BY with a positional argument in the case of unneeded column pruning. Closes [#43964](https://github.com/ClickHouse/ClickHouse/issues/43964). [#43987](https://github.com/ClickHouse/ClickHouse/pull/43987) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix bug with wrong order of keys in Storage Join. [#44012](https://github.com/ClickHouse/ClickHouse/pull/44012) ([Vladimir C](https://github.com/vdimir)).
+* Fixed exception when a subquery contains HAVING but doesn't contain an actual aggregation. [#44051](https://github.com/ClickHouse/ClickHouse/pull/44051) ([Nikita Taranov](https://github.com/nickitat)).
+* Fix race in s3 multipart upload. This race could cause the error `Part number must be an integer between 1 and 10000, inclusive. (S3_ERROR)` while restoring from a backup. [#44065](https://github.com/ClickHouse/ClickHouse/pull/44065) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix undefined behavior in the `quantiles` function, which might lead to uninitialized memory. Found by fuzzer. This closes [#44066](https://github.com/ClickHouse/ClickHouse/issues/44066). [#44067](https://github.com/ClickHouse/ClickHouse/pull/44067) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Prevent dropping a nested column if it creates an empty part. [#44159](https://github.com/ClickHouse/ClickHouse/pull/44159) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix `LOGICAL_ERROR` in the case when a fetch of a part was stopped while fetching a projection to the disk with zero-copy replication enabled. [#44173](https://github.com/ClickHouse/ClickHouse/pull/44173) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix possible Bad cast from type DB::IAST const* to DB::ASTLiteral const*. Closes [#44191](https://github.com/ClickHouse/ClickHouse/issues/44191). [#44192](https://github.com/ClickHouse/ClickHouse/pull/44192) ([Kruglov Pavel](https://github.com/Avogar)).
+* Prevent `ReadonlyReplica` metric from having negative values. [#44220](https://github.com/ClickHouse/ClickHouse/pull/44220) ([Antonio Andelic](https://github.com/antonio2368)).
+
+#### Build Improvement
+
+* Fixed endian issues in hex string conversion on s390x (which is not supported by ClickHouse). [#41245](https://github.com/ClickHouse/ClickHouse/pull/41245) ([Harry Lee](https://github.com/HarryLeeIBM)).
+* The toDateTime64 conversion generated the wrong time on z builds; add a bit_cast swap fix to support toDateTime64 on the s390x platform. [#42847](https://github.com/ClickHouse/ClickHouse/pull/42847) ([Suzy Wang](https://github.com/SuzyWangIBMer)).
+* s390x support for IP coding functions. [#43078](https://github.com/ClickHouse/ClickHouse/pull/43078) ([Suzy Wang](https://github.com/SuzyWangIBMer)).
+* Fix byte order issue of wide integers for s390x. [#43228](https://github.com/ClickHouse/ClickHouse/pull/43228) ([Harry Lee](https://github.com/HarryLeeIBM)).
+* Fixed endian issue in bloom filter serialization for s390x. [#43642](https://github.com/ClickHouse/ClickHouse/pull/43642) ([Harry Lee](https://github.com/HarryLeeIBM)).
+* Fixed setting TCP_KEEPIDLE of client connection for s390x. [#43850](https://github.com/ClickHouse/ClickHouse/pull/43850) ([Harry Lee](https://github.com/HarryLeeIBM)).
+* Fix endian issue in StringHashTable for s390x. [#44049](https://github.com/ClickHouse/ClickHouse/pull/44049) ([Harry Lee](https://github.com/HarryLeeIBM)).
+
+#### NO CL ENTRY
+
+* NO CL ENTRY: 'Revert "Revert "S3 request per second rate throttling""'. [#43335](https://github.com/ClickHouse/ClickHouse/pull/43335) ([Sergei Trifonov](https://github.com/serxa)).
+* NO CL ENTRY: 'Update version after release'. [#43348](https://github.com/ClickHouse/ClickHouse/pull/43348) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* NO CL ENTRY: 'Revert "Add table_uuid to system.parts"'. [#43571](https://github.com/ClickHouse/ClickHouse/pull/43571) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* NO CL ENTRY: 'Revert "Fix endian issue in integer hex string conversion"'. [#43613](https://github.com/ClickHouse/ClickHouse/pull/43613) ([Vladimir C](https://github.com/vdimir)).
+* NO CL ENTRY: 'Update replication.md'. [#43643](https://github.com/ClickHouse/ClickHouse/pull/43643) ([Peignon Melvyn](https://github.com/melvynator)).
+* NO CL ENTRY: 'Revert "Temporary files evict fs cache"'. [#43883](https://github.com/ClickHouse/ClickHouse/pull/43883) ([Vladimir C](https://github.com/vdimir)).
+* NO CL ENTRY: 'Update html interface doc'. [#44064](https://github.com/ClickHouse/ClickHouse/pull/44064) ([San](https://github.com/santrancisco)).
+* NO CL ENTRY: 'Revert "Add function 'age'"'. [#44203](https://github.com/ClickHouse/ClickHouse/pull/44203) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* NO CL ENTRY: 'Revert "Builtin skim"'.
[#44227](https://github.com/ClickHouse/ClickHouse/pull/44227) ([Azat Khuzhin](https://github.com/azat)). +* NO CL ENTRY: 'Revert "Add information about written rows in progress indicator"'. [#44255](https://github.com/ClickHouse/ClickHouse/pull/44255) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Build libcxx and libcxxabi from llvm-project [#42730](https://github.com/ClickHouse/ClickHouse/pull/42730) ([Robert Schulze](https://github.com/rschu1ze)). +* Allow release only from ready commits [#43019](https://github.com/ClickHouse/ClickHouse/pull/43019) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add global flags to base/ libraries [#43082](https://github.com/ClickHouse/ClickHouse/pull/43082) ([Raúl Marín](https://github.com/Algunenano)). +* Enable strict typing check in tests/ci [#43132](https://github.com/ClickHouse/ClickHouse/pull/43132) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add server UUID for disks access checks (read/read-by-offset/write/delete) to avoid possible races [#43143](https://github.com/ClickHouse/ClickHouse/pull/43143) ([Azat Khuzhin](https://github.com/azat)). +* Do not include libcxx library for C [#43166](https://github.com/ClickHouse/ClickHouse/pull/43166) ([Azat Khuzhin](https://github.com/azat)). +* Followup fixes for FuseFunctionsPass [#43217](https://github.com/ClickHouse/ClickHouse/pull/43217) ([Vladimir C](https://github.com/vdimir)). +* Fix bug in replication queue which can lead to premature mutation finish [#43231](https://github.com/ClickHouse/ClickHouse/pull/43231) ([alesapin](https://github.com/alesapin)). +* Support `CREATE / ALTER / DROP NAMED COLLECTION` queries under according access types [#43252](https://github.com/ClickHouse/ClickHouse/pull/43252) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix race in `IColumn::dumpStructure` [#43269](https://github.com/ClickHouse/ClickHouse/pull/43269) ([Anton Popov](https://github.com/CurtizJ)). +* Sanitize thirdparty libraries for public flags [#43275](https://github.com/ClickHouse/ClickHouse/pull/43275) ([Azat Khuzhin](https://github.com/azat)). +* stress: increase timeout for server waiting after TERM [#43277](https://github.com/ClickHouse/ClickHouse/pull/43277) ([Azat Khuzhin](https://github.com/azat)). +* Fix cloning of ASTIdentifier [#43282](https://github.com/ClickHouse/ClickHouse/pull/43282) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix race on write in `ReplicatedMergeTree` [#43289](https://github.com/ClickHouse/ClickHouse/pull/43289) ([Antonio Andelic](https://github.com/antonio2368)). +* Cancel lambda api url [#43295](https://github.com/ClickHouse/ClickHouse/pull/43295) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fixed: Typo [#43312](https://github.com/ClickHouse/ClickHouse/pull/43312) ([Raevsky Rudolf](https://github.com/lanesket)). +* Analyzer small fixes [#43321](https://github.com/ClickHouse/ClickHouse/pull/43321) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix: make test_read_only_table more stable [#43326](https://github.com/ClickHouse/ClickHouse/pull/43326) ([Igor Nikonov](https://github.com/devcrafter)). +* Make insertRangeFrom() more exception safe [#43338](https://github.com/ClickHouse/ClickHouse/pull/43338) ([Azat Khuzhin](https://github.com/azat)). +* Analyzer added indexes support [#43341](https://github.com/ClickHouse/ClickHouse/pull/43341) ([Maksim Kita](https://github.com/kitaisreal)). 
+* Allow to "drop tables" from s3_plain disk (so as from web disk) [#43343](https://github.com/ClickHouse/ClickHouse/pull/43343) ([Azat Khuzhin](https://github.com/azat)). +* Add --max-consecutive-errors for clickhouse-benchmark [#43344](https://github.com/ClickHouse/ClickHouse/pull/43344) ([Azat Khuzhin](https://github.com/azat)). +* Add [#43072](https://github.com/ClickHouse/ClickHouse/issues/43072) [#43345](https://github.com/ClickHouse/ClickHouse/pull/43345) ([Nikita Taranov](https://github.com/nickitat)). +* Suggest users installation troubleshooting [#43346](https://github.com/ClickHouse/ClickHouse/pull/43346) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update version_date.tsv and changelogs after v22.11.1.1360-stable [#43349](https://github.com/ClickHouse/ClickHouse/pull/43349) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Provide full stacktrace in case of uncaught exception during server startup [#43364](https://github.com/ClickHouse/ClickHouse/pull/43364) ([Azat Khuzhin](https://github.com/azat)). +* Update SECURITY.md on new stable tags [#43365](https://github.com/ClickHouse/ClickHouse/pull/43365) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Splitting checks in CI more [#43373](https://github.com/ClickHouse/ClickHouse/pull/43373) ([alesapin](https://github.com/alesapin)). +* Update version_date.tsv and changelogs after v22.8.9.24-lts [#43393](https://github.com/ClickHouse/ClickHouse/pull/43393) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix mess with signed sizes in SingleValueDataString [#43401](https://github.com/ClickHouse/ClickHouse/pull/43401) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add a comment [#43403](https://github.com/ClickHouse/ClickHouse/pull/43403) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Avoid race condition for updating system.distribution_queue values [#43406](https://github.com/ClickHouse/ClickHouse/pull/43406) ([Azat Khuzhin](https://github.com/azat)). +* Fix flaky 01926_order_by_desc_limit [#43408](https://github.com/ClickHouse/ClickHouse/pull/43408) ([Azat Khuzhin](https://github.com/azat)). +* Fix possible heap-use-after-free in local if history file cannot be created [#43409](https://github.com/ClickHouse/ClickHouse/pull/43409) ([Azat Khuzhin](https://github.com/azat)). +* Fix flaky test [#43435](https://github.com/ClickHouse/ClickHouse/pull/43435) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix backward compatibility check [#43436](https://github.com/ClickHouse/ClickHouse/pull/43436) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix typo [#43446](https://github.com/ClickHouse/ClickHouse/pull/43446) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove noise from logs about NetLink in Docker [#43447](https://github.com/ClickHouse/ClickHouse/pull/43447) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Modify test slightly [#43448](https://github.com/ClickHouse/ClickHouse/pull/43448) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Set run_passes to 1 by default [#43451](https://github.com/ClickHouse/ClickHouse/pull/43451) ([Dmitry Novik](https://github.com/novikd)). +* Do not reuse jemalloc memory in test_global_overcommit [#43453](https://github.com/ClickHouse/ClickHouse/pull/43453) ([Dmitry Novik](https://github.com/novikd)). +* Fix createTableSharedID again [#43458](https://github.com/ClickHouse/ClickHouse/pull/43458) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* Use smaller buffer for small files [#43460](https://github.com/ClickHouse/ClickHouse/pull/43460) ([Alexander Gololobov](https://github.com/davenger)). +* Merging [#42064](https://github.com/ClickHouse/ClickHouse/issues/42064) [#43461](https://github.com/ClickHouse/ClickHouse/pull/43461) ([Anton Popov](https://github.com/CurtizJ)). +* Use all parameters with prefixes from ssm [#43467](https://github.com/ClickHouse/ClickHouse/pull/43467) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Avoid possible DROP hung due to attached web disk [#43489](https://github.com/ClickHouse/ClickHouse/pull/43489) ([Azat Khuzhin](https://github.com/azat)). +* Improve fuzzy search in clickhouse-client/clickhouse-local [#43498](https://github.com/ClickHouse/ClickHouse/pull/43498) ([Azat Khuzhin](https://github.com/azat)). +* check ast limits for create_parser_fuzzer [#43504](https://github.com/ClickHouse/ClickHouse/pull/43504) ([Sema Checherinda](https://github.com/CheSema)). +* Add another test for SingleDataValueString [#43514](https://github.com/ClickHouse/ClickHouse/pull/43514) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Move password reset message from client to server [#43517](https://github.com/ClickHouse/ClickHouse/pull/43517) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Sync everything to persistent storage to avoid writeback affects perf tests [#43530](https://github.com/ClickHouse/ClickHouse/pull/43530) ([Azat Khuzhin](https://github.com/azat)). +* bump lib for diag [#43538](https://github.com/ClickHouse/ClickHouse/pull/43538) ([Dale McDiarmid](https://github.com/gingerwizard)). +* Temporarily disable `test_hive_query` [#43542](https://github.com/ClickHouse/ClickHouse/pull/43542) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Analyzer SumIfToCountIfPass fix [#43543](https://github.com/ClickHouse/ClickHouse/pull/43543) ([Maksim Kita](https://github.com/kitaisreal)). +* Analyzer UniqInjectiveFunctionsEliminationPass [#43547](https://github.com/ClickHouse/ClickHouse/pull/43547) ([Maksim Kita](https://github.com/kitaisreal)). +* Disable broken 00176_bson_parallel_parsing [#43550](https://github.com/ClickHouse/ClickHouse/pull/43550) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add benchmark for query interpretation with JOINs [#43556](https://github.com/ClickHouse/ClickHouse/pull/43556) ([Raúl Marín](https://github.com/Algunenano)). +* Analyzer table functions untuple fix [#43572](https://github.com/ClickHouse/ClickHouse/pull/43572) ([Maksim Kita](https://github.com/kitaisreal)). +* Prepare CI for universal runners preallocated pool [#43579](https://github.com/ClickHouse/ClickHouse/pull/43579) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Iterate list without index-based access [#43584](https://github.com/ClickHouse/ClickHouse/pull/43584) ([Alexander Gololobov](https://github.com/davenger)). +* Remove code that I do not understand [#43593](https://github.com/ClickHouse/ClickHouse/pull/43593) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add table_uuid to system.parts (resubmit) [#43595](https://github.com/ClickHouse/ClickHouse/pull/43595) ([Azat Khuzhin](https://github.com/azat)). +* Move perf tests for Aarch64 from PRs to master [#43623](https://github.com/ClickHouse/ClickHouse/pull/43623) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix flaky 01175_distributed_ddl_output_mode_long [#43626](https://github.com/ClickHouse/ClickHouse/pull/43626) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* Speedup backup config loading [#43627](https://github.com/ClickHouse/ClickHouse/pull/43627) ([Alexander Gololobov](https://github.com/davenger)). +* Fix [#43478](https://github.com/ClickHouse/ClickHouse/issues/43478) [#43636](https://github.com/ClickHouse/ClickHouse/pull/43636) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Do not checkout submodules recursively [#43637](https://github.com/ClickHouse/ClickHouse/pull/43637) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Optimize binary-builder size [#43654](https://github.com/ClickHouse/ClickHouse/pull/43654) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix flaky `KeeperMap` integration tests [#43658](https://github.com/ClickHouse/ClickHouse/pull/43658) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix data race in `Keeper` snapshot [#43663](https://github.com/ClickHouse/ClickHouse/pull/43663) ([Antonio Andelic](https://github.com/antonio2368)). +* Use docker images cache from merged PRs in master and release branches [#43664](https://github.com/ClickHouse/ClickHouse/pull/43664) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update AsynchronousReadIndirectBufferFromRemoteFS.cpp [#43667](https://github.com/ClickHouse/ClickHouse/pull/43667) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix pagination issue in GITHUB_JOB_ID() [#43681](https://github.com/ClickHouse/ClickHouse/pull/43681) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Try fix flaky test 00176_bson_parallel_parsing [#43696](https://github.com/ClickHouse/ClickHouse/pull/43696) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix log messages in clickhouse-copier [#43707](https://github.com/ClickHouse/ClickHouse/pull/43707) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* try to remove clickhouse if already exists [#43728](https://github.com/ClickHouse/ClickHouse/pull/43728) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix 43622 [#43731](https://github.com/ClickHouse/ClickHouse/pull/43731) ([Amos Bird](https://github.com/amosbird)). +* Fix example of colored prompt in client [#43738](https://github.com/ClickHouse/ClickHouse/pull/43738) ([Azat Khuzhin](https://github.com/azat)). +* Minor fixes in annoy index documentation [#43743](https://github.com/ClickHouse/ClickHouse/pull/43743) ([Robert Schulze](https://github.com/rschu1ze)). +* Terminate lost runners [#43756](https://github.com/ClickHouse/ClickHouse/pull/43756) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update README.md [#43759](https://github.com/ClickHouse/ClickHouse/pull/43759) ([Tyler Hannan](https://github.com/tylerhannan)). +* Fix included_elements calculation in AggregateFunctionNullVariadic [#43763](https://github.com/ClickHouse/ClickHouse/pull/43763) ([Dmitry Novik](https://github.com/novikd)). +* Migrate runner_token_rotation_lambda to zip-package deployment [#43766](https://github.com/ClickHouse/ClickHouse/pull/43766) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Analyzer compound expression crash fix [#43768](https://github.com/ClickHouse/ClickHouse/pull/43768) ([Maksim Kita](https://github.com/kitaisreal)). +* Migrate termination lambda to zip-package [#43769](https://github.com/ClickHouse/ClickHouse/pull/43769) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix flaky `test_store_cleanup` [#43770](https://github.com/ClickHouse/ClickHouse/pull/43770) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* Attempt to fix StyleCheck condition [#43773](https://github.com/ClickHouse/ClickHouse/pull/43773) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Rerun PullRequestCI on changed description body [#43777](https://github.com/ClickHouse/ClickHouse/pull/43777) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Yet another fix for AggregateFunctionMinMaxAny [#43778](https://github.com/ClickHouse/ClickHouse/pull/43778) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add google benchmark to contrib [#43779](https://github.com/ClickHouse/ClickHouse/pull/43779) ([Nikita Taranov](https://github.com/nickitat)). +* Fix EN doc as in [#43765](https://github.com/ClickHouse/ClickHouse/issues/43765) [#43780](https://github.com/ClickHouse/ClickHouse/pull/43780) ([Alexander Gololobov](https://github.com/davenger)). +* Detach threads from thread group [#43781](https://github.com/ClickHouse/ClickHouse/pull/43781) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Try making `test_keeper_zookeeper_converter` less flaky [#43789](https://github.com/ClickHouse/ClickHouse/pull/43789) ([Antonio Andelic](https://github.com/antonio2368)). +* Polish UDF substitution visitor [#43790](https://github.com/ClickHouse/ClickHouse/pull/43790) ([Antonio Andelic](https://github.com/antonio2368)). +* Analyzer ConstantNode refactoring [#43793](https://github.com/ClickHouse/ClickHouse/pull/43793) ([Maksim Kita](https://github.com/kitaisreal)). +* Update Poco [#43802](https://github.com/ClickHouse/ClickHouse/pull/43802) ([Alexander Gololobov](https://github.com/davenger)). +* Add another BC check suppression [#43810](https://github.com/ClickHouse/ClickHouse/pull/43810) ([Alexander Tokmakov](https://github.com/tavplubix)). +* tests: fix 01676_long_clickhouse_client_autocomplete flakiness [#43819](https://github.com/ClickHouse/ClickHouse/pull/43819) ([Azat Khuzhin](https://github.com/azat)). +* Use disk operation to serialize and deserialize meta files of StorageFilelog [#43826](https://github.com/ClickHouse/ClickHouse/pull/43826) ([flynn](https://github.com/ucasfl)). +* Add constexpr [#43827](https://github.com/ClickHouse/ClickHouse/pull/43827) ([zhanglistar](https://github.com/zhanglistar)). +* Do not postpone removal of in-memory tables [#43833](https://github.com/ClickHouse/ClickHouse/pull/43833) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Increase some logging level for keeper client. [#43835](https://github.com/ClickHouse/ClickHouse/pull/43835) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* FuseFunctionsPass small fix [#43837](https://github.com/ClickHouse/ClickHouse/pull/43837) ([Maksim Kita](https://github.com/kitaisreal)). +* Followup fixes for XML helpers [#43845](https://github.com/ClickHouse/ClickHouse/pull/43845) ([Alexander Gololobov](https://github.com/davenger)). +* Hold ProcessListEntry a bit longer in case of exception from Interpreter [#43847](https://github.com/ClickHouse/ClickHouse/pull/43847) ([Alexander Tokmakov](https://github.com/tavplubix)). +* A little improve performance of PODArray [#43860](https://github.com/ClickHouse/ClickHouse/pull/43860) ([zhanglistar](https://github.com/zhanglistar)). +* Change email for robot-clickhouse to immutable one [#43861](https://github.com/ClickHouse/ClickHouse/pull/43861) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Rerun DocsCheck on edited PR description [#43862](https://github.com/ClickHouse/ClickHouse/pull/43862) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+* Temporarily disable misc-* slow clang-tidy checks [#43863](https://github.com/ClickHouse/ClickHouse/pull/43863) ([Robert Schulze](https://github.com/rschu1ze)). +* do not leave tmp part on disk, do not go to the keeper for remove it [#43866](https://github.com/ClickHouse/ClickHouse/pull/43866) ([Sema Checherinda](https://github.com/CheSema)). +* do not read part status just for logging [#43868](https://github.com/ClickHouse/ClickHouse/pull/43868) ([Sema Checherinda](https://github.com/CheSema)). +* Analyzer Context refactoring [#43884](https://github.com/ClickHouse/ClickHouse/pull/43884) ([Maksim Kita](https://github.com/kitaisreal)). +* Analyzer CTE resolution fix [#43893](https://github.com/ClickHouse/ClickHouse/pull/43893) ([Maksim Kita](https://github.com/kitaisreal)). +* Improve release script [#43894](https://github.com/ClickHouse/ClickHouse/pull/43894) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Use only PRs to our repository in pr_info on push [#43895](https://github.com/ClickHouse/ClickHouse/pull/43895) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Join engine works with analyzer [#43897](https://github.com/ClickHouse/ClickHouse/pull/43897) ([Vladimir C](https://github.com/vdimir)). +* Fix reports [#43904](https://github.com/ClickHouse/ClickHouse/pull/43904) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix vim settings (and make it compatible with neovim) [#43909](https://github.com/ClickHouse/ClickHouse/pull/43909) ([Azat Khuzhin](https://github.com/azat)). +* Fix clang tidy errors introduced in [#43834](https://github.com/ClickHouse/ClickHouse/issues/43834) [#43911](https://github.com/ClickHouse/ClickHouse/pull/43911) ([Nikita Taranov](https://github.com/nickitat)). +* Fix BACKUP TO S3 for Google Cloud Storage [#43940](https://github.com/ClickHouse/ClickHouse/pull/43940) ([Azat Khuzhin](https://github.com/azat)). +* Fix tags workflow [#43942](https://github.com/ClickHouse/ClickHouse/pull/43942) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Generate missed changelogs for latest releases [#43944](https://github.com/ClickHouse/ClickHouse/pull/43944) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix typo in tests/ci/bugfix_validate_check.py [#43973](https://github.com/ClickHouse/ClickHouse/pull/43973) ([Vladimir C](https://github.com/vdimir)). +* Remove test logging of signal "EINTR" [#44001](https://github.com/ClickHouse/ClickHouse/pull/44001) ([Kruglov Pavel](https://github.com/Avogar)). +* Some cleanup of isDeterministic(InScopeOfQuery)() [#44011](https://github.com/ClickHouse/ClickHouse/pull/44011) ([Robert Schulze](https://github.com/rschu1ze)). +* Try to keep runners alive for longer [#44015](https://github.com/ClickHouse/ClickHouse/pull/44015) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix relaxed "too many parts" threshold [#44021](https://github.com/ClickHouse/ClickHouse/pull/44021) ([Sergei Trifonov](https://github.com/serxa)). +* Correct CompressionCodecGorilla exception message [#44023](https://github.com/ClickHouse/ClickHouse/pull/44023) ([Duc Canh Le](https://github.com/canhld94)). +* Fix exception message [#44034](https://github.com/ClickHouse/ClickHouse/pull/44034) ([Nikolay Degterinsky](https://github.com/evillique)). +* Update version_date.tsv and changelogs after v22.8.11.15-lts [#44035](https://github.com/ClickHouse/ClickHouse/pull/44035) ([robot-clickhouse](https://github.com/robot-clickhouse)). 
+* do not hardlink serialization.json in new part [#44036](https://github.com/ClickHouse/ClickHouse/pull/44036) ([Sema Checherinda](https://github.com/CheSema)). +* Fix tracing of profile events [#44045](https://github.com/ClickHouse/ClickHouse/pull/44045) ([Anton Popov](https://github.com/CurtizJ)). +* Slightly better clickhouse disks and remove DiskMemory [#44050](https://github.com/ClickHouse/ClickHouse/pull/44050) ([alesapin](https://github.com/alesapin)). +* Assign release PRs [#44055](https://github.com/ClickHouse/ClickHouse/pull/44055) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Merging [#36877](https://github.com/ClickHouse/ClickHouse/issues/36877) [#44059](https://github.com/ClickHouse/ClickHouse/pull/44059) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* add changelogs [#44061](https://github.com/ClickHouse/ClickHouse/pull/44061) ([Dan Roscigno](https://github.com/DanRoscigno)). +* Fix the CACHE_PATH creation for default value [#44079](https://github.com/ClickHouse/ClickHouse/pull/44079) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix aspell [#44090](https://github.com/ClickHouse/ClickHouse/pull/44090) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix another bug in AggregateFunctionMinMaxAny [#44091](https://github.com/ClickHouse/ClickHouse/pull/44091) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Analyzer aggregate function lambda crash fix [#44098](https://github.com/ClickHouse/ClickHouse/pull/44098) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix -Wshorten-64-to-32 on FreeBSD and enable -Werror [#44121](https://github.com/ClickHouse/ClickHouse/pull/44121) ([Azat Khuzhin](https://github.com/azat)). +* Fix flaky test `02497_trace_events_stress_long` [#44124](https://github.com/ClickHouse/ClickHouse/pull/44124) ([Anton Popov](https://github.com/CurtizJ)). +* Minor file renaming [#44125](https://github.com/ClickHouse/ClickHouse/pull/44125) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix typo [#44127](https://github.com/ClickHouse/ClickHouse/pull/44127) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Better descriptions of signals [#44129](https://github.com/ClickHouse/ClickHouse/pull/44129) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* make calls to be sure that parts are deleted [#44156](https://github.com/ClickHouse/ClickHouse/pull/44156) ([Sema Checherinda](https://github.com/CheSema)). +* Ignore "session expired" errors after BC check [#44157](https://github.com/ClickHouse/ClickHouse/pull/44157) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix incorrect assertion [#44160](https://github.com/ClickHouse/ClickHouse/pull/44160) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Close GRPC channels in tests [#44184](https://github.com/ClickHouse/ClickHouse/pull/44184) ([Antonio Andelic](https://github.com/antonio2368)). +* Remove misleading message from logs [#44190](https://github.com/ClickHouse/ClickHouse/pull/44190) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Minor clang-tidy fixes in fromUnixTimestamp64() [#44194](https://github.com/ClickHouse/ClickHouse/pull/44194) ([Igor Nikonov](https://github.com/devcrafter)). +* Hotfix for "check_status.tsv doesn't exists" in stress tests [#44197](https://github.com/ClickHouse/ClickHouse/pull/44197) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* Fix documentation after [#42438](https://github.com/ClickHouse/ClickHouse/issues/42438) [#44200](https://github.com/ClickHouse/ClickHouse/pull/44200) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix an assertion in transactions [#44202](https://github.com/ClickHouse/ClickHouse/pull/44202) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add log message [#44237](https://github.com/ClickHouse/ClickHouse/pull/44237) ([Alexander Tokmakov](https://github.com/tavplubix)). + diff --git a/docs/changelogs/v22.9.7.34-stable.md b/docs/changelogs/v22.9.7.34-stable.md new file mode 100644 index 00000000000..042347b3815 --- /dev/null +++ b/docs/changelogs/v22.9.7.34-stable.md @@ -0,0 +1,37 @@ +--- +sidebar_position: 1 +sidebar_label: 2022 +--- + +# 2022 Changelog + +### ClickHouse release v22.9.7.34-stable (613fe09ca2e) FIXME as compared to v22.9.6.20-stable (ef6343f9579) + +#### Bug Fix +* Backported in [#43099](https://github.com/ClickHouse/ClickHouse/issues/43099): Updated normaliser to clone the alias ast. resolves [#42452](https://github.com/ClickHouse/ClickHouse/issues/42452) Implementation: * Updated QueryNormalizer to clone alias ast, when its replaced. Previously just assigning the same leads to exception in LogicalExpressinsOptimizer as it would be the same parent being inserted again. * This bug is not seen with new analyser (allow_experimental_analyzer), so no changes for it. I added a test for the same. [#42827](https://github.com/ClickHouse/ClickHouse/pull/42827) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). + +#### Build/Testing/Packaging Improvement +* Backported in [#44111](https://github.com/ClickHouse/ClickHouse/issues/44111): Bring sha512 sums back to the building step. [#44017](https://github.com/ClickHouse/ClickHouse/pull/44017) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in official stable or prestable release) + +* Backported in [#43612](https://github.com/ClickHouse/ClickHouse/issues/43612): Fix bad inefficiency of `remote_filesystem_read_method=read` with filesystem cache. Closes [#42125](https://github.com/ClickHouse/ClickHouse/issues/42125). [#42129](https://github.com/ClickHouse/ClickHouse/pull/42129) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Backported in [#43526](https://github.com/ClickHouse/ClickHouse/issues/43526): Fix incorrect UserTimeMicroseconds/SystemTimeMicroseconds accounting. [#42791](https://github.com/ClickHouse/ClickHouse/pull/42791) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#43518](https://github.com/ClickHouse/ClickHouse/issues/43518): Fix rare possible hung on query cancellation. [#42874](https://github.com/ClickHouse/ClickHouse/pull/42874) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#43752](https://github.com/ClickHouse/ClickHouse/issues/43752): An issue with the following exception has been reported while trying to read a Parquet file from S3 into ClickHouse:. [#43297](https://github.com/ClickHouse/ClickHouse/pull/43297) ([Arthur Passos](https://github.com/arthurpassos)). +* Backported in [#43618](https://github.com/ClickHouse/ClickHouse/issues/43618): Fix sumMap() for Nullable(Decimal()). [#43414](https://github.com/ClickHouse/ClickHouse/pull/43414) ([Azat Khuzhin](https://github.com/azat)). +* Backported in [#43887](https://github.com/ClickHouse/ClickHouse/issues/43887): Fixed `ALTER ... RESET SETTING` with `ON CLUSTER`. It could be applied to one replica only. 
Fixes [#43843](https://github.com/ClickHouse/ClickHouse/issues/43843). [#43848](https://github.com/ClickHouse/ClickHouse/pull/43848) ([Elena Torró](https://github.com/elenatorro)). +* Backported in [#44145](https://github.com/ClickHouse/ClickHouse/issues/44145): Fix undefined behavior in the `quantiles` function, which might lead to uninitialized memory. Found by fuzzer. This closes [#44066](https://github.com/ClickHouse/ClickHouse/issues/44066). [#44067](https://github.com/ClickHouse/ClickHouse/pull/44067) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Fix multipart upload for large S3 object, backport to 22.9'. [#44219](https://github.com/ClickHouse/ClickHouse/pull/44219) ([ianton-ru](https://github.com/ianton-ru)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Yet another fix for AggregateFunctionMinMaxAny [#43778](https://github.com/ClickHouse/ClickHouse/pull/43778) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Use only PRs to our repository in pr_info on push [#43895](https://github.com/ClickHouse/ClickHouse/pull/43895) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix tags workflow [#43942](https://github.com/ClickHouse/ClickHouse/pull/43942) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Assign release PRs [#44055](https://github.com/ClickHouse/ClickHouse/pull/44055) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix another bug in AggregateFunctionMinMaxAny [#44091](https://github.com/ClickHouse/ClickHouse/pull/44091) ([Alexander Tokmakov](https://github.com/tavplubix)). + diff --git a/docs/en/development/build.md b/docs/en/development/build.md index 8982a3bc0a4..01b246326cf 100644 --- a/docs/en/development/build.md +++ b/docs/en/development/build.md @@ -33,6 +33,13 @@ On Ubuntu/Debian you can use the automatic installation script (check [official sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" ``` +Note: in case of troubles, you can also use this: + +```bash +sudo apt-get install software-properties-common +sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test +``` + For other Linux distribution - check the availability of the [prebuild packages](https://releases.llvm.org/download.html) or build clang [from sources](https://clang.llvm.org/get_started.html). #### Use the latest clang for Builds diff --git a/docs/en/engines/table-engines/mergetree-family/summingmergetree.md b/docs/en/engines/table-engines/mergetree-family/summingmergetree.md index 5a2c0718610..49f90d1c292 100644 --- a/docs/en/engines/table-engines/mergetree-family/summingmergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/summingmergetree.md @@ -34,7 +34,7 @@ For a description of request parameters, see [request description](../../../sql- `columns` - a tuple with the names of columns where values will be summarized. Optional parameter. The columns must be of a numeric type and must not be in the primary key. - If `columns` not specified, ClickHouse summarizes the values in all columns with a numeric data type that are not in the primary key. + If `columns` is not specified, ClickHouse summarizes the values in all columns with a numeric data type that are not in the primary key. 
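+For illustration, a minimal sketch of the behavior described above when `columns` is omitted (table and column names are invented):
+
+```sql
+-- No `columns` tuple is given, so every numeric column outside the primary key
+-- (here `hits` and `bytes`) is summed during merges.
+CREATE TABLE summing_sketch
+(
+    key   UInt32,
+    hits  UInt64,
+    bytes UInt64
+)
+ENGINE = SummingMergeTree()
+ORDER BY key;
+```
+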
### Query clauses diff --git a/docs/en/engines/table-engines/special/buffer.md b/docs/en/engines/table-engines/special/buffer.md index ba2381d3c01..8950d9b1aef 100644 --- a/docs/en/engines/table-engines/special/buffer.md +++ b/docs/en/engines/table-engines/special/buffer.md @@ -8,6 +8,10 @@ sidebar_label: Buffer Buffers the data to write in RAM, periodically flushing it to another table. During the read operation, data is read from the buffer and the other table simultaneously. +:::note +A recommended alternative to the Buffer Table Engine is enabling [asynchronous inserts](/docs/en/guides/best-practices/asyncinserts.md). +::: + ``` sql Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes) ``` @@ -24,7 +28,7 @@ Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_ #### num_layers -`num_layers` – Parallelism layer. Physically, the table will be represented as `num_layers` of independent buffers. Recommended value: 16. +`num_layers` – Parallelism layer. Physically, the table will be represented as `num_layers` of independent buffers. #### min_time, max_time, min_rows, max_rows, min_bytes, and max_bytes @@ -34,11 +38,11 @@ Conditions for flushing data from the buffer. #### flush_time, flush_rows, and flush_bytes -Conditions for flushing data from the buffer, that will happen only in background (omitted or zero means no `flush*` parameters). +Conditions for flushing data from the buffer in the background (omitted or zero means no `flush*` parameters). Data is flushed from the buffer and written to the destination table if all the `min*` conditions or at least one `max*` condition are met. -Also, if at least one `flush*` condition are met flush initiated in background, this is different from `max*`, since `flush*` allows you to configure background flushes separately to avoid adding latency for `INSERT` (into `Buffer`) queries. +Also, if at least one `flush*` condition is met, a flush is initiated in the background. This differs from `max*` since `flush*` allows you to configure background flushes separately to avoid adding latency for `INSERT` queries into Buffer tables. #### min_time, max_time, and flush_time @@ -52,48 +56,54 @@ Condition for the number of rows in the buffer. Condition for the number of bytes in the buffer. -During the write operation, data is inserted to a `num_layers` number of random buffers. Or, if the data part to insert is large enough (greater than `max_rows` or `max_bytes`), it is written directly to the destination table, omitting the buffer. +During the write operation, data is inserted into one or more random buffers (configured with `num_layers`). Or, if the data part to insert is large enough (greater than `max_rows` or `max_bytes`), it is written directly to the destination table, omitting the buffer. The conditions for flushing the data are calculated separately for each of the `num_layers` buffers. For example, if `num_layers = 16` and `max_bytes = 100000000`, the maximum RAM consumption is 1.6 GB. Example: ``` sql -CREATE TABLE merge.hits_buffer AS merge.hits ENGINE = Buffer(merge, hits, 16, 10, 100, 10000, 1000000, 10000000, 100000000) +CREATE TABLE merge.hits_buffer AS merge.hits ENGINE = Buffer(merge, hits, 1, 10, 100, 10000, 1000000, 10000000, 100000000) ``` -Creating a `merge.hits_buffer` table with the same structure as `merge.hits` and using the Buffer engine. When writing to this table, data is buffered in RAM and later written to the ‘merge.hits’ table. 16 buffers are created. 
The data in each of them is flushed if either 100 seconds have passed, or one million rows have been written, or 100 MB of data have been written; or if simultaneously 10 seconds have passed and 10,000 rows and 10 MB of data have been written. For example, if just one row has been written, after 100 seconds it will be flushed, no matter what. But if many rows have been written, the data will be flushed sooner. +Creating a `merge.hits_buffer` table with the same structure as `merge.hits` and using the Buffer engine. When writing to this table, data is buffered in RAM and later written to the ‘merge.hits’ table. A single buffer is created and the data is flushed if either: +- 100 seconds have passed since the last flush (`max_time`) or +- 1 million rows have been written (`max_rows`) or +- 100 MB of data have been written (`max_bytes`) or +- 10 seconds have passed (`min_time`) and 10,000 rows (`min_rows`) and 10 MB (`min_bytes`) of data have been written -When the server is stopped, with `DROP TABLE` or `DETACH TABLE`, buffer data is also flushed to the destination table. +For example, if just one row has been written, after 100 seconds, it will be flushed, no matter what. But if many rows have been written, the data will be flushed sooner. + +When the server is stopped, with `DROP TABLE` or `DETACH TABLE`, buffered data is also flushed to the destination table. You can set empty strings in single quotation marks for the database and table name. This indicates the absence of a destination table. In this case, when the data flush conditions are reached, the buffer is simply cleared. This may be useful for keeping a window of data in memory. When reading from a Buffer table, data is processed both from the buffer and from the destination table (if there is one). -Note that the Buffer tables does not support an index. In other words, data in the buffer is fully scanned, which might be slow for large buffers. (For data in a subordinate table, the index that it supports will be used.) +Note that the Buffer table does not support an index. In other words, data in the buffer is fully scanned, which might be slow for large buffers. (For data in a subordinate table, the index that it supports will be used.) If the set of columns in the Buffer table does not match the set of columns in a subordinate table, a subset of columns that exist in both tables is inserted. If the types do not match for one of the columns in the Buffer table and a subordinate table, an error message is entered in the server log, and the buffer is cleared. -The same thing happens if the subordinate table does not exist when the buffer is flushed. +The same happens if the subordinate table does not exist when the buffer is flushed. :::warning -Running ALTER on the Buffer table in releases made before 26 Oct 2021 will cause a `Block structure mismatch` error (see [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117) and [#30565](https://github.com/ClickHouse/ClickHouse/pull/30565)), so deleting the Buffer table and then recreating is the only option. It is advisable to check that this error is fixed in your release before trying to run ALTER on the Buffer table. +Running ALTER on the Buffer table in releases made before 26 Oct 2021 will cause a `Block structure mismatch` error (see [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117) and [#30565](https://github.com/ClickHouse/ClickHouse/pull/30565)), so deleting the Buffer table and then recreating is the only option. 
Check that this error is fixed in your release before trying to run ALTER on the Buffer table. ::: If the server is restarted abnormally, the data in the buffer is lost. -`FINAL` and `SAMPLE` do not work correctly for Buffer tables. These conditions are passed to the destination table, but are not used for processing data in the buffer. If these features are required we recommend only using the Buffer table for writing, while reading from the destination table. +`FINAL` and `SAMPLE` do not work correctly for Buffer tables. These conditions are passed to the destination table but are not used for processing data in the buffer. If these features are required, we recommend only using the Buffer table for writing while reading from the destination table. -When adding data to a Buffer, one of the buffers is locked. This causes delays if a read operation is simultaneously being performed from the table. +When adding data to a Buffer table, one of the buffers is locked. This causes delays if a read operation is simultaneously being performed from the table. -Data that is inserted to a Buffer table may end up in the subordinate table in a different order and in different blocks. Because of this, a Buffer table is difficult to use for writing to a CollapsingMergeTree correctly. To avoid problems, you can set `num_layers` to 1. +Data that is inserted into a Buffer table may end up in the subordinate table in a different order and in different blocks. Because of this, a Buffer table is difficult to use for writing to a CollapsingMergeTree correctly. To avoid problems, you can set `num_layers` to 1. If the destination table is replicated, some expected characteristics of replicated tables are lost when writing to a Buffer table. The random changes to the order of rows and sizes of data parts cause data deduplication to quit working, which means it is not possible to have a reliable ‘exactly once’ write to replicated tables. Due to these disadvantages, we can only recommend using a Buffer table in rare cases. -A Buffer table is used when too many INSERTs are received from a large number of servers over a unit of time and data can’t be buffered before insertion, which means the INSERTs can’t run fast enough. +A Buffer table is used when too many INSERTs are received from a large number of servers over a unit of time, and data can’t be buffered before insertion, which means the INSERTs can’t run fast enough. -Note that it does not make sense to insert data one row at a time, even for Buffer tables. This will only produce a speed of a few thousand rows per second, while inserting larger blocks of data can produce over a million rows per second (see the section “Performance”). +Note that it does not make sense to insert data one row at a time, even for Buffer tables. This will only produce a speed of a few thousand rows per second while inserting larger blocks of data can produce over a million rows per second. 
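Since the note at the top of this page recommends asynchronous inserts as the alternative to the Buffer engine, a minimal sketch of that approach follows. The database, table, and values are placeholders; the two settings are what enable server-side buffering of small inserts (see the asynchronous inserts guide linked in the note).

``` sql
-- Hypothetical table and values; async_insert buffers small INSERTs on the server,
-- and wait_for_async_insert = 0 returns control to the client before the flush happens.
INSERT INTO my_database.my_table
SETTINGS async_insert = 1, wait_for_async_insert = 0
VALUES (1, 'a'), (2, 'b');
```

Setting `wait_for_async_insert = 1` instead makes the client wait until the buffered data has actually been written, trading latency for a stronger delivery guarantee.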
[Original article](https://clickhouse.com/docs/en/engines/table-engines/special/buffer/) diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md index 391d3a3f59a..53f885e3963 100644 --- a/docs/en/getting-started/install.md +++ b/docs/en/getting-started/install.md @@ -6,10 +6,11 @@ slug: /en/install # Installing ClickHouse -You have two options for getting up and running with ClickHouse: +You have three options for getting up and running with ClickHouse: -- **[ClickHouse Cloud](https://clickhouse.com/cloud/):** the official ClickHouse as a service, - built by, maintained, and supported by the creators of ClickHouse -- **[Self-managed ClickHouse](https://github.com/ClickHouse/ClickHouse):** ClickHouse can run on any Linux, FreeBSD, or Mac OS X with x86_64, AArch64, or PowerPC64LE CPU architecture +- **[ClickHouse Cloud](https://clickhouse.com/cloud/):** The official ClickHouse as a service, - built by, maintained and supported by the creators of ClickHouse +- **[Self-managed ClickHouse](#self-managed-install):** ClickHouse can run on any Linux, FreeBSD, or Mac OS X with x86-64, ARM, or PowerPC64LE CPU architecture +- **[Docker Image](https://hub.docker.com/r/clickhouse/clickhouse-server/):** Read the guide with the official image in Docker Hub ## ClickHouse Cloud @@ -22,73 +23,49 @@ The quickest and easiest way to get up and running with ClickHouse is to create Once your Cloud service is provisioned, you will be able to [connect to it](/docs/en/integrations/connect-a-client.md) and start [inserting data](/docs/en/integrations/data-ingestion.md). -:::note -The [Quick Start](/docs/en/quick-start.mdx) walks through the steps to get a ClickHouse Cloud service up and running, connecting to it, and inserting data. -::: - -## Self-Managed Requirements - -### CPU Architecture - -ClickHouse can run on any Linux, FreeBSD, or Mac OS X with x86_64, AArch64, or PowerPC64LE CPU architecture. - -Official pre-built binaries are typically compiled for x86_64 and leverage SSE 4.2 instruction set, so unless otherwise stated usage of CPU that supports it becomes an additional system requirement. Here’s the command to check if current CPU has support for SSE 4.2: - -``` bash -$ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not supported" -``` - -To run ClickHouse on processors that do not support SSE 4.2 or have AArch64 or PowerPC64LE architecture, you should [build ClickHouse from sources](#from-sources) with proper configuration adjustments. - -ClickHouse implements parallel data processing and uses all the hardware resources available. When choosing a processor, take into account that ClickHouse works more efficiently at configurations with a large number of cores but a lower clock rate than at configurations with fewer cores and a higher clock rate. For example, 16 cores with 2600 MHz is preferable to 8 cores with 3600 MHz. - -It is recommended to use **Turbo Boost** and **hyper-threading** technologies. It significantly improves performance with a typical workload. - -### RAM {#ram} - -We recommend using a minimum of 4GB of RAM to perform non-trivial queries. The ClickHouse server can run with a much smaller amount of RAM, but it requires memory for processing queries. - -The required volume of RAM depends on: - -- The complexity of queries. -- The amount of data that is processed in queries. 
- -To calculate the required volume of RAM, you should estimate the size of temporary data for [GROUP BY](/docs/en/sql-reference/statements/select/group-by.md#select-group-by-clause), [DISTINCT](/docs/en/sql-reference/statements/select/distinct.md#select-distinct), [JOIN](/docs/en/sql-reference/statements/select/join.md#select-join) and other operations you use. - -ClickHouse can use external memory for temporary data. See [GROUP BY in External Memory](/docs/en/sql-reference/statements/select/group-by.md#select-group-by-in-external-memory) for details. - -### Swap File {#swap-file} - -Disable the swap file for production environments. - -### Storage Subsystem {#storage-subsystem} - -You need to have 2GB of free disk space to install ClickHouse. - -The volume of storage required for your data should be calculated separately. Assessment should include: - -- Estimation of the data volume. - - You can take a sample of the data and get the average size of a row from it. Then multiply the value by the number of rows you plan to store. - -- The data compression coefficient. - - To estimate the data compression coefficient, load a sample of your data into ClickHouse, and compare the actual size of the data with the size of the table stored. For example, clickstream data is usually compressed by 6-10 times. - -To calculate the final volume of data to be stored, apply the compression coefficient to the estimated data volume. If you plan to store data in several replicas, then multiply the estimated volume by the number of replicas. - -### Network {#network} - -If possible, use networks of 10G or higher class. - -The network bandwidth is critical for processing distributed queries with a large amount of intermediate data. Besides, network speed affects replication processes. - -### Software {#software} - -ClickHouse is developed primarily for the Linux family of operating systems. The recommended Linux distribution is Ubuntu. The `tzdata` package should be installed in the system. ## Self-Managed Install +1. The simplest way to download ClickHouse locally is to run the following command. If your operating system is supported, an appropriate ClickHouse binary will be downloaded and made runnable: + ```bash + curl https://clickhouse.com/ | sh + ``` + +1. Run the `install` command, which defines a collection of useful symlinks along with the files and folders used by ClickHouse - all of which you can see in the output of the install script: + ```bash + sudo ./clickhouse install + ``` + +1. At the end of the install script, you are prompted for a password for the `default` user. Feel free to enter a password, or you can optionally leave it blank: + ```response + Creating log directory /var/log/clickhouse-server. + Creating data directory /var/lib/clickhouse. + Creating pid directory /var/run/clickhouse-server. + chown -R clickhouse:clickhouse '/var/log/clickhouse-server' + chown -R clickhouse:clickhouse '/var/run/clickhouse-server' + chown clickhouse:clickhouse '/var/lib/clickhouse' + Enter password for default user: + ``` + You should see the following output: + ```response + ClickHouse has been successfully installed. + + Start clickhouse-server with: + sudo clickhouse start + + Start clickhouse-client with: + clickhouse-client + ``` + +1. Run the following command to start the ClickHouse server: + ```bash + sudo clickhouse start + ``` + +:::tip +The [Quick Start](/docs/en/quick-start.mdx/#step-1-get-clickhouse) walks through the steps to download and run ClickHouse, connect to it, and insert data. 
+::: + ## Available Installation Options {#available-installation-options} ### From DEB Packages {#install-from-deb-packages} @@ -278,50 +255,16 @@ For production environments, it’s recommended to use the latest `stable`-versi To run ClickHouse inside Docker follow the guide on [Docker Hub](https://hub.docker.com/r/clickhouse/clickhouse-server/). Those images use official `deb` packages inside. -### Single Binary {#from-single-binary} - -You can install ClickHouse on Linux using a single portable binary from the latest commit of the `master` branch: [https://builds.clickhouse.com/master/amd64/clickhouse]. - -``` bash -curl -O 'https://builds.clickhouse.com/master/amd64/clickhouse' && chmod a+x clickhouse -sudo ./clickhouse install -``` - -### From Precompiled Binaries for Non-Standard Environments {#from-binaries-non-linux} - -For non-Linux operating systems and for AArch64 CPU architecture, ClickHouse builds are provided as a cross-compiled binary from the latest commit of the `master` branch (with a few hours delay). - -- [MacOS x86_64](https://builds.clickhouse.com/master/macos/clickhouse) - ```bash - curl -O 'https://builds.clickhouse.com/master/macos/clickhouse' && chmod a+x ./clickhouse - ``` -- [MacOS Aarch64 (Apple Silicon)](https://builds.clickhouse.com/master/macos-aarch64/clickhouse) - ```bash - curl -O 'https://builds.clickhouse.com/master/macos-aarch64/clickhouse' && chmod a+x ./clickhouse - ``` -- [FreeBSD x86_64](https://builds.clickhouse.com/master/freebsd/clickhouse) - ```bash - curl -O 'https://builds.clickhouse.com/master/freebsd/clickhouse' && chmod a+x ./clickhouse - ``` -- [Linux AArch64](https://builds.clickhouse.com/master/aarch64/clickhouse) - ```bash - curl -O 'https://builds.clickhouse.com/master/aarch64/clickhouse' && chmod a+x ./clickhouse - ``` - -Run `sudo ./clickhouse install` to install ClickHouse system-wide (also with needed configuration files, configuring users etc.). Then run `sudo clickhouse start` commands to start the clickhouse-server and `clickhouse-client` to connect to it. - -Use the `clickhouse client` to connect to the server, or `clickhouse local` to process local data. - ### From Sources {#from-sources} To manually compile ClickHouse, follow the instructions for [Linux](/docs/en/development/build.md) or [Mac OS X](/docs/en/development/build-osx.md). -You can compile packages and install them or use programs without installing packages. Also by building manually you can disable SSE 4.2 requirement or build for AArch64 CPUs. +You can compile packages and install them or use programs without installing packages. - Client: programs/clickhouse-client - Server: programs/clickhouse-server + Client: /programs/clickhouse-client + Server: /programs/clickhouse-server -You’ll need to create a data and metadata folders and `chown` them for the desired user. Their paths can be changed in server config (src/programs/server/config.xml), by default they are: +You’ll need to create data and metadata folders manually and `chown` them for the desired user. Their paths can be changed in server config (src/programs/server/config.xml), by default they are: /var/lib/clickhouse/data/default/ /var/lib/clickhouse/metadata/default/ @@ -406,3 +349,42 @@ SELECT 1 **Congratulations, the system works!** To continue experimenting, you can download one of the test data sets or go through [tutorial](/docs/en/tutorial.md). 
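As a quick sanity check after any of the installation methods above, a query along these lines (a sketch; run it from `clickhouse-client` or any other connected client) confirms the server version and how long it has been running:

``` sql
-- Both functions are built in; the output depends on your installation.
SELECT version() AS server_version, uptime() AS uptime_seconds;
```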
+ +## Recommendations for Self-Managed ClickHouse + +ClickHouse can run on any Linux, FreeBSD, or Mac OS X with x86-64, ARM, or PowerPC64LE CPU architecture. + +ClickHouse uses all hardware resources available to process data. + +ClickHouse tends to work more efficiently with a large number of cores at a lower clock rate than with fewer cores at a higher clock rate. + +We recommend using a minimum of 4GB of RAM to perform non-trivial queries. The ClickHouse server can run with a much smaller amount of RAM, but queries will then frequently abort. + +The required volume of RAM generally depends on: + +- The complexity of queries. +- The amount of data that is processed in queries. + +To calculate the required volume of RAM, you may estimate the size of temporary data for [GROUP BY](/docs/en/sql-reference/statements/select/group-by.md#select-group-by-clause), [DISTINCT](/docs/en/sql-reference/statements/select/distinct.md#select-distinct), [JOIN](/docs/en/sql-reference/statements/select/join.md#select-join) and other operations you use. + +To reduce memory consumption, ClickHouse can swap temporary data to external storage. See [GROUP BY in External Memory](/docs/en/sql-reference/statements/select/group-by.md#select-group-by-in-external-memory) for details. + +We recommend to disable the operating system's swap file in production environments. + +The ClickHouse binary requires at least 2.5 GB of disk space for installation. + +The volume of storage required for your data may be calculated separately based on + +- an estimation of the data volume. + + You can take a sample of the data and get the average size of a row from it. Then multiply the value by the number of rows you plan to store. + +- The data compression coefficient. + + To estimate the data compression coefficient, load a sample of your data into ClickHouse, and compare the actual size of the data with the size of the table stored. For example, clickstream data is usually compressed by 6-10 times. + +To calculate the final volume of data to be stored, apply the compression coefficient to the estimated data volume. If you plan to store data in several replicas, then multiply the estimated volume by the number of replicas. + +For distributed ClickHouse deployments (clustering), we recommend at least 10G class network connectivity. + +Network bandwidth is critical for processing distributed queries with a large amount of intermediate data. Besides, network speed affects replication processes. diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 58998a6f491..b7ef859f974 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1415,7 +1415,7 @@ Differs from [PrettySpaceNoEscapes](#prettyspacenoescapes) in that up to 10,000 ## RowBinary {#rowbinary} -Formats and parses data by row in binary format. Rows and values are listed consecutively, without separators. +Formats and parses data by row in binary format. Rows and values are listed consecutively, without separators. Because data is in the binary format the delimiter after `FORMAT RowBinary` is strictly specified as next: any number of whitespaces (`' '` - space, code `0x20`; `'\t'` - tab, code `0x09`; `'\f'` - form feed, code `0x0C`) followed by exactly one new line sequence (Windows style `"\r\n"` or Unix style `'\n'`), immediately followed by binary data. This format is less efficient than the Native format since it is row-based. Integers use fixed-length little-endian representation. For example, UInt64 uses 8 bytes. 
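To put a number on the compression coefficient mentioned in the self-managed recommendations above, the sizes of already-loaded data can be compared per table. The query below is a sketch against the standard `system.parts` table and assumes a sample of your data has already been inserted.

``` sql
-- Compressed vs. uncompressed bytes of active parts, per table.
SELECT
    database,
    table,
    formatReadableSize(sum(data_compressed_bytes))   AS compressed,
    formatReadableSize(sum(data_uncompressed_bytes)) AS uncompressed,
    round(sum(data_uncompressed_bytes) / sum(data_compressed_bytes), 2) AS ratio
FROM system.parts
WHERE active
GROUP BY database, table
ORDER BY sum(data_compressed_bytes) DESC;
```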
diff --git a/docs/en/operations/settings/constraints-on-settings.md b/docs/en/operations/settings/constraints-on-settings.md index bb015f80834..4857bcd30c0 100644 --- a/docs/en/operations/settings/constraints-on-settings.md +++ b/docs/en/operations/settings/constraints-on-settings.md @@ -92,7 +92,7 @@ Code: 452, e.displayText() = DB::Exception: Setting force_index_by_date should n **Note:** the `default` profile has special handling: all the constraints defined for the `default` profile become the default constraints, so they restrict all the users until they’re overridden explicitly for these users. ## Constraints on Merge Tree Settings -It is possible to set constraints for [merge tree settings](merge-tree-settings.md). There constraints are applied when table with merge tree engine is created or its storage settings are altered. Name of merge tree setting must be prepended by `merge_tree_` prefix when referenced in `` section. +It is possible to set constraints for [merge tree settings](merge-tree-settings.md). These constraints are applied when table with merge tree engine is created or its storage settings are altered. Name of merge tree setting must be prepended by `merge_tree_` prefix when referenced in `` section. **Example:** Forbid to create new tables with explicitly specified `storage_policy` diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index ddfaab02159..645a38a7f04 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -3588,6 +3588,31 @@ y Nullable(String) z IPv4 ``` +## schema_inference_make_columns_nullable {#schema_inference_make_columns_nullable} + +Controls making inferred types `Nullable` in schema inference for formats without information about nullability. +If the setting is enabled, the inferred type will be `Nullable` only if column contains `NULL` in a sample that is parsed during schema inference. + +Default value: `false`. + +## input_format_try_infer_integers {#input_format_try_infer_integers} + +If enabled, ClickHouse will try to infer integers instead of floats in schema inference for text formats. If all numbers in the column from input data are integers, the result type will be `Int64`, if at least one number is float, the result type will be `Float64`. + +Enabled by default. + +## input_format_try_infer_dates {#input_format_try_infer_dates} + +If enabled, ClickHouse will try to infer type `Date` from string fields in schema inference for text formats. If all fields from a column in input data were successfully parsed as dates, the result type will be `Date`, if at least one field was not parsed as date, the result type will be `String`. + +Enabled by default. + +## input_format_try_infer_datetimes {#input_format_try_infer_datetimes} + +If enabled, ClickHouse will try to infer type `DateTime64` from string fields in schema inference for text formats. If all fields from a column in input data were successfully parsed as datetimes, the result type will be `DateTime64`, if at least one field was not parsed as datetime, the result type will be `String`. + +Enabled by default. + ## date_time_input_format {#date_time_input_format} Allows choosing a parser of the text representation of date and time. 
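The schema-inference settings documented above can be tried directly with the `format` table function. The sketch below uses a small made-up inline JSONEachRow sample and shows the kinds of types you would expect to see inferred with these settings.

``` sql
SET schema_inference_make_columns_nullable = 0,
    input_format_try_infer_integers = 1,
    input_format_try_infer_dates = 1;

-- Inline sample; with the settings above, 'id' should be inferred as Int64,
-- 'price' as Float64, and 'created' as Date rather than String.
DESC format(JSONEachRow, '{"id" : 42, "price" : 1.5, "created" : "2022-12-15"}');
```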
diff --git a/docs/en/operations/system-tables/databases.md b/docs/en/operations/system-tables/databases.md index cd90c94c480..8f0cc6e56d2 100644 --- a/docs/en/operations/system-tables/databases.md +++ b/docs/en/operations/system-tables/databases.md @@ -13,6 +13,7 @@ Columns: - `metadata_path` ([String](../../sql-reference/data-types/enum.md)) — Metadata path. - `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — Database UUID. - `comment` ([String](../../sql-reference/data-types/enum.md)) — Database comment. +- `engine_full` ([String](../../sql-reference/data-types/enum.md)) — Parameters of the database engine. The `name` column from this system table is used for implementing the `SHOW DATABASES` query. @@ -31,10 +32,12 @@ SELECT * FROM system.databases; ``` ``` text -┌─name───────────────┬─engine─┬─data_path──────────────────┬─metadata_path───────────────────────────────────────────────────────┬─uuid─────────────────────────────────┬─comment─┐ -│ INFORMATION_SCHEMA │ Memory │ /var/lib/clickhouse/ │ │ 00000000-0000-0000-0000-000000000000 │ │ -│ default │ Atomic │ /var/lib/clickhouse/store/ │ /var/lib/clickhouse/store/d31/d317b4bd-3595-4386-81ee-c2334694128a/ │ 24363899-31d7-42a0-a436-389931d752a0 │ │ -│ information_schema │ Memory │ /var/lib/clickhouse/ │ │ 00000000-0000-0000-0000-000000000000 │ │ -│ system │ Atomic │ /var/lib/clickhouse/store/ │ /var/lib/clickhouse/store/1d1/1d1c869d-e465-4b1b-a51f-be033436ebf9/ │ 03e9f3d1-cc88-4a49-83e9-f3d1cc881a49 │ │ -└────────────────────┴────────┴────────────────────────────┴─────────────────────────────────────────────────────────────────────┴──────────────────────────────────────┴─────────┘ +┌─name────────────────┬─engine─────┬─data_path────────────────────┬─metadata_path─────────────────────────────────────────────────────────┬─uuid─────────────────────────────────┬─engine_full────────────────────────────────────────────┬─comment─┐ +│ INFORMATION_SCHEMA │ Memory │ /data/clickhouse_data/ │ │ 00000000-0000-0000-0000-000000000000 │ Memory │ │ +│ default │ Atomic │ /data/clickhouse_data/store/ │ /data/clickhouse_data/store/f97/f97a3ceb-2e8a-4912-a043-c536e826a4d4/ │ f97a3ceb-2e8a-4912-a043-c536e826a4d4 │ Atomic │ │ +│ information_schema │ Memory │ /data/clickhouse_data/ │ │ 00000000-0000-0000-0000-000000000000 │ Memory │ │ +│ replicated_database │ Replicated │ /data/clickhouse_data/store/ │ /data/clickhouse_data/store/da8/da85bb71-102b-4f69-9aad-f8d6c403905e/ │ da85bb71-102b-4f69-9aad-f8d6c403905e │ Replicated('some/path/database', 'shard1', 'replica1') │ │ +│ system │ Atomic │ /data/clickhouse_data/store/ │ /data/clickhouse_data/store/b57/b5770419-ac7a-4b67-8229-524122024076/ │ b5770419-ac7a-4b67-8229-524122024076 │ Atomic │ │ +└─────────────────────┴────────────┴──────────────────────────────┴───────────────────────────────────────────────────────────────────────┴──────────────────────────────────────┴────────────────────────────────────────────────────────┴─────────┘ + ``` diff --git a/docs/en/sql-reference/data-types/float.md b/docs/en/sql-reference/data-types/float.md index 8bf2e4007da..cd3905f54b2 100644 --- a/docs/en/sql-reference/data-types/float.md +++ b/docs/en/sql-reference/data-types/float.md @@ -6,6 +6,26 @@ sidebar_label: Float32, Float64 # Float32, Float64 +:::warning +If you need accurate calculations, in particular if you work with financial or business data requiring a high precision you should consider using Decimal instead. 
Floats might lead to inaccurate results as illustrated below: + +``` +CREATE TABLE IF NOT EXISTS float_vs_decimal +( + my_float Float64, + my_decimal Decimal64(3) +)Engine=MergeTree ORDER BY tuple() + +INSERT INTO float_vs_decimal SELECT round(canonicalRand(), 3) AS res, res FROM system.numbers LIMIT 1000000; # Generate 1 000 000 random number with 2 decimal places and store them as a float and as a decimal + +SELECT sum(my_float), sum(my_decimal) FROM float_vs_decimal; +> 500279.56300000014 500279.563 + +SELECT sumKahan(my_float), sumKahan(my_decimal) FROM float_vs_decimal; +> 500279.563 500279.563 +``` +::: + [Floating point numbers](https://en.wikipedia.org/wiki/IEEE_754). Types are equivalent to types of C: @@ -13,8 +33,6 @@ Types are equivalent to types of C: - `Float32` — `float`. - `Float64` — `double`. -We recommend that you store data in integer form whenever possible. For example, convert fixed precision numbers to integer values, such as monetary amounts or page load times in milliseconds. - Aliases: - `Float32` — `FLOAT`. diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md index aac0db208c6..bf88b9cedf2 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md @@ -607,3 +607,7 @@ dictGetString('prefix', 'asn', tuple(IPv6StringToNum('2001:db8::1'))) Other types are not supported yet. The function returns the attribute for the prefix that corresponds to this IP address. If there are overlapping prefixes, the most specific one is returned. Data must completely fit into RAM. + +## Related Content + +- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse) diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md index e4edad4d9a1..8e9dbd392aa 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md @@ -136,3 +136,7 @@ or SOURCE(CLICKHOUSE(... update_field 'added_time' update_lag 15)) ... ``` + +## Related Content + +- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse) diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md index 4eb96fe80a2..c5d48945649 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md @@ -824,3 +824,7 @@ Setting fields: :::note The `table` or `where` fields cannot be used together with the `query` field. And either one of the `table` or `query` fields must be declared. 
::: + +## Related Content + +- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse) \ No newline at end of file diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md index 881630167e3..8271a342941 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md @@ -176,3 +176,6 @@ Configuration fields: - [Functions for working with dictionaries](../../../sql-reference/functions/ext-dict-functions.md). +## Related Content + +- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse) \ No newline at end of file diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md index 76ca3ac978f..a923511ca5e 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md @@ -52,3 +52,6 @@ LIFETIME(...) -- Lifetime of dictionary in memory - [structure](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md) — Structure of the dictionary . A key and attributes that can be retrieved by this key. - [lifetime](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) — Frequency of dictionary updates. +## Related Content + +- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse) \ No newline at end of file diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md index 9f922a2cccb..8621c68b428 100644 --- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md +++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md @@ -79,3 +79,6 @@ You can convert values for a small dictionary by describing it in a `SELECT` que - [Dictionary Key and Fields](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md) - [Functions for Working with Dictionaries](../../../sql-reference/functions/ext-dict-functions.md) +## Related Content + +- [Using dictionaries to accelerate queries](https://clickhouse.com/blog/faster-queries-dictionaries-clickhouse) diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 6cecc3f01da..6156a823d58 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -410,35 +410,35 @@ Converts a date with time to a certain fixed date, while preserving the time. ## toRelativeYearNum -Converts a date or date with time to the number of the year, starting from a certain fixed point in the past. +Converts a date with time or date to the number of the year, starting from a certain fixed point in the past. ## toRelativeQuarterNum -Converts a date or date with time to the number of the quarter, starting from a certain fixed point in the past. +Converts a date with time or date to the number of the quarter, starting from a certain fixed point in the past. 
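The fixed reference point that these `toRelative*` functions count from cancels out when two values are subtracted, which is how they are typically used (and how `date_diff`, described further below, defines its units). A small sketch with arbitrary dates:

``` sql
SELECT
    toRelativeQuarterNum(toDate('2022-01-01')) - toRelativeQuarterNum(toDate('2021-10-01')) AS quarters_between,
    toRelativeYearNum(toDate('2022-01-01'))    - toRelativeYearNum(toDate('2021-10-01'))    AS years_between;
-- expected: quarters_between = 1, years_between = 1
```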
## toRelativeMonthNum -Converts a date or date with time to the number of the month, starting from a certain fixed point in the past. +Converts a date with time or date to the number of the month, starting from a certain fixed point in the past. ## toRelativeWeekNum -Converts a date or date with time to the number of the week, starting from a certain fixed point in the past. +Converts a date with time or date to the number of the week, starting from a certain fixed point in the past. ## toRelativeDayNum -Converts a date or date with time to the number of the day, starting from a certain fixed point in the past. +Converts a date with time or date to the number of the day, starting from a certain fixed point in the past. ## toRelativeHourNum -Converts a date or date with time to the number of the hour, starting from a certain fixed point in the past. +Converts a date with time or date to the number of the hour, starting from a certain fixed point in the past. ## toRelativeMinuteNum -Converts a date or date with time to the number of the minute, starting from a certain fixed point in the past. +Converts a date with time or date to the number of the minute, starting from a certain fixed point in the past. ## toRelativeSecondNum -Converts a date or date with time to the number of the second, starting from a certain fixed point in the past. +Converts a date with time or date to the number of the second, starting from a certain fixed point in the past. ## toISOYear @@ -517,154 +517,6 @@ SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(d └────────────┴───────────┴───────────┴───────────┘ ``` -## age - -Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 second. -E.g. the difference between `2021-12-29` and `2022-01-01` is 3 days for `day` unit, 0 months for `month` unit, 0 years for `year` unit. - - -**Syntax** - -``` sql -age('unit', startdate, enddate, [timezone]) -``` - -**Arguments** - -- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md). - Possible values: - - - `second` (possible abbreviations: `ss`, `s`) - - `minute` (possible abbreviations: `mi`, `n`) - - `hour` (possible abbreviations: `hh`, `h`) - - `day` (possible abbreviations: `dd`, `d`) - - `week` (possible abbreviations: `wk`, `ww`) - - `month` (possible abbreviations: `mm`, `m`) - - `quarter` (possible abbreviations: `qq`, `q`) - - `year` (possible abbreviations: `yyyy`, `yy`) - -- `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). - -- `enddate` — The second time value to subtract from (the minuend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). - -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../../sql-reference/data-types/string.md). 
- -**Returned value** - -Difference between `enddate` and `startdate` expressed in `unit`. - -Type: [Int](../../sql-reference/data-types/int-uint.md). - -**Example** - -Query: - -``` sql -SELECT age('hour', toDateTime('2018-01-01 22:30:00'), toDateTime('2018-01-02 23:00:00')); -``` - -Result: - -``` text -┌─age('hour', toDateTime('2018-01-01 22:30:00'), toDateTime('2018-01-02 23:00:00'))─┐ -│ 24 │ -└───────────────────────────────────────────────────────────────────────────────────┘ -``` - -Query: - -``` sql -SELECT - toDate('2022-01-01') AS e, - toDate('2021-12-29') AS s, - age('day', s, e) AS day_age, - age('month', s, e) AS month__age, - age('year', s, e) AS year_age; -``` - -Result: - -``` text -┌──────────e─┬──────────s─┬─day_age─┬─month__age─┬─year_age─┐ -│ 2022-01-01 │ 2021-12-29 │ 3 │ 0 │ 0 │ -└────────────┴────────────┴─────────┴────────────┴──────────┘ -``` - - -## date\_diff - -Returns the count of the specified `unit` boundaries crossed between the `startdate` and `enddate`. -The difference is calculated using relative units, e.g. the difference between `2021-12-29` and `2022-01-01` is 3 days for day unit (see [toRelativeDayNum](#torelativedaynum)), 1 month for month unit (see [toRelativeMonthNum](#torelativemonthnum)), 1 year for year unit (see [toRelativeYearNum](#torelativeyearnum)). - -**Syntax** - -``` sql -date_diff('unit', startdate, enddate, [timezone]) -``` - -Aliases: `dateDiff`, `DATE_DIFF`. - -**Arguments** - -- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md). - Possible values: - - - `second` (possible abbreviations: `ss`, `s`) - - `minute` (possible abbreviations: `mi`, `n`) - - `hour` (possible abbreviations: `hh`, `h`) - - `day` (possible abbreviations: `dd`, `d`) - - `week` (possible abbreviations: `wk`, `ww`) - - `month` (possible abbreviations: `mm`, `m`) - - `quarter` (possible abbreviations: `qq`, `q`) - - `year` (possible abbreviations: `yyyy`, `yy`) - -- `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). - -- `enddate` — The second time value to subtract from (the minuend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). - -- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../../sql-reference/data-types/string.md). - -**Returned value** - -Difference between `enddate` and `startdate` expressed in `unit`. - -Type: [Int](../../sql-reference/data-types/int-uint.md). 
- -**Example** - -Query: - -``` sql -SELECT dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00')); -``` - -Result: - -``` text -┌─dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00'))─┐ -│ 25 │ -└────────────────────────────────────────────────────────────────────────────────────────┘ -``` - -Query: - -``` sql -SELECT - toDate('2022-01-01') AS e, - toDate('2021-12-29') AS s, - dateDiff('day', s, e) AS day_diff, - dateDiff('month', s, e) AS month__diff, - dateDiff('year', s, e) AS year_diff; -``` - -Result: - -``` text -┌──────────e─┬──────────s─┬─day_diff─┬─month__diff─┬─year_diff─┐ -│ 2022-01-01 │ 2021-12-29 │ 3 │ 1 │ 1 │ -└────────────┴────────────┴──────────┴─────────────┴───────────┘ -``` - ## date\_trunc Truncates date and time data to the specified part of date. @@ -785,6 +637,80 @@ Result: └───────────────────────────────────────────────┘ ``` +## date\_diff + +Returns the difference between two dates or dates with time values. +The difference is calculated using relative units, e.g. the difference between `2022-01-01` and `2021-12-29` is 3 days for day unit (see [toRelativeDayNum](#torelativedaynum)), 1 month for month unit (see [toRelativeMonthNum](#torelativemonthnum)), 1 year for year unit (see [toRelativeYearNum](#torelativeyearnum)). + +**Syntax** + +``` sql +date_diff('unit', startdate, enddate, [timezone]) +``` + +Aliases: `dateDiff`, `DATE_DIFF`. + +**Arguments** + +- `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md). + Possible values: + + - `second` + - `minute` + - `hour` + - `day` + - `week` + - `month` + - `quarter` + - `year` + +- `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). + +- `enddate` — The second time value to subtract from (the minuend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). + +- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../../sql-reference/data-types/string.md). + +**Returned value** + +Difference between `enddate` and `startdate` expressed in `unit`. + +Type: [Int](../../sql-reference/data-types/int-uint.md). 
+ +**Example** + +Query: + +``` sql +SELECT dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00')); +``` + +Result: + +``` text +┌─dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00'))─┐ +│ 25 │ +└────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +Query: + +``` sql +SELECT + toDate('2022-01-01') AS e, + toDate('2021-12-29') AS s, + dateDiff('day', s, e) AS day_diff, + dateDiff('month', s, e) AS month__diff, + dateDiff('year', s, e) AS year_diff; +``` + +Result: + +``` text +┌──────────e─┬──────────s─┬─day_diff─┬─month__diff─┬─year_diff─┐ +│ 2022-01-01 │ 2021-12-29 │ 3 │ 1 │ 1 │ +└────────────┴────────────┴──────────┴─────────────┴───────────┘ +``` + ## date\_sub Subtracts the time interval or date interval from the provided date or date with time. diff --git a/docs/en/sql-reference/functions/index.md b/docs/en/sql-reference/functions/index.md index 840bcd583e4..22e79ec6623 100644 --- a/docs/en/sql-reference/functions/index.md +++ b/docs/en/sql-reference/functions/index.md @@ -296,3 +296,7 @@ Another example is the `hostName` function, which returns the name of the server If a function in a query is performed on the requestor server, but you need to perform it on remote servers, you can wrap it in an ‘any’ aggregate function or add it to a key in `GROUP BY`. + +## Related Content + +- [User-defined functions in ClickHouse Cloud](https://clickhouse.com/blog/user-defined-functions-clickhouse-udfs) diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index cdbf29f3e6d..2e2b280d1d6 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -1159,4 +1159,40 @@ If s is empty, the result is 0. If the first character is not an ASCII character +## concatWithSeparator +Returns the concatenation strings separated by string separator. If any of the argument values is `NULL`, the function returns `NULL`. + +**Syntax** + +``` sql +concatWithSeparator(sep, expr1, expr2, expr3...) +``` + +**Arguments** +- sep — separator. Const [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). +- exprN — expression to be concatenated. [String](../../sql-reference/data-types/string.md) or [FixedString](../../sql-reference/data-types/fixedstring.md). + +**Returned values** +- The concatenated String. + +**Example** + +Query: + +``` sql +SELECT concatWithSeparator('a', '1', '2', '3', '4') +``` + +Result: + +``` text +┌─concatWithSeparator('a', '1', '2', '3', '4')─┐ +│ 1a2a3a4 │ +└───────────────────────────────────┘ +``` + +## concatWithSeparatorAssumeInjective +Same as concatWithSeparator, the difference is that you need to ensure that concatWithSeparator(sep, expr1, expr2, expr3...) → result is injective, it will be used for optimization of GROUP BY. + +The function is named “injective” if it always returns different result for different values of arguments. In other words: different arguments never yield identical result. 
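A sketch of the intended use of `concatWithSeparatorAssumeInjective` in `GROUP BY`: both arguments are single characters and neither contains the separator, so the concatenation really is injective for this data. The "assume injective" in the name is a promise made by the query author, not something ClickHouse verifies.

``` sql
-- Group by a composite key built with the injective variant; numbers(12) is just sample data.
SELECT
    concatWithSeparatorAssumeInjective('-', toString(number % 2), toString(number % 3)) AS k,
    count() AS c
FROM numbers(12)
GROUP BY k
ORDER BY k;
```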
diff --git a/docs/en/sql-reference/statements/create/function.md b/docs/en/sql-reference/statements/create/function.md index 90be007bf43..80d20e8ccad 100644 --- a/docs/en/sql-reference/statements/create/function.md +++ b/docs/en/sql-reference/statements/create/function.md @@ -58,3 +58,7 @@ Result: │ 2 │ even │ └────────┴──────────────────────────────────────┘ ``` + +## Related Content + +- [User-defined functions in ClickHouse Cloud](https://clickhouse.com/blog/user-defined-functions-clickhouse-udfs) diff --git a/docs/en/sql-reference/syntax.md b/docs/en/sql-reference/syntax.md index 837022a424f..0a2a832d2fc 100644 --- a/docs/en/sql-reference/syntax.md +++ b/docs/en/sql-reference/syntax.md @@ -77,8 +77,9 @@ Numeric literal tries to be parsed: Literal value has the smallest type that the value fits in. For example, 1 is parsed as `UInt8`, but 256 is parsed as `UInt16`. For more information, see [Data types](../sql-reference/data-types/index.md). +Underscores `_` inside numeric literals are ignored and can be used for better readability. -Examples: `1`, `18446744073709551615`, `0xDEADBEEF`, `01`, `0.1`, `1e100`, `-1e-100`, `inf`, `nan`. +Examples: `1`, `10_000_000`, `0xffff_ffff`, `18446744073709551615`, `0xDEADBEEF`, `01`, `0.1`, `1e100`, `-1e-100`, `inf`, `nan`. ### String diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md index e4665ff0c3e..59c77d082cf 100644 --- a/docs/ru/interfaces/formats.md +++ b/docs/ru/interfaces/formats.md @@ -948,7 +948,7 @@ $ watch -n1 "clickhouse-client --query='SELECT event, value FROM system.events F ## RowBinary {#rowbinary} -Форматирует и парсит данные по строкам, в бинарном виде. Строки и значения уложены подряд, без разделителей. +Форматирует и парсит данные по строкам, в бинарном виде. Строки и значения уложены подряд, без разделителей. Так как данные представлены в бинарном виде, разделитель после `FORMAT RowBinary` строго определен в следующем виде: любое количество пробелов (`' '` - space, код `0x20`; `'\t'` - tab, код `0x09`; `'\f'` - form feed, код `0x0C`), следующая за этим одна последовательность конца строки (Windows style `"\r\n"` или Unix style `'\n'`), и непосредственно следующие за этим бинарные данные. Формат менее эффективен, чем формат Native, так как является строковым. Числа представлены в little endian формате фиксированной длины. Для примера, UInt64 занимает 8 байт. diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index 8fbcaf9568b..f430f5cae51 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -424,23 +424,23 @@ WITH toDateTime64('2020-01-01 10:20:30.999', 3) AS dt64 SELECT toStartOfSecond(d ## toRelativeYearNum {#torelativeyearnum} -Переводит дату или дату-с-временем в номер года, начиная с некоторого фиксированного момента в прошлом. +Переводит дату-с-временем или дату в номер года, начиная с некоторого фиксированного момента в прошлом. ## toRelativeQuarterNum {#torelativequarternum} -Переводит дату или дату-с-временем в номер квартала, начиная с некоторого фиксированного момента в прошлом. +Переводит дату-с-временем или дату в номер квартала, начиная с некоторого фиксированного момента в прошлом. ## toRelativeMonthNum {#torelativemonthnum} -Переводит дату или дату-с-временем в номер месяца, начиная с некоторого фиксированного момента в прошлом. +Переводит дату-с-временем или дату в номер месяца, начиная с некоторого фиксированного момента в прошлом. 
## toRelativeWeekNum {#torelativeweeknum} -Переводит дату или дату-с-временем в номер недели, начиная с некоторого фиксированного момента в прошлом. +Переводит дату-с-временем или дату в номер недели, начиная с некоторого фиксированного момента в прошлом. ## toRelativeDayNum {#torelativedaynum} -Переводит дату или дату-с-временем в номер дня, начиная с некоторого фиксированного момента в прошлом. +Переводит дату-с-временем или дату в номер дня, начиная с некоторого фиксированного момента в прошлом. ## toRelativeHourNum {#torelativehournum} @@ -456,7 +456,7 @@ WITH toDateTime64('2020-01-01 10:20:30.999', 3) AS dt64 SELECT toStartOfSecond(d ## toISOYear {#toisoyear} -Переводит дату или дату-с-временем в число типа UInt16, содержащее номер ISO года. ISO год отличается от обычного года, потому что в соответствии с [ISO 8601:1988](https://en.wikipedia.org/wiki/ISO_8601) ISO год начинается необязательно первого января. +Переводит дату-с-временем или дату в число типа UInt16, содержащее номер ISO года. ISO год отличается от обычного года, потому что в соответствии с [ISO 8601:1988](https://en.wikipedia.org/wiki/ISO_8601) ISO год начинается необязательно первого января. **Пример** @@ -479,7 +479,7 @@ SELECT ## toISOWeek {#toisoweek} -Переводит дату или дату-с-временем в число типа UInt8, содержащее номер ISO недели. +Переводит дату-с-временем или дату в число типа UInt8, содержащее номер ISO недели. Начало ISO года отличается от начала обычного года, потому что в соответствии с [ISO 8601:1988](https://en.wikipedia.org/wiki/ISO_8601) первая неделя года - это неделя с четырьмя или более днями в этом году. 1 Января 2017 г. - воскресение, т.е. первая ISO неделя 2017 года началась в понедельник 2 января, поэтому 1 января 2017 это последняя неделя 2016 года. @@ -503,7 +503,7 @@ SELECT ``` ## toWeek(date\[, mode\]\[, timezone\]) {#toweek} -Переводит дату или дату-с-временем в число UInt8, содержащее номер недели. Второй аргументам mode задает режим, начинается ли неделя с воскресенья или с понедельника и должно ли возвращаемое значение находиться в диапазоне от 0 до 53 или от 1 до 53. Если аргумент mode опущен, то используется режим 0. +Переводит дату-с-временем или дату в число UInt8, содержащее номер недели. Второй аргументам mode задает режим, начинается ли неделя с воскресенья или с понедельника и должно ли возвращаемое значение находиться в диапазоне от 0 до 53 или от 1 до 53. Если аргумент mode опущен, то используется режим 0. `toISOWeek() ` эквивалентно `toWeek(date,3)`. @@ -569,132 +569,6 @@ SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(d └────────────┴───────────┴───────────┴───────────┘ ``` -## age - -Вычисляет компонент `unit` разницы между `startdate` и `enddate`. Разница вычисляется с точностью в 1 секунду. -Например, разница между `2021-12-29` и `2022-01-01` 3 дня для единицы `day`, 0 месяцев для единицы `month`, 0 лет для единицы `year`. - -**Синтаксис** - -``` sql -age('unit', startdate, enddate, [timezone]) -``` - -**Аргументы** - -- `unit` — единица измерения времени, в которой будет выражено возвращаемое значение функции. [String](../../sql-reference/data-types/string.md). 
- Возможные значения: - - - `second` (возможные сокращения: `ss`, `s`) - - `minute` (возможные сокращения: `mi`, `n`) - - `hour` (возможные сокращения: `hh`, `h`) - - `day` (возможные сокращения: `dd`, `d`) - - `week` (возможные сокращения: `wk`, `ww`) - - `month` (возможные сокращения: `mm`, `m`) - - `quarter` (возможные сокращения: `qq`, `q`) - - `year` (возможные сокращения: `yyyy`, `yy`) - -- `startdate` — первая дата или дата со временем, которая вычитается из `enddate`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md). - -- `enddate` — вторая дата или дата со временем, из которой вычитается `startdate`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md). - -- `timezone` — [часовой пояс](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (необязательно). Если этот аргумент указан, то он применяется как для `startdate`, так и для `enddate`. Если этот аргумент не указан, то используются часовые пояса аргументов `startdate` и `enddate`. Если часовые пояса аргументов `startdate` и `enddate` не совпадают, то результат не определен. [String](../../sql-reference/data-types/string.md). - -**Возвращаемое значение** - -Разница между `enddate` и `startdate`, выраженная в `unit`. - -Тип: [Int](../../sql-reference/data-types/int-uint.md). - -**Пример** - -Запрос: - -``` sql -SELECT age('hour', toDateTime('2018-01-01 22:30:00'), toDateTime('2018-01-02 23:00:00')); -``` - -Результат: - -``` text -┌─age('hour', toDateTime('2018-01-01 22:30:00'), toDateTime('2018-01-02 23:00:00'))─┐ -│ 24 │ -└───────────────────────────────────────────────────────────────────────────────────┘ -``` - -Запрос: - -``` sql -SELECT - toDate('2022-01-01') AS e, - toDate('2021-12-29') AS s, - age('day', s, e) AS day_age, - age('month', s, e) AS month__age, - age('year', s, e) AS year_age; -``` - -Результат: - -``` text -┌──────────e─┬──────────s─┬─day_age─┬─month__age─┬─year_age─┐ -│ 2022-01-01 │ 2021-12-29 │ 3 │ 0 │ 0 │ -└────────────┴────────────┴─────────┴────────────┴──────────┘ -``` - -## date\_diff {#date_diff} - -Вычисляет разницу указанных границ `unit` пересекаемых между `startdate` и `enddate`. - -**Синтаксис** - -``` sql -date_diff('unit', startdate, enddate, [timezone]) -``` - -Синонимы: `dateDiff`, `DATE_DIFF`. - -**Аргументы** - -- `unit` — единица измерения времени, в которой будет выражено возвращаемое значение функции. [String](../../sql-reference/data-types/string.md). - Возможные значения: - - - `second` (возможные сокращения: `ss`, `s`) - - `minute` (возможные сокращения: `mi`, `n`) - - `hour` (возможные сокращения: `hh`, `h`) - - `day` (возможные сокращения: `dd`, `d`) - - `week` (возможные сокращения: `wk`, `ww`) - - `month` (возможные сокращения: `mm`, `m`) - - `quarter` (возможные сокращения: `qq`, `q`) - - `year` (возможные сокращения: `yyyy`, `yy`) - -- `startdate` — первая дата или дата со временем, которая вычитается из `enddate`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md). 
- -- `enddate` — вторая дата или дата со временем, из которой вычитается `startdate`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md). - -- `timezone` — [часовой пояс](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (необязательно). Если этот аргумент указан, то он применяется как для `startdate`, так и для `enddate`. Если этот аргумент не указан, то используются часовые пояса аргументов `startdate` и `enddate`. Если часовые пояса аргументов `startdate` и `enddate` не совпадают, то результат не определен. [String](../../sql-reference/data-types/string.md). - -**Возвращаемое значение** - -Разница между `enddate` и `startdate`, выраженная в `unit`. - -Тип: [Int](../../sql-reference/data-types/int-uint.md). - -**Пример** - -Запрос: - -``` sql -SELECT dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00')); -``` - -Результат: - -``` text -┌─dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00'))─┐ -│ 25 │ -└────────────────────────────────────────────────────────────────────────────────────────┘ -``` - ## date_trunc {#date_trunc} Отсекает от даты и времени части, меньшие чем указанная часть. @@ -815,6 +689,60 @@ SELECT date_add(YEAR, 3, toDate('2018-01-01')); └───────────────────────────────────────────────┘ ``` +## date\_diff {#date_diff} + +Вычисляет разницу между двумя значениями дат или дат со временем. + +**Синтаксис** + +``` sql +date_diff('unit', startdate, enddate, [timezone]) +``` + +Синонимы: `dateDiff`, `DATE_DIFF`. + +**Аргументы** + +- `unit` — единица измерения времени, в которой будет выражено возвращаемое значение функции. [String](../../sql-reference/data-types/string.md). + Возможные значения: + + - `second` + - `minute` + - `hour` + - `day` + - `week` + - `month` + - `quarter` + - `year` + +- `startdate` — первая дата или дата со временем, которая вычитается из `enddate`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md). + +- `enddate` — вторая дата или дата со временем, из которой вычитается `startdate`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) или [DateTime64](../../sql-reference/data-types/datetime64.md). + +- `timezone` — [часовой пояс](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (необязательно). Если этот аргумент указан, то он применяется как для `startdate`, так и для `enddate`. Если этот аргумент не указан, то используются часовые пояса аргументов `startdate` и `enddate`. Если часовые пояса аргументов `startdate` и `enddate` не совпадают, то результат не определен. [String](../../sql-reference/data-types/string.md). + +**Возвращаемое значение** + +Разница между `enddate` и `startdate`, выраженная в `unit`. + +Тип: [Int](../../sql-reference/data-types/int-uint.md). 
+ +**Пример** + +Запрос: + +``` sql +SELECT dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00')); +``` + +Результат: + +``` text +┌─dateDiff('hour', toDateTime('2018-01-01 22:00:00'), toDateTime('2018-01-02 23:00:00'))─┐ +│ 25 │ +└────────────────────────────────────────────────────────────────────────────────────────┘ +``` + ## date\_sub {#date_sub} Вычитает интервал времени или даты из указанной даты или даты со временем. diff --git a/docs/zh/getting-started/example-datasets/uk-price-paid.mdx b/docs/zh/getting-started/example-datasets/uk-price-paid.mdx index 058f0ae421a..ecfdcddbbe2 100644 --- a/docs/zh/getting-started/example-datasets/uk-price-paid.mdx +++ b/docs/zh/getting-started/example-datasets/uk-price-paid.mdx @@ -41,7 +41,7 @@ ORDER BY (postcode1, postcode2, addr1, addr2); 我们将使用 `url` 函数将数据流式传输到 ClickHouse。我们需要首先预处理一些传入的数据,其中包括: - 将`postcode` 拆分为两个不同的列 - `postcode1` 和 `postcode2`,因为这更适合存储和查询 -- 将`time` 字段转换为日期为它只包含 00:00 时间 +- 将`time` 字段转换为日期因为它只包含 00:00 时间 - 忽略 [UUid](/docs/zh/sql-reference/data-types/uuid.md) 字段,因为我们不需要它进行分析 - 使用 [transform](/docs/zh/sql-reference/functions/other-functions.md#transform) 函数将 `Enum` 字段 `type` 和 `duration` 转换为更易读的 `Enum` 字段 - 将 `is_new` 字段从单字符串(` Y`/`N`) 到 [UInt8](/docs/zh/sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-uint256-int8-int16-int32-int64 -int128-int256) 字段为 0 或 1 diff --git a/packages/clickhouse-server.service b/packages/clickhouse-server.service index 1581b95213e..a1602482073 100644 --- a/packages/clickhouse-server.service +++ b/packages/clickhouse-server.service @@ -11,8 +11,8 @@ Wants=time-sync.target [Service] Type=notify -# Switching off watchdog is very important for sd_notify to work correctly. -Environment=CLICKHOUSE_WATCHDOG_ENABLE=0 +# NOTE: we leave clickhouse watchdog process enabled to be able to see OOM/SIGKILL traces in clickhouse-server.log files. +# If you wish to disable the watchdog and rely on systemd logs just add "Environment=CLICKHOUSE_WATCHDOG_ENABLE=0" line. 
User=clickhouse Group=clickhouse Restart=always diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 115f76174bd..9923b8b365a 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -16,6 +16,8 @@ #include +#include + #include "config_version.h" #include #include @@ -258,6 +260,10 @@ try if (is_interactive && !config().has("no-warnings")) showWarnings(); + /// Set user password complexity rules + auto & access_control = global_context->getAccessControl(); + access_control.setPasswordComplexityRules(connection->getPasswordComplexityRules()); + if (is_interactive && !delayed_interactive) { runInteractive(); diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 33d11091660..1614fb1a8b4 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 574e9bfa62e..f98b33c7f87 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -60,7 +60,7 @@ #include #include #include -#include +#include #include #include #include @@ -70,6 +70,8 @@ #include #include #include +#include +#include #include #include #include "MetricsTransmitter.h" @@ -287,7 +289,6 @@ namespace ErrorCodes extern const int MISMATCHING_USERS_FOR_PROCESS_AND_DATA; extern const int NETWORK_ERROR; extern const int CORRUPTED_DATA; - extern const int SYSTEM_ERROR; } @@ -661,51 +662,6 @@ static void sanityChecks(Server & server) } } -#if defined(OS_LINUX) -/// Sends notification to systemd, analogous to sd_notify from libsystemd -static void systemdNotify(const std::string_view & command) -{ - const char * path = getenv("NOTIFY_SOCKET"); // NOLINT(concurrency-mt-unsafe) - - if (path == nullptr) - return; /// not using systemd - - int s = socket(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0); - - if (s == -1) - throwFromErrno("Can't create UNIX socket for systemd notify.", ErrorCodes::SYSTEM_ERROR); - - SCOPE_EXIT({ close(s); }); - - const size_t len = strlen(path); - - struct sockaddr_un addr; - - addr.sun_family = AF_UNIX; - - if (len < 2 || len > sizeof(addr.sun_path) - 1) - throw Exception(ErrorCodes::SYSTEM_ERROR, "NOTIFY_SOCKET env var value \"{}\" is wrong.", path); - - memcpy(addr.sun_path, path, len + 1); /// write last zero as well. - - size_t addrlen = offsetof(struct sockaddr_un, sun_path) + len; - - /// '@' meass this is Linux abstract socket, per documentation it must be sun_path[0] must be set to '\0' for it. - if (path[0] == '@') - addr.sun_path[0] = 0; - else if (path[0] == '/') - addrlen += 1; /// non-abstract-addresses should be zero terminated. 
- else - throw Exception(ErrorCodes::SYSTEM_ERROR, "Wrong UNIX path \"{}\" in NOTIFY_SOCKET env var", path); - - const struct sockaddr *sock_addr = reinterpret_cast (&addr); - - if (sendto(s, command.data(), command.size(), 0, sock_addr, static_cast (addrlen)) != static_cast (command.size())) - throw Exception("Failed to notify systemd.", ErrorCodes::SYSTEM_ERROR); - -} -#endif - int Server::main(const std::vector & /*args*/) try { @@ -748,8 +704,8 @@ try else { const String config_path = config().getString("config-file", "config.xml"); - const auto config_dir = std::filesystem::path{config_path}.remove_filename(); - setenv("OPENSSL_CONF", config_dir.string() + "openssl.conf", true); + const auto config_dir = std::filesystem::path{config_path}.replace_filename("openssl.conf"); + setenv("OPENSSL_CONF", config_dir.string(), true); } #endif @@ -761,6 +717,8 @@ try registerDisks(/* global_skip_access_check= */ false); registerFormats(); registerRemoteFileMetadatas(); + registerSchedulerNodes(); + registerResourceManagers(); CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::getVersionRevision()); CurrentMetrics::set(CurrentMetrics::VersionInteger, ClickHouseRevision::getVersionInteger()); @@ -1335,6 +1293,11 @@ try global_context->getDistributedSchedulePool().increaseThreadsCount(new_pool_size); } + if (config->has("resources")) + { + global_context->getResourceManager()->updateConfiguration(*config); + } + if (!initial_loading) { /// We do not load ZooKeeper configuration on the first config loading @@ -1861,6 +1824,9 @@ try } #if defined(OS_LINUX) + /// Tell the service manager that service startup is finished. + /// NOTE: the parent clickhouse-watchdog process must do systemdNotify("MAINPID={}\n", child_pid); before + /// the child process notifies 'READY=1'. systemdNotify("READY=1\n"); #endif diff --git a/programs/server/config.xml b/programs/server/config.xml index deebb434120..0cbc3d9339e 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -466,6 +466,30 @@ 1 1 + + + time + * ^ ^ ^ ^ ^ ^ + * | | | | | | + * enqueue wait dequeue execute consume finish + * + * 1) Request is enqueued using ISchedulerQueue::enqueueRequest(). + * 2) Request competes with others for access to a resource; effectively just waiting in a queue. + * 3) Scheduler calls ISchedulerNode::dequeueRequest() that returns the request. + * 4) Callback ResourceRequest::execute() is called to provide access to the resource. + * 5) The resource consumption is happening outside of the scheduling subsystem. + * 6) request->constraint->finishRequest() is called when consumption is finished. + * + * Steps (5) and (6) can be omitted if constraint is not used by the resource. + * + * Request can be created on stack or heap. + * Request ownership is done outside of the scheduling subsystem. + * After (6) request can be destructed safely. + * + * Request cancelling is not supported yet. + */ +class ResourceRequest +{ +public: + /// Cost of request execution; should be filled before request enqueueing. 
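The lifecycle comment above (enqueue, wait, dequeue, execute, consume, finish) is easiest to read from the consumer's side: `execute()` only signals that access was granted, and the `execute()` comment further down suggests doing that by fulfilling an `std::promise`. The stand-alone sketch below shows that pattern; `ResourceRequestSketch` and `BlockingRequest` are invented stand-ins for illustration and are not part of this PR, and a real request would be handed to `ISchedulerQueue::enqueueRequest()` rather than executed directly.

```cpp
// Stand-in illustration of blocking on a resource request.
// The queue/constraint plumbing from the PR is stubbed out; only the
// promise/future pattern hinted at by the execute() comment is shown.
#include <cstdint>
#include <future>

using ResourceCost = int64_t;      // assumption: mirrors the alias used by the scheduler headers

struct ResourceRequestSketch       // reduced copy of the class above, for illustration only
{
    ResourceCost cost;
    explicit ResourceRequestSketch(ResourceCost cost_ = 1) : cost(cost_) {}
    virtual ~ResourceRequestSketch() = default;
    virtual void execute() = 0;    // called from the scheduler thread, must be fast
};

/// Step 4 of the lifecycle: execute() only signals that access is granted;
/// the actual consumption (step 5) happens on the caller's thread.
class BlockingRequest : public ResourceRequestSketch
{
public:
    explicit BlockingRequest(ResourceCost cost_) : ResourceRequestSketch(cost_) {}

    void execute() override { granted.set_value(); }      // cheap: just wake the waiter

    void waitForGrant() { granted.get_future().wait(); }   // caller blocks until dequeued

private:
    std::promise<void> granted;
};

int main()
{
    BlockingRequest request(/* cost = */ 4096);
    // In the real subsystem the request would go through ISchedulerQueue::enqueueRequest()
    // (step 1) and execute() would be called by the scheduler thread (steps 3-4).
    request.execute();        // simulate the scheduler granting access
    request.waitForGrant();   // step 5 starts here: consume the resource
    // Step 6: if a constraint is involved, constraint->finishRequest() is called afterwards.
    return 0;
}
```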
+ /// NOTE: If cost is not known in advance, credit model can be used: + /// NOTE: for the first request use 1 and + ResourceCost cost; + + /// Request outcome + /// Should be filled during resource consumption + bool successful = true; + + /// Scheduler node to be notified on consumption finish + /// Auto-filled during request enqueue/dequeue + ISchedulerConstraint * constraint = nullptr; + + /// Timestamps for introspection + ResourceNs enqueue_ns = 0; + ResourceNs execute_ns = 0; + ResourceNs finish_ns = 0; + + explicit ResourceRequest(ResourceCost cost_ = 1) + : cost(cost_) + {} + + virtual ~ResourceRequest() = default; + + /// Callback to trigger resource consumption. + /// IMPORTANT: is called from scheduler thread and must be fast, + /// just triggering start of a consumption, not doing the consumption itself + /// (e.g. setting an std::promise or creating a job in a thread pool) + virtual void execute() = 0; +}; + +} diff --git a/src/IO/S3Common.cpp b/src/IO/S3Common.cpp index 91f575d5097..c03f7f07310 100644 --- a/src/IO/S3Common.cpp +++ b/src/IO/S3Common.cpp @@ -851,8 +851,12 @@ namespace S3 quoteString(bucket), !uri.empty() ? " (" + uri.toString() + ")" : ""); } + bool isNotFoundError(Aws::S3::S3Errors error) + { + return error == Aws::S3::S3Errors::RESOURCE_NOT_FOUND || error == Aws::S3::S3Errors::NO_SUCH_KEY; + } - S3::ObjectInfo getObjectInfo(std::shared_ptr client_ptr, const String & bucket, const String & key, const String & version_id, bool throw_on_error, bool for_disk_s3) + Aws::S3::Model::HeadObjectOutcome headObject(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool for_disk_s3) { ProfileEvents::increment(ProfileEvents::S3HeadObject); if (for_disk_s3) @@ -865,7 +869,12 @@ namespace S3 if (!version_id.empty()) req.SetVersionId(version_id); - Aws::S3::Model::HeadObjectOutcome outcome = client_ptr->HeadObject(req); + return client.HeadObject(req); + } + + S3::ObjectInfo getObjectInfo(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool throw_on_error, bool for_disk_s3) + { + auto outcome = headObject(client, bucket, key, version_id, for_disk_s3); if (outcome.IsSuccess()) { @@ -874,16 +883,34 @@ namespace S3 } else if (throw_on_error) { - throw DB::Exception(outcome.GetError().GetMessage(), ErrorCodes::S3_ERROR); + const auto & error = outcome.GetError(); + throw DB::Exception(ErrorCodes::S3_ERROR, + "Failed to HEAD object: {}. 
HTTP response code: {}", + error.GetMessage(), static_cast(error.GetResponseCode())); } return {}; } - size_t getObjectSize(std::shared_ptr client_ptr, const String & bucket, const String & key, const String & version_id, bool throw_on_error, bool for_disk_s3) + size_t getObjectSize(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool throw_on_error, bool for_disk_s3) { - return getObjectInfo(client_ptr, bucket, key, version_id, throw_on_error, for_disk_s3).size; + return getObjectInfo(client, bucket, key, version_id, throw_on_error, for_disk_s3).size; } + bool objectExists(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool for_disk_s3) + { + auto outcome = headObject(client, bucket, key, version_id, for_disk_s3); + + if (outcome.IsSuccess()) + return true; + + const auto & error = outcome.GetError(); + if (isNotFoundError(error.GetErrorType())) + return false; + + throw S3Exception(error.GetErrorType(), + "Failed to check existence of key {} in bucket {}: {}", + key, bucket, error.GetMessage()); + } } } diff --git a/src/IO/S3Common.h b/src/IO/S3Common.h index c68d76ece41..73dc51b980f 100644 --- a/src/IO/S3Common.h +++ b/src/IO/S3Common.h @@ -13,20 +13,17 @@ #include #include #include +#include #include #include #include #include -namespace Aws::S3 -{ - class S3Client; -} - namespace DB { + namespace ErrorCodes { extern const int S3_ERROR; @@ -130,16 +127,22 @@ struct ObjectInfo time_t last_modification_time = 0; }; -S3::ObjectInfo getObjectInfo(std::shared_ptr client_ptr, const String & bucket, const String & key, const String & version_id, bool throw_on_error, bool for_disk_s3); +bool isNotFoundError(Aws::S3::S3Errors error); -size_t getObjectSize(std::shared_ptr client_ptr, const String & bucket, const String & key, const String & version_id, bool throw_on_error, bool for_disk_s3); +Aws::S3::Model::HeadObjectOutcome headObject(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id = "", bool for_disk_s3 = false); + +S3::ObjectInfo getObjectInfo(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool throw_on_error, bool for_disk_s3); + +size_t getObjectSize(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id, bool throw_on_error, bool for_disk_s3); + +bool objectExists(const Aws::S3::S3Client & client, const String & bucket, const String & key, const String & version_id = "", bool for_disk_s3 = false); } #endif namespace Poco::Util { -class AbstractConfiguration; + class AbstractConfiguration; }; namespace DB::S3 diff --git a/src/IO/SchedulerNodeFactory.h b/src/IO/SchedulerNodeFactory.h new file mode 100644 index 00000000000..5c31534a9b8 --- /dev/null +++ b/src/IO/SchedulerNodeFactory.h @@ -0,0 +1,57 @@ +#pragma once + +#include +#include + +#include + +#include + +#include + +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int INVALID_SCHEDULER_NODE; +} + +class SchedulerNodeFactory : private boost::noncopyable +{ +public: + static SchedulerNodeFactory & instance() + { + static SchedulerNodeFactory ret; + return ret; + } + + SchedulerNodePtr get(const String & name, EventQueue * event_queue, const Poco::Util::AbstractConfiguration & config, const String & config_prefix) + { + std::lock_guard lock{mutex}; + if (auto iter = methods.find(name); iter != methods.end()) + return 
iter->second(event_queue, config, config_prefix); + throw Exception(ErrorCodes::INVALID_SCHEDULER_NODE, "Unknown scheduler node type: {}", name); + } + + template + void registerMethod(const String & name) + { + std::lock_guard lock{mutex}; + methods[name] = [] (EventQueue * event_queue, const Poco::Util::AbstractConfiguration & config, const String & config_prefix) + { + return std::make_shared(event_queue, config, config_prefix); + }; + } + +private: + std::mutex mutex; + using Method = std::function; + std::unordered_map methods; +}; + +} diff --git a/src/IO/SchedulerRoot.h b/src/IO/SchedulerRoot.h new file mode 100644 index 00000000000..f9af2099b8c --- /dev/null +++ b/src/IO/SchedulerRoot.h @@ -0,0 +1,250 @@ +#pragma once + +#include + +#include +#include + +#include +#include + +#include + +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int INVALID_SCHEDULER_NODE; +} + +/* + * Resource scheduler root node with a dedicated thread. + * Immediate children correspond to different resources. + */ +class SchedulerRoot : public ISchedulerNode +{ +private: + struct TResource + { + SchedulerNodePtr root; + + // Intrusive cyclic list of active resources + TResource * next = nullptr; + TResource * prev = nullptr; + + explicit TResource(const SchedulerNodePtr & root_) + : root(root_) + { + root->info.parent.ptr = this; + } + + // Get pointer stored by ctor in info + static TResource * get(SchedulerNodeInfo & info) + { + return reinterpret_cast(info.parent.ptr); + } + }; + +public: + SchedulerRoot() + : ISchedulerNode(&events) + {} + + ~SchedulerRoot() override + { + stop(); + } + + /// Runs separate scheduler thread + void start() + { + if (!scheduler.joinable()) + scheduler = ThreadFromGlobalPool([this] { schedulerThread(); }); + } + + /// Joins scheduler threads and execute every pending request iff graceful + void stop(bool graceful = true) + { + if (scheduler.joinable()) + { + stop_flag.store(true); + events.enqueue([]{}); // just to wake up thread + scheduler.join(); + if (graceful) + { + // Do the same cycle as schedulerThread() but never block, just exit instead + bool has_work = true; + while (has_work) + { + auto [request, _] = dequeueRequest(); + if (request) + execute(request); + else + has_work = false; + while (events.tryProcess()) + has_work = true; + } + } + } + } + + bool equals(ISchedulerNode * other) override + { + if (auto * o = dynamic_cast(other)) + return true; + return false; + } + + void attachChild(const SchedulerNodePtr & child) override + { + // Take ownership + assert(child->parent == nullptr); + if (auto [it, inserted] = children.emplace(child.get(), child); !inserted) + throw Exception( + ErrorCodes::INVALID_SCHEDULER_NODE, + "Can't add the same scheduler node twice"); + + // Attach + child->setParent(this); + + // Activate child if required + if (child->isActive()) + activateChild(child.get()); + } + + void removeChild(ISchedulerNode * child) override + { + if (auto iter = children.find(child); iter != children.end()) + { + SchedulerNodePtr removed = iter->second.root; + + // Deactivate if required + deactivate(&iter->second); + + // Detach + removed->setParent(nullptr); + + // Remove ownership + children.erase(iter); + } + } + + ISchedulerNode * getChild(const String &) override + { + abort(); // scheduler is allowed to have multiple children with the same name + } + + std::pair dequeueRequest() override + { + if (current == nullptr) // No active resources + return {nullptr, false}; + + // Dequeue request from 
current resource + auto [request, resource_active] = current->root->dequeueRequest(); + assert(request != nullptr); + + // Deactivate resource if required + if (!resource_active) + deactivate(current); + else + current = current->next; // Just move round-robin pointer + + return {request, current != nullptr}; + } + + bool isActive() override + { + return current != nullptr; + } + + void activateChild(ISchedulerNode * child) override + { + activate(TResource::get(child->info)); + } + + void setParent(ISchedulerNode *) override + { + abort(); // scheduler must be the root and this function should not be called + } + +private: + void activate(TResource * value) + { + assert(value->next == nullptr && value->prev == nullptr); + if (current == nullptr) // No active children + { + current = value; + value->prev = value; + value->next = value; + } + else + { + current->prev->next = value; + value->prev = current->prev; + current->prev = value; + value->next = current; + } + } + + void deactivate(TResource * value) + { + if (value->next == nullptr) + return; // Already deactivated + assert(current != nullptr); + if (current == value) + { + if (current->next == current) // We are going to remove the last active child + { + value->next = nullptr; + value->prev = nullptr; + current = nullptr; + return; + } + else // Just move current to next to avoid invalidation + current = current->next; + } + value->prev->next = value->next; + value->next->prev = value->prev; + value->prev = nullptr; + value->next = nullptr; + } + +private: + void schedulerThread() + { + while (!stop_flag.load()) + { + // Dequeue and execute single request + auto [request, _] = dequeueRequest(); + if (request) + execute(request); + else // No more requests -- block until any event happens + events.process(); + + // Process all events before dequeuing to ensure fair competition + while (events.tryProcess()) {} + } + } + + void execute(ResourceRequest * request) + { + request->execute_ns = clock_gettime_ns(); + request->execute(); + } + +private: + TResource * current = nullptr; // round-robin pointer + std::unordered_map children; // resources by pointer + std::atomic stop_flag = false; + EventQueue events; + ThreadFromGlobalPool scheduler; +}; + +} diff --git a/src/IO/WriteBufferFromS3.cpp b/src/IO/WriteBufferFromS3.cpp index 56d487f165b..37bc8c78cf4 100644 --- a/src/IO/WriteBufferFromS3.cpp +++ b/src/IO/WriteBufferFromS3.cpp @@ -50,6 +50,7 @@ const int S3_WARN_MAX_PARTS = 10000; namespace ErrorCodes { extern const int S3_ERROR; + extern const int INVALID_CONFIG_PARAMETER; } struct WriteBufferFromS3::UploadPartTask @@ -71,7 +72,7 @@ WriteBufferFromS3::WriteBufferFromS3( std::shared_ptr client_ptr_, const String & bucket_, const String & key_, - const S3Settings::RequestSettings & request_settings_, + const S3Settings::RequestSettings & request_settings, std::optional> object_metadata_, size_t buffer_size_, ThreadPoolCallbackRunner schedule_, @@ -79,10 +80,12 @@ WriteBufferFromS3::WriteBufferFromS3( : BufferWithOwnMemory(buffer_size_, nullptr, 0) , bucket(bucket_) , key(key_) - , request_settings(request_settings_) + , settings(request_settings.getUploadSettings()) + , check_objects_after_upload(request_settings.check_objects_after_upload) + , max_unexpected_write_error_retries(request_settings.max_unexpected_write_error_retries) , client_ptr(std::move(client_ptr_)) , object_metadata(std::move(object_metadata_)) - , upload_part_size(request_settings_.min_upload_part_size) + , upload_part_size(settings.min_upload_part_size) , 
schedule(std::move(schedule_)) , write_settings(write_settings_) { @@ -107,9 +110,10 @@ void WriteBufferFromS3::nextImpl() write_settings.remote_throttler->add(offset()); /// Data size exceeds singlepart upload threshold, need to use multipart upload. - if (multipart_upload_id.empty() && last_part_size > request_settings.max_single_part_upload_size) + if (multipart_upload_id.empty() && last_part_size > settings.max_single_part_upload_size) createMultipartUpload(); + chassert(upload_part_size > 0); if (!multipart_upload_id.empty() && last_part_size > upload_part_size) { writePart(); @@ -122,12 +126,6 @@ void WriteBufferFromS3::nextImpl() void WriteBufferFromS3::allocateBuffer() { - if (total_parts_uploaded != 0 && total_parts_uploaded % request_settings.upload_part_size_multiply_parts_count_threshold == 0) - { - upload_part_size *= request_settings.upload_part_size_multiply_factor; - upload_part_size = std::min(upload_part_size, request_settings.max_upload_part_size); - } - temporary_buffer = Aws::MakeShared("temporary buffer"); temporary_buffer->exceptions(std::ios::badbit); last_part_size = 0; @@ -180,21 +178,11 @@ void WriteBufferFromS3::finalizeImpl() if (!multipart_upload_id.empty()) completeMultipartUpload(); - if (request_settings.check_objects_after_upload) + if (check_objects_after_upload) { LOG_TRACE(log, "Checking object {} exists after upload", key); - - Aws::S3::Model::HeadObjectRequest request; - request.SetBucket(bucket); - request.SetKey(key); - - ProfileEvents::increment(ProfileEvents::S3HeadObject); - if (write_settings.for_object_storage) - ProfileEvents::increment(ProfileEvents::DiskS3HeadObject); - - auto response = client_ptr->HeadObject(request); - + auto response = S3::headObject(*client_ptr, bucket, key, "", write_settings.for_object_storage); if (!response.IsSuccess()) throw S3Exception(fmt::format("Object {} from bucket {} disappeared immediately after upload, it's a bug in S3 or S3 API.", key, bucket), response.GetError().GetErrorType()); else @@ -257,13 +245,10 @@ void WriteBufferFromS3::writePart() { UploadPartTask * task = nullptr; - int part_number; { std::lock_guard lock(bg_tasks_mutex); - task = &upload_object_tasks.emplace_back(); ++num_added_bg_tasks; - part_number = num_added_bg_tasks; } /// Notify waiting thread when task finished @@ -281,7 +266,7 @@ void WriteBufferFromS3::writePart() try { - fillUploadRequest(task->req, part_number); + fillUploadRequest(task->req); schedule([this, task, task_finish_notify]() { @@ -308,23 +293,44 @@ void WriteBufferFromS3::writePart() UploadPartTask task; auto & tags = TSA_SUPPRESS_WARNING_FOR_WRITE(part_tags); /// Suppress warning because schedule == false. - fillUploadRequest(task.req, static_cast(tags.size() + 1)); + fillUploadRequest(task.req); processUploadRequest(task); tags.push_back(task.tag); } } -void WriteBufferFromS3::fillUploadRequest(Aws::S3::Model::UploadPartRequest & req, int part_number) +void WriteBufferFromS3::fillUploadRequest(Aws::S3::Model::UploadPartRequest & req) { + /// Increase part number. + ++part_number; + if (!multipart_upload_id.empty() && (part_number > settings.max_part_number)) + { + throw Exception( + ErrorCodes::INVALID_CONFIG_PARAMETER, + "Part number exceeded {} while writing {} bytes to S3. 
Check min_upload_part_size = {}, max_upload_part_size = {}, " + "upload_part_size_multiply_factor = {}, upload_part_size_multiply_parts_count_threshold = {}, max_single_part_upload_size = {}", + settings.max_part_number, count(), settings.min_upload_part_size, settings.max_upload_part_size, + settings.upload_part_size_multiply_factor, settings.upload_part_size_multiply_parts_count_threshold, + settings.max_single_part_upload_size); + } + + /// Setup request. req.SetBucket(bucket); req.SetKey(key); - req.SetPartNumber(part_number); + req.SetPartNumber(static_cast(part_number)); req.SetUploadId(multipart_upload_id); req.SetContentLength(temporary_buffer->tellp()); req.SetBody(temporary_buffer); /// If we don't do it, AWS SDK can mistakenly set it to application/xml, see https://github.com/aws/aws-sdk-cpp/issues/1840 req.SetContentType("binary/octet-stream"); + + /// Maybe increase `upload_part_size` (we need to increase it sometimes to keep `part_number` less or equal than `max_part_number`). + if (!multipart_upload_id.empty() && (part_number % settings.upload_part_size_multiply_parts_count_threshold == 0)) + { + upload_part_size *= settings.upload_part_size_multiply_factor; + upload_part_size = std::min(upload_part_size, settings.max_upload_part_size); + } } void WriteBufferFromS3::processUploadRequest(UploadPartTask & task) @@ -343,8 +349,6 @@ void WriteBufferFromS3::processUploadRequest(UploadPartTask & task) } else throw S3Exception(outcome.GetError().GetMessage(), outcome.GetError().GetErrorType()); - - total_parts_uploaded++; } void WriteBufferFromS3::completeMultipartUpload() @@ -370,7 +374,7 @@ void WriteBufferFromS3::completeMultipartUpload() req.SetMultipartUpload(multipart_upload); - size_t max_retry = std::max(request_settings.max_unexpected_write_error_retries, 1UL); + size_t max_retry = std::max(max_unexpected_write_error_retries, 1UL); for (size_t i = 0; i < max_retry; ++i) { ProfileEvents::increment(ProfileEvents::S3CompleteMultipartUpload); @@ -476,7 +480,7 @@ void WriteBufferFromS3::fillPutRequest(Aws::S3::Model::PutObjectRequest & req) void WriteBufferFromS3::processPutRequest(const PutObjectTask & task) { - size_t max_retry = std::max(request_settings.max_unexpected_write_error_retries, 1UL); + size_t max_retry = std::max(max_unexpected_write_error_retries, 1UL); for (size_t i = 0; i < max_retry; ++i) { ProfileEvents::increment(ProfileEvents::S3PutObject); diff --git a/src/IO/WriteBufferFromS3.h b/src/IO/WriteBufferFromS3.h index b4b5a6d37a3..41ed009bcf9 100644 --- a/src/IO/WriteBufferFromS3.h +++ b/src/IO/WriteBufferFromS3.h @@ -50,7 +50,7 @@ public: std::shared_ptr client_ptr_, const String & bucket_, const String & key_, - const S3Settings::RequestSettings & request_settings_, + const S3Settings::RequestSettings & request_settings, std::optional> object_metadata_ = std::nullopt, size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE, ThreadPoolCallbackRunner schedule_ = {}, @@ -75,7 +75,7 @@ private: void finalizeImpl() override; struct UploadPartTask; - void fillUploadRequest(Aws::S3::Model::UploadPartRequest & req, int part_number); + void fillUploadRequest(Aws::S3::Model::UploadPartRequest & req); void processUploadRequest(UploadPartTask & task); struct PutObjectTask; @@ -88,14 +88,16 @@ private: const String bucket; const String key; - const S3Settings::RequestSettings request_settings; + const S3Settings::RequestSettings::PartUploadSettings settings; + const bool check_objects_after_upload = false; + const size_t max_unexpected_write_error_retries = 4; const 
std::shared_ptr client_ptr; const std::optional> object_metadata; size_t upload_part_size = 0; std::shared_ptr temporary_buffer; /// Buffer to accumulate data. size_t last_part_size = 0; - std::atomic total_parts_uploaded = 0; + size_t part_number = 0; /// Upload in S3 is made in parts. /// We initiate upload, then upload each part and get ETag as a response, and then finalizeImpl() upload with listing all our parts. diff --git a/src/Interpreters/Access/InterpreterCreateRoleQuery.cpp b/src/Interpreters/Access/InterpreterCreateRoleQuery.cpp index d623d510ffd..3386dfb8792 100644 --- a/src/Interpreters/Access/InterpreterCreateRoleQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateRoleQuery.cpp @@ -40,13 +40,18 @@ BlockIO InterpreterCreateRoleQuery::execute() else getContext()->checkAccess(AccessType::CREATE_ROLE); - if (!query.cluster.empty()) - return executeDDLQueryOnCluster(query_ptr, getContext()); - std::optional settings_from_query; if (query.settings) + { settings_from_query = SettingsProfileElements{*query.settings, access_control}; + if (!query.attach) + getContext()->checkSettingsConstraints(*settings_from_query); + } + + if (!query.cluster.empty()) + return executeDDLQueryOnCluster(query_ptr, getContext()); + if (query.alter) { auto update_func = [&](const AccessEntityPtr & entity) -> AccessEntityPtr diff --git a/src/Interpreters/Access/InterpreterCreateSettingsProfileQuery.cpp b/src/Interpreters/Access/InterpreterCreateSettingsProfileQuery.cpp index 95c2a58388a..0727b6f2182 100644 --- a/src/Interpreters/Access/InterpreterCreateSettingsProfileQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateSettingsProfileQuery.cpp @@ -48,16 +48,21 @@ BlockIO InterpreterCreateSettingsProfileQuery::execute() else getContext()->checkAccess(AccessType::CREATE_SETTINGS_PROFILE); + std::optional settings_from_query; + if (query.settings) + { + settings_from_query = SettingsProfileElements{*query.settings, access_control}; + + if (!query.attach) + getContext()->checkSettingsConstraints(*settings_from_query); + } + if (!query.cluster.empty()) { query.replaceCurrentUserTag(getContext()->getUserName()); return executeDDLQueryOnCluster(query_ptr, getContext()); } - std::optional settings_from_query; - if (query.settings) - settings_from_query = SettingsProfileElements{*query.settings, access_control}; - std::optional roles_from_query; if (query.to_roles) roles_from_query = RolesOrUsersSet{*query.to_roles, access_control, getContext()->getUserID()}; diff --git a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp index c36dd1c98d6..1aa5cd03530 100644 --- a/src/Interpreters/Access/InterpreterCreateUserQuery.cpp +++ b/src/Interpreters/Access/InterpreterCreateUserQuery.cpp @@ -108,6 +108,12 @@ BlockIO InterpreterCreateUserQuery::execute() throw Exception(ErrorCodes::BAD_ARGUMENTS, "Authentication type NO_PASSWORD must be explicitly specified, check the setting allow_implicit_no_password in the server configuration"); + if (!query.attach && query.temporary_password_for_checks) + { + access_control.checkPasswordComplexityRules(query.temporary_password_for_checks.value()); + query.temporary_password_for_checks.reset(); + } + std::optional default_roles_from_query; if (query.default_roles) { @@ -118,12 +124,19 @@ BlockIO InterpreterCreateUserQuery::execute() access->checkAdminOption(role); } } - if (!query.cluster.empty()) - return executeDDLQueryOnCluster(query_ptr, getContext()); + std::optional settings_from_query; if (query.settings) + { 
settings_from_query = SettingsProfileElements{*query.settings, access_control}; + if (!query.attach) + getContext()->checkSettingsConstraints(*settings_from_query); + } + + if (!query.cluster.empty()) + return executeDDLQueryOnCluster(query_ptr, getContext()); + if (query.alter) { std::optional grantees_from_query; diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index a7ca6ed521d..02704f7fc78 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -1537,12 +1537,39 @@ ActionsDAG::SplitResult ActionsDAG::splitActionsBeforeArrayJoin(const NameSet & return res; } +ActionsDAG::NodeRawConstPtrs ActionsDAG::getParents(const Node * target) const +{ + NodeRawConstPtrs parents; + for (const auto & node : getNodes()) + { + for (const auto & child : node.children) + { + if (child == target) + { + parents.push_back(&node); + break; + } + } + } + return parents; +} + ActionsDAG::SplitResult ActionsDAG::splitActionsBySortingDescription(const NameSet & sort_columns) const { std::unordered_set split_nodes; for (const auto & sort_column : sort_columns) if (const auto * node = tryFindInOutputs(sort_column)) + { split_nodes.insert(node); + /// Sorting can materialize const columns, so if we have const expression used in sorting, + /// we should also add all it's parents, otherwise, we can break the header + /// (function can expect const column, but will get materialized). + if (node->column && isColumnConst(*node->column)) + { + auto parents = getParents(node); + split_nodes.insert(parents.begin(), parents.end()); + } + } else throw Exception(ErrorCodes::LOGICAL_ERROR, "Sorting column {} wasn't found in the ActionsDAG's outputs. DAG:\n{}", diff --git a/src/Interpreters/ActionsDAG.h b/src/Interpreters/ActionsDAG.h index 7f00250505c..a532dd0c436 100644 --- a/src/Interpreters/ActionsDAG.h +++ b/src/Interpreters/ActionsDAG.h @@ -343,6 +343,8 @@ public: const ContextPtr & context); private: + NodeRawConstPtrs getParents(const Node * target) const; + Node & addNode(Node node); #if USE_EMBEDDED_COMPILER diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 72fa1b3c324..db95b161a4f 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -1107,8 +1107,10 @@ void FileCache::reduceSizeToDownloaded( file_segment->getInfoForLogUnlocked(segment_lock)); } + CreateFileSegmentSettings create_settings{ .is_persistent = file_segment->is_persistent }; + cell->file_segment = std::make_shared( - offset, downloaded_size, key, this, FileSegment::State::DOWNLOADED, CreateFileSegmentSettings{}); + offset, downloaded_size, key, this, FileSegment::State::DOWNLOADED, create_settings); assert(file_segment->reserved_size == downloaded_size); } diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index 418bcee05d9..177c6aecf7c 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -56,6 +56,7 @@ FileSegment::FileSegment( { reserved_size = downloaded_size = size_; is_downloaded = true; + chassert(std::filesystem::file_size(getPathInLocalCache()) == size_); break; } case (State::SKIP_CACHE): @@ -331,6 +332,8 @@ void FileSegment::write(const char * from, size_t size, size_t offset) cache_writer->next(); downloaded_size += size; + + chassert(std::filesystem::file_size(getPathInLocalCache()) == downloaded_size); } catch (Exception & e) { @@ -345,9 +348,7 @@ void FileSegment::write(const char * from, size_t size, size_t 
offset) throw; } -#ifndef NDEBUG chassert(getFirstNonDownloadedOffset() == offset + size); -#endif } FileSegment::State FileSegment::wait() @@ -545,6 +546,13 @@ void FileSegment::completeBasedOnCurrentState(std::lock_guard & cach resetDownloaderUnlocked(segment_lock); } + if (cache_writer && (is_downloader || is_last_holder)) + { + cache_writer->finalize(); + cache_writer.reset(); + remote_file_reader.reset(); + } + switch (download_state) { case State::SKIP_CACHE: @@ -557,8 +565,9 @@ void FileSegment::completeBasedOnCurrentState(std::lock_guard & cach case State::DOWNLOADED: { chassert(getDownloadedSizeUnlocked(segment_lock) == range().size()); - assert(is_downloaded); - assert(!cache_writer); + chassert(getDownloadedSizeUnlocked(segment_lock) == std::filesystem::file_size(getPathInLocalCache())); + chassert(is_downloaded); + chassert(!cache_writer); break; } case State::DOWNLOADING: diff --git a/src/Interpreters/ClientInfo.cpp b/src/Interpreters/ClientInfo.cpp index abd6f226ff5..b457df53db9 100644 --- a/src/Interpreters/ClientInfo.cpp +++ b/src/Interpreters/ClientInfo.cpp @@ -203,7 +203,7 @@ void ClientInfo::setInitialQuery() void ClientInfo::fillOSUserHostNameAndVersionInfo() { os_user.resize(256, '\0'); - if (0 == getlogin_r(os_user.data(), os_user.size() - 1)) + if (0 == getlogin_r(os_user.data(), static_cast(os_user.size() - 1))) os_user.resize(strlen(os_user.c_str())); else os_user.clear(); /// Don't mind if we cannot determine user login. diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index bfd29de970d..c124eb1c881 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -54,6 +54,7 @@ #include #include #include +#include #include #include #include @@ -221,6 +222,7 @@ struct ContextSharedPart : boost::noncopyable String system_profile_name; /// Profile used by system processes String buffer_profile_name; /// Profile used by Buffer engine for flushing to the underlying std::unique_ptr access_control; + mutable ResourceManagerPtr resource_manager; mutable UncompressedCachePtr uncompressed_cache; /// The cache of decompressed blocks. mutable MarkCachePtr mark_cache; /// Cache of marks in compressed files. mutable std::unique_ptr load_marks_threadpool; /// Threadpool for loading marks cache. @@ -1065,6 +1067,21 @@ std::vector Context::getEnabledProfiles() const } +ResourceManagerPtr Context::getResourceManager() const +{ + auto lock = getLock(); + if (!shared->resource_manager) + shared->resource_manager = ResourceManagerFactory::instance().get(getConfigRef().getString("resource_manager", "static")); + return shared->resource_manager; +} + +ClassifierPtr Context::getClassifier() const +{ + auto lock = getLock(); + return getResourceManager()->acquire(getSettingsRef().workload); +} + + const Scalars & Context::getScalars() const { return scalars; @@ -1254,6 +1271,7 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const if (select_query_hint && getSettingsRef().use_structure_from_insertion_table_in_table_functions == 2) { const auto * expression_list = select_query_hint->select()->as(); + std::unordered_set virtual_column_names = table_function_ptr->getVirtualsToCheckBeforeUsingStructureHint(); Names columns_names; bool have_asterisk = false; /// First, check if we have only identifiers, asterisk and literals in select expression, @@ -1275,10 +1293,10 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression, const } } - /// Check that all identifiers are column names from insertion table. 
+ /// Check that all identifiers are column names from insertion table and not virtual column names from storage. for (const auto & column_name : columns_names) { - if (!structure_hint.has(column_name)) + if (!structure_hint.has(column_name) || virtual_column_names.contains(column_name)) { use_columns_from_insert_query = false; break; @@ -1411,6 +1429,11 @@ void Context::applySettingsChanges(const SettingsChanges & changes) } +void Context::checkSettingsConstraints(const SettingsProfileElements & profile_elements) const +{ + getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, profile_elements); +} + void Context::checkSettingsConstraints(const SettingChange & change) const { getSettingsConstraintsAndCurrentProfiles()->constraints.check(settings, change); diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 807467959a0..655f65aa69a 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -110,6 +111,7 @@ class AccessControl; class Credentials; class GSSAcceptorContext; struct SettingsConstraintsAndProfileIDs; +class SettingsProfileElements; class RemoteHostFilter; struct StorageID; class IDisk; @@ -539,6 +541,10 @@ public: std::shared_ptr getQuota() const; std::optional getQuotaUsage() const; + /// Resource management related + ResourceManagerPtr getResourceManager() const; + ClassifierPtr getClassifier() const; + /// We have to copy external tables inside executeQuery() to track limits. Therefore, set callback for it. Must set once. void setExternalTablesInitializer(ExternalTablesInitializer && initializer); /// This method is called in executeQuery() and will call the external tables initializer. @@ -658,6 +664,7 @@ public: void applySettingsChanges(const SettingsChanges & changes); /// Checks the constraints. 
+ void checkSettingsConstraints(const SettingsProfileElements & profile_elements) const; void checkSettingsConstraints(const SettingChange & change) const; void checkSettingsConstraints(const SettingsChanges & changes) const; void checkSettingsConstraints(SettingsChanges & changes) const; diff --git a/src/Interpreters/ConvertStringsToEnumVisitor.cpp b/src/Interpreters/ConvertStringsToEnumVisitor.cpp index 745e484022c..b141f75fbec 100644 --- a/src/Interpreters/ConvertStringsToEnumVisitor.cpp +++ b/src/Interpreters/ConvertStringsToEnumVisitor.cpp @@ -141,7 +141,7 @@ void ConvertStringsToEnumMatcher::visit(ASTFunction & function_node, Data & data if (function_node.name == "if") { - if (function_node.arguments->children.size() != 2) + if (function_node.arguments->children.size() != 3) return; const ASTLiteral * literal1 = function_node.arguments->children[1]->as(); diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index a76b13e5dcf..6ac01a9473f 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -1126,6 +1126,7 @@ void DatabaseCatalog::cleanupStoreDirectoryTask() continue; size_t affected_dirs = 0; + size_t checked_dirs = 0; for (auto it = disk->iterateDirectory("store"); it->isValid(); it->next()) { String prefix = it->name(); @@ -1135,6 +1136,7 @@ void DatabaseCatalog::cleanupStoreDirectoryTask() if (!expected_prefix_dir) { LOG_WARNING(log, "Found invalid directory {} on disk {}, will try to remove it", it->path(), disk_name); + checked_dirs += 1; affected_dirs += maybeRemoveDirectory(disk_name, disk, it->path()); continue; } @@ -1150,6 +1152,7 @@ void DatabaseCatalog::cleanupStoreDirectoryTask() if (!expected_dir) { LOG_WARNING(log, "Found invalid directory {} on disk {}, will try to remove it", jt->path(), disk_name); + checked_dirs += 1; affected_dirs += maybeRemoveDirectory(disk_name, disk, jt->path()); continue; } @@ -1161,6 +1164,7 @@ void DatabaseCatalog::cleanupStoreDirectoryTask() /// so it looks safe enough to remove directory if we don't have uuid mapping for it. /// No table or database using this directory should concurrently appear, /// because creation of new table would fail with "directory already exists". 
+ checked_dirs += 1; affected_dirs += maybeRemoveDirectory(disk_name, disk, jt->path()); } } @@ -1168,7 +1172,7 @@ void DatabaseCatalog::cleanupStoreDirectoryTask() if (affected_dirs) LOG_INFO(log, "Cleaned up {} directories from store/ on disk {}", affected_dirs, disk_name); - else + if (checked_dirs == 0) LOG_TEST(log, "Nothing to clean up from store/ on disk {}", disk_name); } diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 22229c0d6c2..bc93abff534 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -15,6 +15,7 @@ #include #include +#include #include #include #include @@ -33,6 +34,7 @@ #include #include +#include #include #include @@ -1831,7 +1833,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( ssize_t where_step_num = -1; ssize_t having_step_num = -1; - auto finalize_chain = [&](ExpressionActionsChain & chain) + auto finalize_chain = [&](ExpressionActionsChain & chain) -> ColumnsWithTypeAndName { if (prewhere_step_num >= 0) { @@ -1852,7 +1854,9 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( finalize(chain, prewhere_step_num, where_step_num, having_step_num, query); + auto res = chain.getLastStep().getResultColumns(); chain.clear(); + return res; }; { @@ -1970,7 +1974,55 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( if (settings.group_by_use_nulls) query_analyzer.appendGroupByModifiers(before_aggregation, chain, only_types); - finalize_chain(chain); + auto columns_before_aggregation = finalize_chain(chain); + + /// Here we want to check that columns after aggregation have the same type as + /// were promised in query_analyzer.aggregated_columns + /// Ideally, they should be equal. In practice, this may be not true. + /// As an example, we don't build sets for IN inside ExpressionAnalysis::analyzeAggregation, + /// so that constant folding for expression (1 in 1) will not work. This may change the return type + /// for functions with LowCardinality argument: function "substr(toLowCardinality('abc'), 1 IN 1)" + /// should usually return LowCardinality(String) when (1 IN 1) is constant, but without built set + /// for (1 IN 1) constant is not propagated and "substr" returns String type. + /// See 02503_in_lc_const_args_bug.sql + /// + /// As a temporary solution, we add converting actions to the next chain. 
+ /// Hopefully, later we can + /// * use a new analyzer where this issue is absent + /// * or remove ExpressionActionsChain completely and re-implement its logic on top of the query plan + { + for (auto & col : columns_before_aggregation) + if (!col.column) + col.column = col.type->createColumn(); + + Block header_before_aggregation(std::move(columns_before_aggregation)); + + auto keys = query_analyzer.aggregationKeys().getNames(); + const auto & aggregates = query_analyzer.aggregates(); + + bool has_grouping = query_analyzer.group_by_kind != GroupByKind::ORDINARY; + auto actual_header = Aggregator::Params::getHeader( + header_before_aggregation, /*only_merge*/ false, keys, aggregates, /*final*/ true); + actual_header = AggregatingStep::appendGroupingColumn( + std::move(actual_header), keys, has_grouping, settings.group_by_use_nulls); + + Block expected_header; + for (const auto & expected : query_analyzer.aggregated_columns) + expected_header.insert(ColumnWithTypeAndName(expected.type, expected.name)); + + if (!blocksHaveEqualStructure(actual_header, expected_header)) + { + auto converting = ActionsDAG::makeConvertingActions( + actual_header.getColumnsWithTypeAndName(), + expected_header.getColumnsWithTypeAndName(), + ActionsDAG::MatchColumnsMode::Name, + true); + + auto & step = chain.lastStep(query_analyzer.aggregated_columns); + auto & actions = step.actions(); + actions = ActionsDAG::merge(std::move(*actions), std::move(*converting)); + } + } if (query_analyzer.appendHaving(chain, only_types || !second_stage)) { diff --git a/src/Interpreters/GraceHashJoin.cpp b/src/Interpreters/GraceHashJoin.cpp index 5ef27613591..b8c6c639e82 100644 --- a/src/Interpreters/GraceHashJoin.cpp +++ b/src/Interpreters/GraceHashJoin.cpp @@ -236,22 +236,32 @@ private: Poco::Logger * log; }; - -static void flushBlocksToBuckets(Blocks & blocks, const GraceHashJoin::Buckets & buckets_snapshot) +namespace { - assert(blocks.size() == buckets_snapshot.size()); +template +void flushBlocksToBuckets(Blocks & blocks, const GraceHashJoin::Buckets & buckets) +{ + chassert(blocks.size() == buckets.size()); retryForEach( - generateRandomPermutation(1, buckets_snapshot.size()), + generateRandomPermutation(1, buckets.size()), // skipping 0 block, since we join it in memory w/o spilling on disk [&](size_t i) { if (!blocks[i].rows()) return true; - bool flushed = buckets_snapshot[i]->tryAddRightBlock(blocks[i]); + + bool flushed = false; + if constexpr (table_side == JoinTableSide::Left) + flushed = buckets[i]->tryAddLeftBlock(blocks[i]); + if constexpr (table_side == JoinTableSide::Right) + flushed = buckets[i]->tryAddRightBlock(blocks[i]); + if (flushed) blocks[i].clear(); + return flushed; }); } +} GraceHashJoin::GraceHashJoin( ContextPtr context_, std::shared_ptr table_join_, @@ -274,7 +284,6 @@ GraceHashJoin::GraceHashJoin( { if (!GraceHashJoin::isSupported(table_join)) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "GraceHashJoin is not supported for this join type"); - } void GraceHashJoin::initBuckets() @@ -382,8 +391,9 @@ void GraceHashJoin::joinBlock(Block & block, std::shared_ptr & not_p materializeBlockInplace(block); - Buckets buckets_snapshot = getCurrentBuckets(); - size_t num_buckets = buckets_snapshot.size(); + /// number of buckets doesn't change after right table is split to buckets, i.e. 
read-only access to buckets + /// so, no need to copy buckets here + size_t num_buckets = getNumBuckets(); Blocks blocks = JoinCommon::scatterBlockByHash(left_key_names, block, num_buckets); block = std::move(blocks[current_bucket->idx]); @@ -392,15 +402,7 @@ void GraceHashJoin::joinBlock(Block & block, std::shared_ptr & not_p if (not_processed) throw Exception(ErrorCodes::LOGICAL_ERROR, "Unhandled not processed block in GraceHashJoin"); - // We need to skip the first bucket that is already joined in memory, so we start with 1. - retryForEach( - generateRandomPermutation(1, num_buckets), - [&blocks, &buckets_snapshot](size_t idx) - { - if (blocks[idx].rows() == 0) - return true; - return buckets_snapshot[idx]->tryAddLeftBlock(blocks[idx]); - }); + flushBlocksToBuckets(blocks, buckets); } void GraceHashJoin::setTotals(const Block & block) @@ -428,9 +430,11 @@ bool GraceHashJoin::alwaysReturnsEmptySet() const if (!isInnerOrRight(table_join->kind())) return false; - std::shared_lock lock(rehash_mutex); - - bool file_buckets_are_empty = std::all_of(buckets.begin(), buckets.end(), [](const auto & bucket) { return bucket->empty(); }); + bool file_buckets_are_empty = [this]() + { + std::shared_lock lock(rehash_mutex); + return std::all_of(buckets.begin(), buckets.end(), [](const auto & bucket) { return bucket->empty(); }); + }(); bool hash_join_is_empty = hash_join && hash_join->alwaysReturnsEmptySet(); return hash_join_is_empty && file_buckets_are_empty; @@ -610,7 +614,7 @@ void GraceHashJoin::addJoinedBlockImpl(Block block) blocks[bucket_index].clear(); } - flushBlocksToBuckets(blocks, buckets_snapshot); + flushBlocksToBuckets(blocks, buckets_snapshot); } size_t GraceHashJoin::getNumBuckets() const diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index f79ea950436..9fd577318f8 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -225,7 +225,7 @@ HashJoin::HashJoin(std::shared_ptr table_join_, const Block & right_s , right_sample_block(right_sample_block_) , log(&Poco::Logger::get("HashJoin")) { - LOG_DEBUG(log, "Datatype: {}, kind: {}, strictness: {}", data->type, kind, strictness); + LOG_DEBUG(log, "Datatype: {}, kind: {}, strictness: {}, right header: {}", data->type, kind, strictness, right_sample_block.dumpStructure()); LOG_DEBUG(log, "Keys: {}", TableJoin::formatClauses(table_join->getClauses(), true)); if (isCrossOrComma(kind)) diff --git a/src/Interpreters/InterpreterAlterNamedCollectionQuery.cpp b/src/Interpreters/InterpreterAlterNamedCollectionQuery.cpp index cda91cd4ba1..866d38f3aa5 100644 --- a/src/Interpreters/InterpreterAlterNamedCollectionQuery.cpp +++ b/src/Interpreters/InterpreterAlterNamedCollectionQuery.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp b/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp index c7397d3d64c..ccbbac71279 100644 --- a/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp +++ b/src/Interpreters/InterpreterCreateNamedCollectionQuery.cpp @@ -4,7 +4,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp b/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp index cb237287dc3..2205d6cff88 100644 --- a/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp +++ b/src/Interpreters/InterpreterDropNamedCollectionQuery.cpp @@ -3,7 +3,7 @@ #include #include #include -#include +#include namespace DB diff 
--git a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp index ad3ec9b3189..fb77f0997d8 100644 --- a/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp +++ b/src/Interpreters/InterpreterSelectQueryAnalyzer.cpp @@ -112,6 +112,11 @@ QueryPlan && InterpreterSelectQueryAnalyzer::extractQueryPlan() && return std::move(planner).extractQueryPlan(); } +void InterpreterSelectQueryAnalyzer::addStorageLimits(const StorageLimitsList & storage_limits) +{ + planner.addStorageLimits(storage_limits); +} + void InterpreterSelectQueryAnalyzer::extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr &, ContextPtr) const { elem.query_kind = "Select"; diff --git a/src/Interpreters/InterpreterSelectQueryAnalyzer.h b/src/Interpreters/InterpreterSelectQueryAnalyzer.h index 1dbe5ffccc6..04dfe4e0948 100644 --- a/src/Interpreters/InterpreterSelectQueryAnalyzer.h +++ b/src/Interpreters/InterpreterSelectQueryAnalyzer.h @@ -41,6 +41,8 @@ public: QueryPlan && extractQueryPlan() &&; + void addStorageLimits(const StorageLimitsList & storage_limits); + bool supportsTransactions() const override { return true; } bool ignoreLimits() const override { return select_query_options.ignore_limits; } diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index 5d065e564b2..aa4f821657f 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -672,6 +672,11 @@ String TableJoin::renamedRightColumnName(const String & name) const return name; } +void TableJoin::setRename(const String & from, const String & to) +{ + renames[from] = to; +} + void TableJoin::addKey(const String & left_name, const String & right_name, const ASTPtr & left_ast, const ASTPtr & right_ast) { clauses.back().key_names_left.emplace_back(left_name); diff --git a/src/Interpreters/TableJoin.h b/src/Interpreters/TableJoin.h index 874e68b0b97..9d03c9bd57b 100644 --- a/src/Interpreters/TableJoin.h +++ b/src/Interpreters/TableJoin.h @@ -334,6 +334,7 @@ public: Block getRequiredRightKeys(const Block & right_table_keys, std::vector & keys_sources) const; String renamedRightColumnName(const String & name) const; + void setRename(const String & from, const String & to); void resetKeys(); void resetToCross(); diff --git a/src/Interpreters/TraceCollector.cpp b/src/Interpreters/TraceCollector.cpp index 050dea02717..367249f1289 100644 --- a/src/Interpreters/TraceCollector.cpp +++ b/src/Interpreters/TraceCollector.cpp @@ -97,6 +97,9 @@ void TraceCollector::run() Int64 size; readPODBinary(size, in); + UInt64 ptr; + readPODBinary(ptr, in); + ProfileEvents::Event event; readPODBinary(event, in); @@ -112,7 +115,7 @@ void TraceCollector::run() UInt64 time = static_cast(ts.tv_sec * 1000000000LL + ts.tv_nsec); UInt64 time_in_microseconds = static_cast((ts.tv_sec * 1000000LL) + (ts.tv_nsec / 1000)); - TraceLogElement element{time_t(time / 1000000000), time_in_microseconds, time, trace_type, thread_id, query_id, trace, size, event, increment}; + TraceLogElement element{time_t(time / 1000000000), time_in_microseconds, time, trace_type, thread_id, query_id, trace, size, ptr, event, increment}; trace_log->add(element); } } diff --git a/src/Interpreters/TraceLog.cpp b/src/Interpreters/TraceLog.cpp index 0408ebe504b..cd5f965a679 100644 --- a/src/Interpreters/TraceLog.cpp +++ b/src/Interpreters/TraceLog.cpp @@ -38,6 +38,7 @@ NamesAndTypesList TraceLogElement::getNamesAndTypes() {"query_id", std::make_shared()}, {"trace", std::make_shared(std::make_shared())}, {"size", 
std::make_shared()}, + {"ptr", std::make_shared()}, {"event", std::make_shared(std::make_shared())}, {"increment", std::make_shared()}, }; @@ -57,6 +58,7 @@ void TraceLogElement::appendToBlock(MutableColumns & columns) const columns[i++]->insertData(query_id.data(), query_id.size()); columns[i++]->insert(trace); columns[i++]->insert(size); + columns[i++]->insert(ptr); String event_name; if (event != ProfileEvents::end()) diff --git a/src/Interpreters/TraceLog.h b/src/Interpreters/TraceLog.h index c481f033a72..71aec0b50c4 100644 --- a/src/Interpreters/TraceLog.h +++ b/src/Interpreters/TraceLog.h @@ -27,8 +27,10 @@ struct TraceLogElement UInt64 thread_id{}; String query_id{}; Array trace{}; - /// Allocation size in bytes for TraceType::Memory. + /// Allocation size in bytes for TraceType::Memory and TraceType::MemorySample. Int64 size{}; + /// Allocation ptr for TraceType::MemorySample. + UInt64 ptr{}; /// ProfileEvent for TraceType::ProfileEvent. ProfileEvents::Event event{ProfileEvents::end()}; /// Increment of profile event for TraceType::ProfileEvent. diff --git a/src/Interpreters/createBlockSelector.cpp b/src/Interpreters/createBlockSelector.cpp index fce9833ddfb..659fc483373 100644 --- a/src/Interpreters/createBlockSelector.cpp +++ b/src/Interpreters/createBlockSelector.cpp @@ -5,16 +5,7 @@ #include -#if defined(__SSE2__) -# define LIBDIVIDE_SSE2 -#elif defined(__AVX512F__) || defined(__AVX512BW__) || defined(__AVX512VL__) -# define LIBDIVIDE_AVX512 -#elif defined(__AVX2__) -# define LIBDIVIDE_AVX2 -#elif defined(__aarch64__) && defined(__ARM_NEON) -# define LIBDIVIDE_NEON -#endif - +#include #include diff --git a/src/Interpreters/getClusterName.cpp b/src/Interpreters/getClusterName.cpp index d3c53b28cdf..dc3e9b41628 100644 --- a/src/Interpreters/getClusterName.cpp +++ b/src/Interpreters/getClusterName.cpp @@ -18,18 +18,31 @@ namespace ErrorCodes std::string getClusterName(const IAST & node) +{ + auto name = tryGetClusterName(node); + if (!name) + throw Exception("Illegal expression instead of cluster name.", ErrorCodes::BAD_ARGUMENTS); + return std::move(name).value(); +} + + +std::optional tryGetClusterName(const IAST & node) { if (const auto * ast_id = node.as()) return ast_id->name(); if (const auto * ast_lit = node.as()) - return checkAndGetLiteralArgument(*ast_lit, "cluster_name"); + { + if (ast_lit->value.getType() != Field::Types::String) + return {}; + return ast_lit->value.safeGet(); + } /// A hack to support hyphens in cluster names. if (const auto * ast_func = node.as()) { if (ast_func->name != "minus" || !ast_func->arguments || ast_func->arguments->children.size() < 2) - throw Exception("Illegal expression instead of cluster name.", ErrorCodes::BAD_ARGUMENTS); + return {}; String name; for (const auto & arg : ast_func->arguments->children) @@ -43,7 +56,7 @@ std::string getClusterName(const IAST & node) return name; } - throw Exception("Illegal expression instead of cluster name.", ErrorCodes::BAD_ARGUMENTS); + return {}; } diff --git a/src/Interpreters/getClusterName.h b/src/Interpreters/getClusterName.h index faf8975ede1..59952587a9a 100644 --- a/src/Interpreters/getClusterName.h +++ b/src/Interpreters/getClusterName.h @@ -15,6 +15,7 @@ namespace DB * Therefore, consider this case separately. 
*/ std::string getClusterName(const IAST & node); +std::optional tryGetClusterName(const IAST & node); std::string getClusterNameAndMakeLiteral(ASTPtr & node); diff --git a/src/Interpreters/tests/gtest_lru_file_cache.cpp b/src/Interpreters/tests/gtest_lru_file_cache.cpp index 22150b9f656..5bad3e9bba2 100644 --- a/src/Interpreters/tests/gtest_lru_file_cache.cpp +++ b/src/Interpreters/tests/gtest_lru_file_cache.cpp @@ -80,7 +80,7 @@ void complete(const DB::FileSegmentsHolder & holder) { ASSERT_TRUE(file_segment->getOrSetDownloader() == DB::FileSegment::getCallerId()); prepareAndDownload(file_segment); - file_segment->completeWithState(DB::FileSegment::State::DOWNLOADED); + file_segment->completeWithoutState(); } } @@ -127,7 +127,7 @@ TEST(FileCache, get) assertRange(2, segments[0], DB::FileSegment::Range(0, 9), DB::FileSegment::State::DOWNLOADING); download(segments[0]); - segments[0]->completeWithState(DB::FileSegment::State::DOWNLOADED); + segments[0]->completeWithoutState(); assertRange(3, segments[0], DB::FileSegment::Range(0, 9), DB::FileSegment::State::DOWNLOADED); } @@ -148,7 +148,7 @@ TEST(FileCache, get) ASSERT_TRUE(segments[1]->getOrSetDownloader() == DB::FileSegment::getCallerId()); prepareAndDownload(segments[1]); - segments[1]->completeWithState(DB::FileSegment::State::DOWNLOADED); + segments[1]->completeWithoutState(); assertRange(6, segments[1], DB::FileSegment::Range(10, 14), DB::FileSegment::State::DOWNLOADED); } @@ -205,7 +205,7 @@ TEST(FileCache, get) ASSERT_TRUE(segments[2]->getOrSetDownloader() == DB::FileSegment::getCallerId()); prepareAndDownload(segments[2]); - segments[2]->completeWithState(DB::FileSegment::State::DOWNLOADED); + segments[2]->completeWithoutState(); assertRange(14, segments[3], DB::FileSegment::Range(17, 20), DB::FileSegment::State::DOWNLOADED); @@ -246,7 +246,7 @@ TEST(FileCache, get) ASSERT_TRUE(segments[3]->getOrSetDownloader() == DB::FileSegment::getCallerId()); prepareAndDownload(segments[3]); - segments[3]->completeWithState(DB::FileSegment::State::DOWNLOADED); + segments[3]->completeWithoutState(); ASSERT_TRUE(segments[3]->state() == DB::FileSegment::State::DOWNLOADED); } @@ -269,8 +269,8 @@ TEST(FileCache, get) ASSERT_TRUE(segments[2]->getOrSetDownloader() == DB::FileSegment::getCallerId()); prepareAndDownload(segments[0]); prepareAndDownload(segments[2]); - segments[0]->completeWithState(DB::FileSegment::State::DOWNLOADED); - segments[2]->completeWithState(DB::FileSegment::State::DOWNLOADED); + segments[0]->completeWithoutState(); + segments[2]->completeWithoutState(); } /// Current cache: [____][_] [][___][__] @@ -292,8 +292,8 @@ TEST(FileCache, get) ASSERT_TRUE(s1[0]->getOrSetDownloader() == DB::FileSegment::getCallerId()); prepareAndDownload(s5[0]); prepareAndDownload(s1[0]); - s5[0]->completeWithState(DB::FileSegment::State::DOWNLOADED); - s1[0]->completeWithState(DB::FileSegment::State::DOWNLOADED); + s5[0]->completeWithoutState(); + s1[0]->completeWithoutState(); /// Current cache: [___] [_][___][_] [__] /// ^ ^ ^ ^ ^ ^ ^ ^ @@ -395,7 +395,7 @@ TEST(FileCache, get) } prepareAndDownload(segments[2]); - segments[2]->completeWithState(DB::FileSegment::State::DOWNLOADED); + segments[2]->completeWithoutState(); ASSERT_TRUE(segments[2]->state() == DB::FileSegment::State::DOWNLOADED); other_1.join(); @@ -460,7 +460,7 @@ TEST(FileCache, get) ASSERT_TRUE(segments_2[1]->getOrSetDownloader() == DB::FileSegment::getCallerId()); prepareAndDownload(segments_2[1]); - segments_2[1]->completeWithState(DB::FileSegment::State::DOWNLOADED); + 
segments_2[1]->completeWithoutState(); }); { diff --git a/src/Interpreters/threadPoolCallbackRunner.cpp b/src/Interpreters/threadPoolCallbackRunner.cpp deleted file mode 100644 index 9bf32e4f2e1..00000000000 --- a/src/Interpreters/threadPoolCallbackRunner.cpp +++ /dev/null @@ -1,44 +0,0 @@ -#include "threadPoolCallbackRunner.h" - -#include -#include -#include -#include -#include - - -namespace DB -{ - -template ThreadPoolCallbackRunner threadPoolCallbackRunner(ThreadPool & pool, const std::string & thread_name) -{ - return [pool = &pool, thread_group = CurrentThread::getGroup(), thread_name](std::function && callback, size_t priority) mutable -> std::future - { - auto task = std::make_shared>([thread_group, thread_name, callback = std::move(callback)]() -> Result - { - if (thread_group) - CurrentThread::attachTo(thread_group); - - SCOPE_EXIT_SAFE({ - if (thread_group) - CurrentThread::detachQueryIfNotDetached(); - }); - - setThreadName(thread_name.data()); - - return callback(); - }); - - auto future = task->get_future(); - - /// ThreadPool is using "bigger is higher priority" instead of "smaller is more priority". - pool->scheduleOrThrow([task]{ (*task)(); }, -priority); - - return future; - }; -} - -template ThreadPoolCallbackRunner threadPoolCallbackRunner(ThreadPool & pool, const std::string & thread_name); -template ThreadPoolCallbackRunner threadPoolCallbackRunner(ThreadPool & pool, const std::string & thread_name); - -} diff --git a/src/Interpreters/threadPoolCallbackRunner.h b/src/Interpreters/threadPoolCallbackRunner.h index c146ac67482..9cadcc0ebb0 100644 --- a/src/Interpreters/threadPoolCallbackRunner.h +++ b/src/Interpreters/threadPoolCallbackRunner.h @@ -1,9 +1,11 @@ #pragma once #include +#include +#include +#include #include - namespace DB { @@ -13,6 +15,32 @@ using ThreadPoolCallbackRunner = std::function(std::function /// Creates CallbackRunner that runs every callback with 'pool->scheduleOrThrow()'. template -ThreadPoolCallbackRunner threadPoolCallbackRunner(ThreadPool & pool, const std::string & thread_name); +ThreadPoolCallbackRunner threadPoolCallbackRunner(ThreadPool & pool, const std::string & thread_name) +{ + return [pool = &pool, thread_group = CurrentThread::getGroup(), thread_name](std::function && callback, size_t priority) mutable -> std::future + { + auto task = std::make_shared>([thread_group, thread_name, callback = std::move(callback)]() -> Result + { + if (thread_group) + CurrentThread::attachTo(thread_group); + + SCOPE_EXIT_SAFE({ + if (thread_group) + CurrentThread::detachQueryIfNotDetached(); + }); + + setThreadName(thread_name.data()); + + return callback(); + }); + + auto future = task->get_future(); + + /// ThreadPool is using "bigger is higher priority" instead of "smaller is more priority". 
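        /// Illustrative use of the runner built above (thread name and callback are hypothetical):
        ///     auto runner = threadPoolCallbackRunner<void>(pool, "AsyncWorker");
        ///     std::future<void> f = runner([] { doWork(); }, /* priority = */ 0);
        /// Callers pass "smaller is more urgent" priorities, hence the sign flip below.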
+ pool->scheduleOrThrow([task]{ (*task)(); }, -priority); + + return future; + }; +} } diff --git a/src/Parsers/Access/ASTCreateUserQuery.h b/src/Parsers/Access/ASTCreateUserQuery.h index 8357b9f2b6c..412775d3bf3 100644 --- a/src/Parsers/Access/ASTCreateUserQuery.h +++ b/src/Parsers/Access/ASTCreateUserQuery.h @@ -46,6 +46,8 @@ public: std::optional auth_data; + mutable std::optional temporary_password_for_checks; + std::optional hosts; std::optional add_hosts; std::optional remove_hosts; diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index ed6ecb62667..de83c5760c1 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -51,7 +51,7 @@ namespace } - bool parseAuthenticationData(IParserBase::Pos & pos, Expected & expected, AuthenticationData & auth_data) + bool parseAuthenticationData(IParserBase::Pos & pos, Expected & expected, AuthenticationData & auth_data, std::optional & temporary_password_for_checks) { return IParserBase::wrapParseImpl(pos, [&] { @@ -165,6 +165,10 @@ namespace common_names.insert(ast_child->as().value.safeGet()); } + /// Save password separately for future complexity rules check + if (expect_password) + temporary_password_for_checks = value; + auth_data = AuthenticationData{*type}; if (auth_data.getType() == AuthenticationType::SHA256_PASSWORD) { @@ -438,6 +442,7 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec std::optional new_name; std::optional auth_data; + std::optional temporary_password_for_checks; std::optional hosts; std::optional add_hosts; std::optional remove_hosts; @@ -452,9 +457,11 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (!auth_data) { AuthenticationData new_auth_data; - if (parseAuthenticationData(pos, expected, new_auth_data)) + std::optional new_temporary_password_for_checks; + if (parseAuthenticationData(pos, expected, new_auth_data, new_temporary_password_for_checks)) { auth_data = std::move(new_auth_data); + temporary_password_for_checks = std::move(new_temporary_password_for_checks); continue; } } @@ -539,6 +546,7 @@ bool ParserCreateUserQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec query->names = std::move(names); query->new_name = std::move(new_name); query->auth_data = std::move(auth_data); + query->temporary_password_for_checks = std::move(temporary_password_for_checks); query->hosts = std::move(hosts); query->add_hosts = std::move(add_hosts); query->remove_hosts = std::move(remove_hosts); diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 08027753984..74d14292459 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -830,21 +830,65 @@ bool ParserNumber::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!pos.isValid()) return false; - /** Maximum length of number. 319 symbols is enough to write maximum double in decimal form. - * Copy is needed to use strto* functions, which require 0-terminated string. - */ - static constexpr size_t MAX_LENGTH_OF_NUMBER = 319; + auto try_read_float = [&](const char * it, const char * end) + { + char * str_end; + errno = 0; /// Functions strto* don't clear errno. 
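        /// strtod() signals overflow/underflow by setting errno to ERANGE; such tokens are rejected
        /// below rather than silently accepted as +/-HUGE_VAL or 0.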
+ Float64 float_value = std::strtod(it, &str_end); + if (str_end == end && errno != ERANGE) + { + if (float_value < 0) + throw Exception("Logical error: token number cannot begin with minus, but parsed float number is less than zero.", ErrorCodes::LOGICAL_ERROR); - if (pos->size() > MAX_LENGTH_OF_NUMBER) + if (negative) + float_value = -float_value; + + res = float_value; + + auto literal = std::make_shared(res); + literal->begin = literal_begin; + literal->end = ++pos; + node = literal; + + return true; + } + + expected.add(pos, "number"); + return false; + }; + + /// NaN and Inf + if (pos->type == TokenType::BareWord) + { + return try_read_float(pos->begin, pos->end); + } + + if (pos->type != TokenType::Number) { expected.add(pos, "number"); return false; } + /** Maximum length of number. 319 symbols is enough to write maximum double in decimal form. + * Copy is needed to use strto* functions, which require 0-terminated string. + */ + static constexpr size_t MAX_LENGTH_OF_NUMBER = 319; + char buf[MAX_LENGTH_OF_NUMBER + 1]; - size_t size = pos->size(); - memcpy(buf, pos->begin, size); + size_t buf_size = 0; + for (const auto * it = pos->begin; it != pos->end; ++it) + { + if (*it != '_') + buf[buf_size++] = *it; + if (unlikely(buf_size > MAX_LENGTH_OF_NUMBER)) + { + expected.add(pos, "number"); + return false; + } + } + + size_t size = buf_size; buf[size] = 0; char * start_pos = buf; @@ -915,29 +959,7 @@ bool ParserNumber::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return true; } - char * pos_double = buf; - errno = 0; /// Functions strto* don't clear errno. - Float64 float_value = std::strtod(buf, &pos_double); - if (pos_double == buf + pos->size() && errno != ERANGE) - { - if (float_value < 0) - throw Exception("Logical error: token number cannot begin with minus, but parsed float number is less than zero.", ErrorCodes::LOGICAL_ERROR); - - if (negative) - float_value = -float_value; - - res = float_value; - - auto literal = std::make_shared(res); - literal->begin = literal_begin; - literal->end = ++pos; - node = literal; - - return true; - } - - expected.add(pos, "number"); - return false; + return try_read_float(buf, buf + buf_size); } diff --git a/src/Parsers/Lexer.cpp b/src/Parsers/Lexer.cpp index debcd9e7fd4..6bd27ee62ae 100644 --- a/src/Parsers/Lexer.cpp +++ b/src/Parsers/Lexer.cpp @@ -105,44 +105,71 @@ Token Lexer::nextTokenImpl() if (prev_significant_token_type == TokenType::Dot) { ++pos; - while (pos < end && isNumericASCII(*pos)) + while (pos < end && (isNumericASCII(*pos) || isNumberSeparator(false, false, pos, end))) ++pos; } else { + bool start_of_block = false; /// 0x, 0b bool hex = false; if (pos + 2 < end && *pos == '0' && (pos[1] == 'x' || pos[1] == 'b' || pos[1] == 'X' || pos[1] == 'B')) { + bool is_valid = false; if (pos[1] == 'x' || pos[1] == 'X') - hex = true; - pos += 2; + { + if (isHexDigit(pos[2])) + { + hex = true; + is_valid = true; // hex + } + } + else if (pos[2] == '0' || pos[2] == '1') + is_valid = true; // bin + if (is_valid) + { + pos += 2; + start_of_block = true; + } + else + ++pos; // consume the leading zero - could be an identifier } else ++pos; - while (pos < end && (hex ? isHexDigit(*pos) : isNumericASCII(*pos))) + while (pos < end && ((hex ? isHexDigit(*pos) : isNumericASCII(*pos)) || isNumberSeparator(start_of_block, hex, pos, end))) + { ++pos; + start_of_block = false; + } /// decimal point if (pos < end && *pos == '.') { + start_of_block = true; ++pos; - while (pos < end && (hex ? 
isHexDigit(*pos) : isNumericASCII(*pos))) + while (pos < end && ((hex ? isHexDigit(*pos) : isNumericASCII(*pos)) || isNumberSeparator(start_of_block, hex, pos, end))) + { ++pos; + start_of_block = false; + } } /// exponentiation (base 10 or base 2) if (pos + 1 < end && (hex ? (*pos == 'p' || *pos == 'P') : (*pos == 'e' || *pos == 'E'))) { + start_of_block = true; ++pos; /// sign of exponent. It is always decimal. if (pos + 1 < end && (*pos == '-' || *pos == '+')) ++pos; - while (pos < end && isNumericASCII(*pos)) + while (pos < end && (isNumericASCII(*pos) || isNumberSeparator(start_of_block, false, pos, end))) + { ++pos; + start_of_block = false; + } } } @@ -201,21 +228,29 @@ Token Lexer::nextTokenImpl() || prev_significant_token_type == TokenType::Number)) return Token(TokenType::Dot, token_begin, ++pos); + bool start_of_block = true; ++pos; - while (pos < end && isNumericASCII(*pos)) + while (pos < end && (isNumericASCII(*pos) || isNumberSeparator(start_of_block, false, pos, end))) + { ++pos; + start_of_block = false; + } /// exponentiation if (pos + 1 < end && (*pos == 'e' || *pos == 'E')) { + start_of_block = true; ++pos; /// sign of exponent if (pos + 1 < end && (*pos == '-' || *pos == '+')) ++pos; - while (pos < end && isNumericASCII(*pos)) + while (pos < end && (isNumericASCII(*pos) || isNumberSeparator(start_of_block, false, pos, end))) + { ++pos; + start_of_block = false; + } } return Token(TokenType::Number, token_begin, pos); diff --git a/src/Parsers/ParserSystemQuery.cpp b/src/Parsers/ParserSystemQuery.cpp index 26ba9290d04..ffe8ecd365e 100644 --- a/src/Parsers/ParserSystemQuery.cpp +++ b/src/Parsers/ParserSystemQuery.cpp @@ -17,7 +17,7 @@ namespace ErrorCodes namespace DB { -static bool parseQueryWithOnClusterAndMaybeTable(std::shared_ptr & res, IParser::Pos & pos, +[[nodiscard]] static bool parseQueryWithOnClusterAndMaybeTable(std::shared_ptr & res, IParser::Pos & pos, Expected & expected, bool require_table, bool allow_string_literal) { /// Better form for user: SYSTEM table ON CLUSTER cluster @@ -71,7 +71,7 @@ enum class SystemQueryTargetType Disk }; -static bool parseQueryWithOnClusterAndTarget(std::shared_ptr & res, IParser::Pos & pos, Expected & expected, SystemQueryTargetType target_type) +[[nodiscard]] static bool parseQueryWithOnClusterAndTarget(std::shared_ptr & res, IParser::Pos & pos, Expected & expected, SystemQueryTargetType target_type) { /// Better form for user: SYSTEM target_name ON CLUSTER cluster /// Query rewritten form + form while executing on cluster: SYSTEM ON CLUSTER cluster target_name @@ -136,7 +136,7 @@ static bool parseQueryWithOnClusterAndTarget(std::shared_ptr & r return true; } -static bool parseQueryWithOnCluster(std::shared_ptr & res, IParser::Pos & pos, +[[nodiscard]] static bool parseQueryWithOnCluster(std::shared_ptr & res, IParser::Pos & pos, Expected & expected) { String cluster_str; @@ -196,7 +196,8 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & } case Type::DROP_REPLICA: { - parseQueryWithOnCluster(res, pos, expected); + if (!parseQueryWithOnCluster(res, pos, expected)) + return false; ASTPtr ast; if (!ParserStringLiteral{}.parse(pos, ast, expected)) @@ -239,7 +240,8 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & case Type::RESTART_REPLICA: case Type::SYNC_REPLICA: { - parseQueryWithOnCluster(res, pos, expected); + if (!parseQueryWithOnCluster(res, pos, expected)) + return false; if (!parseDatabaseAndTableAsAST(pos, expected, res->database, res->table)) return 
false; break; @@ -247,7 +249,8 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & case Type::SYNC_DATABASE_REPLICA: { - parseQueryWithOnCluster(res, pos, expected); + if (!parseQueryWithOnCluster(res, pos, expected)) + return false; if (!parseDatabaseAsAST(pos, expected, res->database)) return false; break; @@ -310,7 +313,8 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & } else { - parseQueryWithOnCluster(res, pos, expected); + if (!parseQueryWithOnCluster(res, pos, expected)) + return false; if (ParserKeyword{"ON VOLUME"}.ignore(pos, expected)) { if (!parse_on_volume()) @@ -335,13 +339,15 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & case Type::START_REPLICATED_SENDS: case Type::STOP_REPLICATION_QUEUES: case Type::START_REPLICATION_QUEUES: - parseQueryWithOnCluster(res, pos, expected); + if (!parseQueryWithOnCluster(res, pos, expected)) + return false; parseDatabaseAndTableAsAST(pos, expected, res->database, res->table); break; case Type::SUSPEND: { - parseQueryWithOnCluster(res, pos, expected); + if (!parseQueryWithOnCluster(res, pos, expected)) + return false; ASTPtr seconds; if (!(ParserKeyword{"FOR"}.ignore(pos, expected) @@ -360,7 +366,8 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & ASTPtr ast; if (path_parser.parse(pos, ast, expected)) res->filesystem_cache_path = ast->as()->value.safeGet(); - parseQueryWithOnCluster(res, pos, expected); + if (!parseQueryWithOnCluster(res, pos, expected)) + return false; break; } case Type::DROP_SCHEMA_CACHE: @@ -397,7 +404,8 @@ bool ParserSystemQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected & default: { - parseQueryWithOnCluster(res, pos, expected); + if (!parseQueryWithOnCluster(res, pos, expected)) + return false; break; } } diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index 0cb4f77035b..d88766f3656 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -365,9 +365,9 @@ void Planner::buildQueryPlanIfNeeded() select_query_info.query = select_query_info.original_query; select_query_info.planner_context = planner_context; - StorageLimitsList storage_limits; - storage_limits.push_back(buildStorageLimits(*query_context, select_query_options)); - select_query_info.storage_limits = std::make_shared(storage_limits); + auto current_storage_limits = storage_limits; + current_storage_limits.push_back(buildStorageLimits(*query_context, select_query_options)); + select_query_info.storage_limits = std::make_shared(std::move(current_storage_limits)); collectTableExpressionData(query_tree, *planner_context); checkStoragesSupportTransactions(planner_context); @@ -847,4 +847,10 @@ void Planner::buildQueryPlanIfNeeded() extendQueryContextAndStoragesLifetime(query_plan, planner_context); } +void Planner::addStorageLimits(const StorageLimitsList & limits) +{ + for (const auto & limit : limits) + storage_limits.push_back(limit); +} + } diff --git a/src/Planner/Planner.h b/src/Planner/Planner.h index de4ed5b92e9..1de3e0efded 100644 --- a/src/Planner/Planner.h +++ b/src/Planner/Planner.h @@ -45,11 +45,14 @@ public: return std::move(query_plan); } + void addStorageLimits(const StorageLimitsList & limits); + private: QueryTreeNodePtr query_tree; QueryPlan query_plan; SelectQueryOptions select_query_options; PlannerContextPtr planner_context; + StorageLimitsList storage_limits; }; } diff --git a/src/Planner/PlannerJoins.cpp b/src/Planner/PlannerJoins.cpp index 019933f9b72..a17bbaebb04 
100644 --- a/src/Planner/PlannerJoins.cpp +++ b/src/Planner/PlannerJoins.cpp @@ -45,8 +45,9 @@ namespace DB namespace ErrorCodes { - extern const int LOGICAL_ERROR; + extern const int INCOMPATIBLE_TYPE_OF_JOIN; extern const int INVALID_JOIN_ON_EXPRESSION; + extern const int LOGICAL_ERROR; extern const int NOT_IMPLEMENTED; } @@ -671,9 +672,23 @@ std::shared_ptr chooseJoinAlgorithm(std::shared_ptr & table_jo { trySetStorageInTableJoin(right_table_expression, table_join); + auto & right_table_expression_data = planner_context->getTableExpressionDataOrThrow(right_table_expression); + /// JOIN with JOIN engine. if (auto storage = table_join->getStorageJoin()) + { + for (const auto & result_column : right_table_expression_header) + { + const auto * source_column_name = right_table_expression_data.getColumnNameOrNull(result_column.name); + if (!source_column_name) + throw Exception(ErrorCodes::INCOMPATIBLE_TYPE_OF_JOIN, + "JOIN with 'Join' table engine should be performed by storage keys [{}], but column '{}' was found", + fmt::join(storage->getKeyNames(), ", "), result_column.name); + + table_join->setRename(*source_column_name, result_column.name); + } return storage->getJoinLocked(table_join, planner_context->getQueryContext()); + } /** JOIN with constant. * Example: SELECT * FROM test_table AS t1 INNER JOIN test_table AS t2 ON 1; diff --git a/src/Processors/Formats/ISchemaReader.cpp b/src/Processors/Formats/ISchemaReader.cpp index a26ed6b0b40..4599cdb8748 100644 --- a/src/Processors/Formats/ISchemaReader.cpp +++ b/src/Processors/Formats/ISchemaReader.cpp @@ -1,6 +1,5 @@ #include -#include -#include +#include #include #include #include @@ -11,65 +10,29 @@ namespace DB namespace ErrorCodes { extern const int ONLY_NULLS_WHILE_READING_SCHEMA; - extern const int TYPE_MISMATCH; extern const int INCORRECT_DATA; extern const int EMPTY_DATA_PASSED; extern const int BAD_ARGUMENTS; } -void chooseResultColumnType( - DataTypePtr & type, - DataTypePtr & new_type, - std::function transform_types_if_needed, - const DataTypePtr & default_type, - const String & column_name, - size_t row) +void checkFinalInferredType(DataTypePtr & type, const String & name, const FormatSettings & settings, const DataTypePtr & default_type, size_t rows_read) { - if (!type) - { - type = new_type; - return; - } - - if (!new_type || type->equals(*new_type)) - return; - - transform_types_if_needed(type, new_type); - if (type->equals(*new_type)) - return; - - /// If the new type and the previous type for this column are different, - /// we will use default type if we have it or throw an exception. - if (default_type) - type = default_type; - else - { - throw Exception( - ErrorCodes::TYPE_MISMATCH, - "Automatically defined type {} for column '{}' in row {} differs from type defined by previous rows: {}. " - "You can specify the type for this column using setting schema_inference_hints", - type->getName(), - column_name, - row, - new_type->getName()); - } -} - -void checkResultColumnTypeAndAppend(NamesAndTypesList & result, DataTypePtr & type, const String & name, const DataTypePtr & default_type, size_t rows_read) -{ - if (!type) + if (!checkIfTypeIsComplete(type)) { if (!default_type) throw Exception( ErrorCodes::ONLY_NULLS_WHILE_READING_SCHEMA, "Cannot determine type for column '{}' by first {} rows of data, most likely this column contains only Nulls or empty " - "Arrays/Maps. You can specify the type for this column using setting schema_inference_hints", + "Arrays/Maps. 
You can specify the type for this column using setting schema_inference_hints. " + "If your data contains complex JSON objects, try enabling one of the settings allow_experimental_object_type/input_format_json_read_objects_as_strings", name, rows_read); type = default_type; } - result.emplace_back(name, type); + + if (settings.schema_inference_make_columns_nullable) + type = makeNullableRecursively(type); } IIRowSchemaReader::IIRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_, DataTypePtr default_type_) @@ -88,6 +51,11 @@ void IIRowSchemaReader::setContext(ContextPtr & context) } } +void IIRowSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) +{ + transformInferredTypesIfNeeded(type, new_type, format_settings); +} + IRowSchemaReader::IRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) : IIRowSchemaReader(in_, format_settings_), column_names(splitColumnNames(format_settings.column_names_for_schema_inference)) { @@ -160,23 +128,28 @@ NamesAndTypesList IRowSchemaReader::readSchema() if (new_data_types.size() != data_types.size()) throw Exception(ErrorCodes::INCORRECT_DATA, "Rows have different amount of values"); - for (size_t i = 0; i != data_types.size(); ++i) + for (field_index = 0; field_index != data_types.size(); ++field_index) { /// Check if we couldn't determine the type of this column in a new row /// or the type for this column was taken from hints. - if (!new_data_types[i] || hints.contains(column_names[i])) + if (!new_data_types[field_index] || hints.contains(column_names[field_index])) continue; - auto transform_types_if_needed = [&](DataTypePtr & type, DataTypePtr & new_type){ transformTypesIfNeeded(type, new_type, i); }; - chooseResultColumnType(data_types[i], new_data_types[i], transform_types_if_needed, getDefaultType(i), std::to_string(i + 1), rows_read); + chooseResultColumnType(*this, data_types[field_index], new_data_types[field_index], getDefaultType(field_index), std::to_string(field_index + 1), rows_read); } } NamesAndTypesList result; - for (size_t i = 0; i != data_types.size(); ++i) + for (field_index = 0; field_index != data_types.size(); ++field_index) { - /// Check that we could determine the type of this column. - checkResultColumnTypeAndAppend(result, data_types[i], column_names[i], getDefaultType(i), rows_read); + /// Don't check/change types from hints. + if (!hints.contains(column_names[field_index])) + { + transformFinalTypeIfNeeded(data_types[field_index]); + /// Check that we could determine the type of this column. 
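            /// Hinted columns never reach this check: a hint pins the type up front, e.g. (illustrative query)
            ///     SELECT * FROM file('data.csv') SETTINGS schema_inference_hints = 'id UInt64, value Nullable(String)'
            /// while for the remaining columns the error raised by checkFinalInferredType() points the user
            /// to the same setting.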
+ checkFinalInferredType(data_types[field_index], column_names[field_index], format_settings, getDefaultType(field_index), rows_read); + } + result.emplace_back(column_names[field_index], data_types[field_index]); } return result; @@ -208,11 +181,6 @@ DataTypePtr IRowSchemaReader::getDefaultType(size_t column) const return nullptr; } -void IRowSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type, size_t) -{ - transformInferredTypesIfNeeded(type, new_type, format_settings); -} - IRowWithNamesSchemaReader::IRowWithNamesSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_, DataTypePtr default_type_) : IIRowSchemaReader(in_, format_settings_, default_type_) { @@ -245,7 +213,6 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema() names_order.push_back(name); } - auto transform_types_if_needed = [&](DataTypePtr & type, DataTypePtr & new_type){ transformTypesIfNeeded(type, new_type); }; for (rows_read = 1; rows_read < max_rows_to_read; ++rows_read) { auto new_names_and_types = readRowAndGetNamesAndDataTypes(eof); @@ -277,7 +244,7 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema() continue; auto & type = it->second; - chooseResultColumnType(type, new_type, transform_types_if_needed, default_type, name, rows_read); + chooseResultColumnType(*this, type, new_type, default_type, name, rows_read); } } @@ -285,20 +252,21 @@ NamesAndTypesList IRowWithNamesSchemaReader::readSchema() if (names_to_types.empty()) throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "Cannot read rows from the data"); - NamesAndTypesList result; + NamesAndTypesList result = getStaticNamesAndTypes(); for (auto & name : names_order) { auto & type = names_to_types[name]; - /// Check that we could determine the type of this column. - checkResultColumnTypeAndAppend(result, type, name, default_type, rows_read); + /// Don't check/change types from hints. + if (!hints.contains(name)) + { + transformFinalTypeIfNeeded(type); + /// Check that we could determine the type of this column. + checkFinalInferredType(type, name, format_settings, default_type, rows_read); + } + result.emplace_back(name, type); } return result; } -void IRowWithNamesSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) -{ - transformInferredTypesIfNeeded(type, new_type, format_settings); -} - } diff --git a/src/Processors/Formats/ISchemaReader.h b/src/Processors/Formats/ISchemaReader.h index 503632fd2f8..9f3f4d880ef 100644 --- a/src/Processors/Formats/ISchemaReader.h +++ b/src/Processors/Formats/ISchemaReader.h @@ -9,6 +9,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int TYPE_MISMATCH; +} + /// Base class for schema inference for the data in some specific format. /// It reads some data from read buffer and try to determine the schema /// from read data. 
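A minimal sketch of how a concrete schema reader drives the chooseResultColumnType() helper that this diff moves into the header; the variable names and the inferTypeFromRow() call are illustrative, not taken from the patch:

    DataTypePtr result_type;                        /// accumulated over previously read rows
    DataTypePtr row_type = inferTypeFromRow();      /// hypothetical per-format inference step
    /// The helper calls back into this reader's transformTypesIfNeeded() to reconcile the two
    /// types, and throws TYPE_MISMATCH only if they still differ and no default type is set.
    chooseResultColumnType(*this, result_type, row_type, default_type, column_name, row);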
@@ -45,10 +50,14 @@ public: bool needContext() const override { return !hints_str.empty(); } void setContext(ContextPtr & context) override; + virtual void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type); + protected: void setMaxRowsToRead(size_t max_rows) override { max_rows_to_read = max_rows; } size_t getNumRowsRead() const override { return rows_read; } + virtual void transformFinalTypeIfNeeded(DataTypePtr &) {} + size_t max_rows_to_read; size_t rows_read = 0; DataTypePtr default_type; @@ -82,7 +91,7 @@ protected: void setColumnNames(const std::vector & names) { column_names = names; } - virtual void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type, size_t index); + size_t field_index; private: DataTypePtr getDefaultType(size_t column) const; @@ -111,7 +120,10 @@ protected: /// Set eof = true if can't read more data. virtual NamesAndTypesList readRowAndGetNamesAndDataTypes(bool & eof) = 0; - virtual void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type); + /// Get special static types that have the same name/type for each row. + /// For example, in JSONObjectEachRow format we have static column with + /// type String and name from a settings for object keys. + virtual NamesAndTypesList getStaticNamesAndTypes() { return {}; } }; /// Base class for schema inference for formats that don't need any data to @@ -125,16 +137,46 @@ public: virtual ~IExternalSchemaReader() = default; }; +template void chooseResultColumnType( + SchemaReader & schema_reader, DataTypePtr & type, DataTypePtr & new_type, - std::function transform_types_if_needed, const DataTypePtr & default_type, const String & column_name, - size_t row); + size_t row) +{ + if (!type) + { + type = new_type; + return; + } -void checkResultColumnTypeAndAppend( - NamesAndTypesList & result, DataTypePtr & type, const String & name, const DataTypePtr & default_type, size_t rows_read); + if (!new_type || type->equals(*new_type)) + return; + + schema_reader.transformTypesIfNeeded(type, new_type); + if (type->equals(*new_type)) + return; + + /// If the new type and the previous type for this column are different, + /// we will use default type if we have it or throw an exception. + if (default_type) + type = default_type; + else + { + throw Exception( + ErrorCodes::TYPE_MISMATCH, + "Automatically defined type {} for column '{}' in row {} differs from type defined by previous rows: {}. 
" + "You can specify the type for this column using setting schema_inference_hints", + type->getName(), + column_name, + row, + new_type->getName()); + } +} + +void checkFinalInferredType(DataTypePtr & type, const String & name, const FormatSettings & settings, const DataTypePtr & default_type, size_t rows_read); Strings splitColumnNames(const String & column_names_str); diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp index cae4cbab0d7..4eb7ab98f31 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp @@ -3,7 +3,7 @@ #if USE_ARROW #include -#include +#include #include #include #include diff --git a/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp index 878860aeb25..4d2ac6a5420 100644 --- a/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/BSONEachRowRowInputFormat.cpp @@ -772,27 +772,27 @@ DataTypePtr BSONEachRowSchemaReader::getDataTypeFromBSONField(BSONType type, boo case BSONType::DOUBLE: { in.ignore(sizeof(Float64)); - return makeNullable(std::make_shared()); + return std::make_shared(); } case BSONType::BOOL: { in.ignore(sizeof(UInt8)); - return makeNullable(DataTypeFactory::instance().get("Bool")); + return DataTypeFactory::instance().get("Bool"); } case BSONType::INT64: { in.ignore(sizeof(Int64)); - return makeNullable(std::make_shared()); + return std::make_shared(); } case BSONType::DATETIME: { in.ignore(sizeof(Int64)); - return makeNullable(std::make_shared(6, "UTC")); + return std::make_shared(6, "UTC"); } case BSONType::INT32: { in.ignore(sizeof(Int32)); - return makeNullable(std::make_shared()); + return std::make_shared(); } case BSONType::SYMBOL: [[fallthrough]]; case BSONType::JAVA_SCRIPT_CODE: [[fallthrough]]; @@ -802,7 +802,7 @@ DataTypePtr BSONEachRowSchemaReader::getDataTypeFromBSONField(BSONType type, boo BSONSizeT size; readBinary(size, in); in.ignore(size); - return makeNullable(std::make_shared()); + return std::make_shared(); } case BSONType::DOCUMENT: { @@ -856,10 +856,10 @@ DataTypePtr BSONEachRowSchemaReader::getDataTypeFromBSONField(BSONType type, boo { case BSONBinarySubtype::BINARY_OLD: [[fallthrough]]; case BSONBinarySubtype::BINARY: - return makeNullable(std::make_shared()); + return std::make_shared(); case BSONBinarySubtype::UUID_OLD: [[fallthrough]]; case BSONBinarySubtype::UUID: - return makeNullable(std::make_shared()); + return std::make_shared(); default: throw Exception(ErrorCodes::UNKNOWN_TYPE, "BSON binary subtype {} is not supported", getBSONBinarySubtypeName(subtype)); } diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index a50a9423965..1f1bf99739a 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -274,15 +274,15 @@ void CSVFormatReader::skipPrefixBeforeHeader() } -CSVSchemaReader::CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, const FormatSettings & format_setting_) +CSVSchemaReader::CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, const FormatSettings & format_settings_) : FormatWithNamesAndTypesSchemaReader( in_, - format_setting_, + format_settings_, with_names_, with_types_, &reader, getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule::CSV)) - , reader(in_, format_setting_) + , reader(in_, 
format_settings_) { } @@ -293,7 +293,7 @@ DataTypes CSVSchemaReader::readRowAndGetDataTypes() return {}; auto fields = reader.readRow(); - return determineDataTypesByEscapingRule(fields, reader.getFormatSettings(), FormatSettings::EscapingRule::CSV); + return tryInferDataTypesByEscapingRule(fields, reader.getFormatSettings(), FormatSettings::EscapingRule::CSV); } diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.h b/src/Processors/Formats/Impl/CSVRowInputFormat.h index 23c659c5c0c..1d79265c22b 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.h +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.h @@ -75,7 +75,7 @@ public: class CSVSchemaReader : public FormatWithNamesAndTypesSchemaReader { public: - CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, const FormatSettings & format_setting_); + CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, const FormatSettings & format_settings_); private: DataTypes readRowAndGetDataTypes() override; diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp index 16df132b9d8..f8e328ed0fb 100644 --- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include @@ -12,16 +13,6 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -static FormatSettings updateFormatSettings(const FormatSettings & settings) -{ - if (settings.custom.escaping_rule != FormatSettings::EscapingRule::CSV || settings.custom.field_delimiter.empty()) - return settings; - - auto updated = settings; - updated.csv.delimiter = settings.custom.field_delimiter.front(); - return updated; -} - CustomSeparatedRowInputFormat::CustomSeparatedRowInputFormat( const Block & header_, ReadBuffer & in_buf_, @@ -31,7 +22,7 @@ CustomSeparatedRowInputFormat::CustomSeparatedRowInputFormat( bool ignore_spaces_, const FormatSettings & format_settings_) : CustomSeparatedRowInputFormat( - header_, std::make_unique(in_buf_), params_, with_names_, with_types_, ignore_spaces_, updateFormatSettings(format_settings_)) + header_, std::make_unique(in_buf_), params_, with_names_, with_types_, ignore_spaces_, format_settings_) { } @@ -171,15 +162,31 @@ bool CustomSeparatedFormatReader::checkEndOfRow() } template -String CustomSeparatedFormatReader::readFieldIntoString(bool is_first) +String CustomSeparatedFormatReader::readFieldIntoString(bool is_first, bool is_last, bool is_unknown) { if (!is_first) skipFieldDelimiter(); skipSpaces(); + updateFormatSettings(is_last); if constexpr (is_header) + { + /// If the number of columns is unknown and we use CSV escaping rule, + /// we don't know what delimiter to expect after the value, + /// so we should read until we meet field_delimiter or row_after_delimiter. 
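            /// For example (illustrative settings): with format_custom_field_delimiter='||' and
            /// format_custom_row_after_delimiter='\n', a header value is cut at whichever of "||" or "\n"
            /// comes first, since we cannot yet know whether it is the last column of the row.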
+ if (is_unknown && format_settings.custom.escaping_rule == FormatSettings::EscapingRule::CSV) + return readCSVStringWithTwoPossibleDelimiters( + *buf, format_settings.csv, format_settings.custom.field_delimiter, format_settings.custom.row_after_delimiter); + return readStringByEscapingRule(*buf, format_settings.custom.escaping_rule, format_settings); + } else + { + if (is_unknown && format_settings.custom.escaping_rule == FormatSettings::EscapingRule::CSV) + return readCSVFieldWithTwoPossibleDelimiters( + *buf, format_settings.csv, format_settings.custom.field_delimiter, format_settings.custom.row_after_delimiter); + return readFieldByEscapingRule(*buf, format_settings.custom.escaping_rule, format_settings); + } } template @@ -192,14 +199,14 @@ std::vector CustomSeparatedFormatReader::readRowImpl() { do { - values.push_back(readFieldIntoString(values.empty())); + values.push_back(readFieldIntoString(values.empty(), false, true)); } while (!checkEndOfRow()); columns = values.size(); } else { for (size_t i = 0; i != columns; ++i) - values.push_back(readFieldIntoString(i == 0)); + values.push_back(readFieldIntoString(i == 0, i + 1 == columns, false)); } skipRowEndDelimiter(); @@ -223,9 +230,41 @@ void CustomSeparatedFormatReader::skipHeaderRow() skipRowEndDelimiter(); } -bool CustomSeparatedFormatReader::readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool, const String &) +void CustomSeparatedFormatReader::updateFormatSettings(bool is_last_column) +{ + if (format_settings.custom.escaping_rule != FormatSettings::EscapingRule::CSV) + return; + + /// Clean custom delimiter from previous delimiter. + format_settings.csv.custom_delimiter.clear(); + + /// If delimiter has length = 1, it will be more efficient to use csv.delimiter. + /// If we have some complex delimiter, normal CSV reading will now work properly if we will + /// use just the first character of delimiter (for example, if delimiter='||' and we have data 'abc|d||') + /// We have special implementation for such case that uses custom delimiter, it's not so efficient, + /// but works properly. + + if (is_last_column) + { + /// If field delimiter has length = 1, it will be more efficient to use csv.delimiter. + if (format_settings.custom.row_after_delimiter.size() == 1) + format_settings.csv.delimiter = format_settings.custom.row_after_delimiter.front(); + else + format_settings.csv.custom_delimiter = format_settings.custom.row_after_delimiter; + } + else + { + if (format_settings.custom.field_delimiter.size() == 1) + format_settings.csv.delimiter = format_settings.custom.field_delimiter.front(); + else + format_settings.csv.custom_delimiter = format_settings.custom.field_delimiter; + } +} + +bool CustomSeparatedFormatReader::readField(IColumn & column, const DataTypePtr & type, const SerializationPtr & serialization, bool is_last_file_column, const String &) { skipSpaces(); + updateFormatSettings(is_last_file_column); return deserializeFieldByEscapingRule(type, serialization, column, *buf, format_settings.custom.escaping_rule, format_settings); } @@ -237,6 +276,8 @@ bool CustomSeparatedFormatReader::checkForSuffixImpl(bool check_eof) if (!check_eof) return false; + /// Allow optional \n before eof. + checkChar('\n', *buf); return buf->eof(); } @@ -246,6 +287,8 @@ bool CustomSeparatedFormatReader::checkForSuffixImpl(bool check_eof) if (!check_eof) return true; + /// Allow optional \n before eof. 
+ checkChar('\n', *buf); if (buf->eof()) return true; } @@ -312,7 +355,7 @@ CustomSeparatedSchemaReader::CustomSeparatedSchemaReader( &reader, getDefaultDataTypeForEscapingRule(format_setting_.custom.escaping_rule)) , buf(in_) - , reader(buf, ignore_spaces_, updateFormatSettings(format_setting_)) + , reader(buf, ignore_spaces_, format_setting_) { } @@ -328,12 +371,12 @@ DataTypes CustomSeparatedSchemaReader::readRowAndGetDataTypes() first_row = false; auto fields = reader.readRow(); - return determineDataTypesByEscapingRule(fields, reader.getFormatSettings(), reader.getEscapingRule()); + return tryInferDataTypesByEscapingRule(fields, reader.getFormatSettings(), reader.getEscapingRule(), &json_inference_info); } -void CustomSeparatedSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type, size_t) +void CustomSeparatedSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) { - transformInferredTypesIfNeeded(type, new_type, format_settings, reader.getEscapingRule()); + transformInferredTypesByEscapingRuleIfNeeded(type, new_type, format_settings, reader.getEscapingRule(), &json_inference_info); } void registerInputFormatCustomSeparated(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h index e7e96ab87b1..8a3112eb9c1 100644 --- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h +++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -83,7 +84,9 @@ private: std::vector readRowImpl(); template - String readFieldIntoString(bool is_first); + String readFieldIntoString(bool is_first, bool is_last, bool is_unknown); + + void updateFormatSettings(bool is_last_column); PeekableReadBuffer * buf; bool ignore_spaces; @@ -98,11 +101,12 @@ public: private: DataTypes readRowAndGetDataTypes() override; - void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type, size_t) override; + void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override; PeekableReadBuffer buf; CustomSeparatedFormatReader reader; bool first_row = true; + JSONInferenceInfo json_inference_info; }; } diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp index cfd68079bba..204a5077e31 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp +++ b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.cpp @@ -2,8 +2,11 @@ #include #include #include +#include +#include #include #include +#include namespace DB { @@ -170,19 +173,25 @@ JSONColumnsSchemaReaderBase::JSONColumnsSchemaReaderBase( ReadBuffer & in_, const FormatSettings & format_settings_, std::unique_ptr reader_) : ISchemaReader(in_) , format_settings(format_settings_) + , hints_str(format_settings_.schema_inference_hints) , reader(std::move(reader_)) , column_names_from_settings(splitColumnNames(format_settings_.column_names_for_schema_inference)) { } -void JSONColumnsSchemaReaderBase::chooseResulType(DataTypePtr & type, DataTypePtr & new_type, const String & column_name, size_t row) const +void JSONColumnsSchemaReaderBase::setContext(ContextPtr & ctx) { - auto convert_types_if_needed = [&](DataTypePtr & first, DataTypePtr & second) + ColumnsDescription columns; + if (tryParseColumnsListFromString(hints_str, columns, ctx)) { - DataTypes types = {first, second}; - 
transformInferredJSONTypesIfNeeded(types, format_settings); - }; - chooseResultColumnType(type, new_type, convert_types_if_needed, nullptr, column_name, row); + for (const auto & [name, type] : columns.getAll()) + hints[name] = type; + } +} + +void JSONColumnsSchemaReaderBase::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) +{ + transformInferredJSONTypesIfNeeded(type, new_type, format_settings, &inference_info); } NamesAndTypesList JSONColumnsSchemaReaderBase::readSchema() @@ -220,9 +229,18 @@ NamesAndTypesList JSONColumnsSchemaReaderBase::readSchema() if (!names_to_types.contains(column_name)) names_order.push_back(column_name); - rows_in_block = 0; - auto column_type = readColumnAndGetDataType(column_name, rows_in_block, format_settings.max_rows_to_read_for_schema_inference - total_rows_read); - chooseResulType(names_to_types[column_name], column_type, column_name, total_rows_read + 1); + if (const auto it = hints.find(column_name); it != hints.end()) + { + names_to_types[column_name] = it->second; + } + else + { + rows_in_block = 0; + auto column_type = readColumnAndGetDataType( + column_name, rows_in_block, format_settings.max_rows_to_read_for_schema_inference - total_rows_read); + chooseResultColumnType(*this, names_to_types[column_name], column_type, nullptr, column_name, total_rows_read + 1); + } + ++iteration; } while (!reader->checkChunkEndOrSkipColumnDelimiter()); @@ -237,8 +255,14 @@ NamesAndTypesList JSONColumnsSchemaReaderBase::readSchema() for (auto & name : names_order) { auto & type = names_to_types[name]; - /// Check that we could determine the type of this column. - checkResultColumnTypeAndAppend(result, type, name, nullptr, format_settings.max_rows_to_read_for_schema_inference); + /// Don't check/change types from hints. + if (!hints.contains(name)) + { + transformJSONTupleToArrayIfPossible(type, format_settings, &inference_info); + /// Check that we could determine the type of this column. 
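            /// Besides the only-NULLs check, checkFinalInferredType() also applies
            /// schema_inference_make_columns_nullable, which lets the individual format readers in this
            /// patch drop their unconditional makeNullable() wrappers.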
+ checkFinalInferredType(type, name, format_settings, nullptr, format_settings.max_rows_to_read_for_schema_inference); + } + result.emplace_back(name, type); } return result; @@ -262,8 +286,8 @@ DataTypePtr JSONColumnsSchemaReaderBase::readColumnAndGetDataType(const String & } readJSONField(field, in); - DataTypePtr field_type = JSONUtils::getDataTypeFromField(field, format_settings); - chooseResulType(column_type, field_type, column_name, rows_read); + DataTypePtr field_type = tryInferDataTypeForSingleJSONField(field, format_settings, &inference_info); + chooseResultColumnType(*this, column_type, field_type, nullptr, column_name, rows_read); ++rows_read; } while (!reader->checkColumnEndOrSkipFieldDelimiter()); diff --git a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h index a8311123afc..3292b5649c9 100644 --- a/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h +++ b/src/Processors/Formats/Impl/JSONColumnsBlockInputFormatBase.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -76,18 +77,23 @@ class JSONColumnsSchemaReaderBase : public ISchemaReader public: JSONColumnsSchemaReaderBase(ReadBuffer & in_, const FormatSettings & format_settings_, std::unique_ptr reader_); + void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type); + + bool needContext() const override { return !hints_str.empty(); } + void setContext(ContextPtr & ctx) override; + private: NamesAndTypesList readSchema() override; /// Read whole column in the block (up to max_rows_to_read rows) and extract the data type. DataTypePtr readColumnAndGetDataType(const String & column_name, size_t & rows_read, size_t max_rows_to_read); - /// Choose result type for column from two inferred types from different rows. 
- void chooseResulType(DataTypePtr & type, DataTypePtr & new_type, const String & column_name, size_t row) const; - const FormatSettings format_settings; + String hints_str; + std::unordered_map hints; std::unique_ptr reader; Names column_names_from_settings; + JSONInferenceInfo inference_info; }; } diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp index 6d74ca6d616..49564bde429 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -202,12 +203,17 @@ DataTypes JSONCompactEachRowRowSchemaReader::readRowAndGetDataTypes() if (in.eof()) return {}; - return JSONUtils::readRowAndGetDataTypesForJSONCompactEachRow(in, format_settings, reader.yieldStrings()); + return JSONUtils::readRowAndGetDataTypesForJSONCompactEachRow(in, format_settings, &inference_info); } -void JSONCompactEachRowRowSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type, size_t) +void JSONCompactEachRowRowSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) { - transformInferredJSONTypesIfNeeded(type, new_type, format_settings); + transformInferredJSONTypesIfNeeded(type, new_type, format_settings, &inference_info); +} + +void JSONCompactEachRowRowSchemaReader::transformFinalTypeIfNeeded(DataTypePtr & type) +{ + transformJSONTupleToArrayIfPossible(type, format_settings, &inference_info); } void registerInputFormatJSONCompactEachRow(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h index 2bcc0abae77..2151967517a 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h @@ -4,6 +4,7 @@ #include #include #include +#include #include namespace DB @@ -80,10 +81,12 @@ public: private: DataTypes readRowAndGetDataTypes() override; - void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type, size_t) override; + void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override; + void transformFinalTypeIfNeeded(DataTypePtr & type) override; JSONCompactEachRowFormatReader reader; bool first_row = true; + JSONInferenceInfo inference_info; }; } diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index 8a5ef33b73d..c9502659267 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -300,9 +301,8 @@ void JSONEachRowRowInputFormat::readSuffix() assertEOF(*in); } -JSONEachRowSchemaReader::JSONEachRowSchemaReader(ReadBuffer & in_, bool json_strings_, const FormatSettings & format_settings_) +JSONEachRowSchemaReader::JSONEachRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_) : IRowWithNamesSchemaReader(in_, format_settings_) - , json_strings(json_strings_) { } @@ -336,12 +336,17 @@ NamesAndTypesList JSONEachRowSchemaReader::readRowAndGetNamesAndDataTypes(bool & return {}; } - return JSONUtils::readRowAndGetNamesAndDataTypesForJSONEachRow(in, format_settings, json_strings); + return JSONUtils::readRowAndGetNamesAndDataTypesForJSONEachRow(in, 
format_settings, &inference_info); } void JSONEachRowSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) { - transformInferredJSONTypesIfNeeded(type, new_type, format_settings); + transformInferredJSONTypesIfNeeded(type, new_type, format_settings, &inference_info); +} + +void JSONEachRowSchemaReader::transformFinalTypeIfNeeded(DataTypePtr & type) +{ + transformJSONTupleToArrayIfPossible(type, format_settings, &inference_info); } void registerInputFormatJSONEachRow(FormatFactory & factory) @@ -391,11 +396,11 @@ void registerNonTrivialPrefixAndSuffixCheckerJSONEachRow(FormatFactory & factory void registerJSONEachRowSchemaReader(FormatFactory & factory) { - auto register_schema_reader = [&](const String & format_name, bool json_strings) + auto register_schema_reader = [&](const String & format_name) { - factory.registerSchemaReader(format_name, [json_strings](ReadBuffer & buf, const FormatSettings & settings) + factory.registerSchemaReader(format_name, [](ReadBuffer & buf, const FormatSettings & settings) { - return std::make_unique(buf, json_strings, settings); + return std::make_unique(buf, settings); }); factory.registerAdditionalInfoForSchemaCacheGetter(format_name, [](const FormatSettings & settings) { @@ -403,10 +408,10 @@ void registerJSONEachRowSchemaReader(FormatFactory & factory) }); }; - register_schema_reader("JSONEachRow", false); - register_schema_reader("JSONLines", false); - register_schema_reader("NDJSON", false); - register_schema_reader("JSONStringsEachRow", true); + register_schema_reader("JSONEachRow"); + register_schema_reader("JSONLines"); + register_schema_reader("NDJSON"); + register_schema_reader("JSONStringsEachRow"); } } diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h index 4e2946cfea6..beee9e95821 100644 --- a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.h @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -94,15 +95,16 @@ protected: class JSONEachRowSchemaReader : public IRowWithNamesSchemaReader { public: - JSONEachRowSchemaReader(ReadBuffer & in_, bool json_strings, const FormatSettings & format_settings_); + JSONEachRowSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings_); private: NamesAndTypesList readRowAndGetNamesAndDataTypes(bool & eof) override; void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override; + void transformFinalTypeIfNeeded(DataTypePtr & type) override; - bool json_strings; bool first_row = true; bool data_in_square_brackets = false; + JSONInferenceInfo inference_info; }; } diff --git a/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.cpp index 6e6d6287840..f01f07024da 100644 --- a/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include namespace DB @@ -85,15 +86,25 @@ NamesAndTypesList JSONObjectEachRowSchemaReader::readRowAndGetNamesAndDataTypes( JSONUtils::skipComma(in); JSONUtils::readFieldName(in); - auto names_and_types = JSONUtils::readRowAndGetNamesAndDataTypesForJSONEachRow(in, format_settings, false); + return JSONUtils::readRowAndGetNamesAndDataTypesForJSONEachRow(in, format_settings, &inference_info); +} + +NamesAndTypesList JSONObjectEachRowSchemaReader::getStaticNamesAndTypes() +{ if 
(!format_settings.json_object_each_row.column_for_object_name.empty()) - names_and_types.emplace_front(format_settings.json_object_each_row.column_for_object_name, std::make_shared()); - return names_and_types; + return {{format_settings.json_object_each_row.column_for_object_name, std::make_shared()}}; + + return {}; } void JSONObjectEachRowSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) { - transformInferredJSONTypesIfNeeded(type, new_type, format_settings); + transformInferredJSONTypesIfNeeded(type, new_type, format_settings, &inference_info); +} + +void JSONObjectEachRowSchemaReader::transformFinalTypeIfNeeded(DataTypePtr & type) +{ + transformJSONTupleToArrayIfPossible(type, format_settings, &inference_info); } void registerInputFormatJSONObjectEachRow(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.h b/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.h index 466c0111a03..a15bc558c65 100644 --- a/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.h +++ b/src/Processors/Formats/Impl/JSONObjectEachRowRowInputFormat.h @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -41,9 +42,12 @@ public: private: NamesAndTypesList readRowAndGetNamesAndDataTypes(bool & eof) override; + NamesAndTypesList getStaticNamesAndTypes() override; void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override; + void transformFinalTypeIfNeeded(DataTypePtr & type) override; bool first_row = true; + JSONInferenceInfo inference_info; }; std::optional getColumnIndexForJSONObjectEachRowObjectName(const Block & header, const FormatSettings & settings); diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp index 4e62754bc3d..3a76a5a3fc6 100644 --- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp @@ -247,6 +247,14 @@ static void insertNull(IColumn & column, DataTypePtr type) static void insertUUID(IColumn & column, DataTypePtr type, const char * value, size_t size) { + auto insert_func = [&](IColumn & column_, DataTypePtr type_) + { + insertUUID(column_, type_, value, size); + }; + + if (checkAndInsertNullable(column, type, insert_func) || checkAndInsertLowCardinality(column, type, insert_func)) + return; + if (!isUUID(type)) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot insert MessagePack UUID into column with type {}.", type->getName()); ReadBufferFromMemory buf(value, size); @@ -470,16 +478,16 @@ DataTypePtr MsgPackSchemaReader::getDataType(const msgpack::object & object) { case msgpack::type::object_type::POSITIVE_INTEGER: [[fallthrough]]; case msgpack::type::object_type::NEGATIVE_INTEGER: - return makeNullable(std::make_shared()); + return std::make_shared(); case msgpack::type::object_type::FLOAT32: - return makeNullable(std::make_shared()); + return std::make_shared(); case msgpack::type::object_type::FLOAT64: - return makeNullable(std::make_shared()); + return std::make_shared(); case msgpack::type::object_type::BOOLEAN: - return makeNullable(std::make_shared()); + return std::make_shared(); case msgpack::type::object_type::BIN: [[fallthrough]]; case msgpack::type::object_type::STR: - return makeNullable(std::make_shared()); + return std::make_shared(); case msgpack::type::object_type::ARRAY: { msgpack::object_array object_array = object.via.array; diff --git a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp 
b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp index faa74e234b9..c3f7b4e0ad7 100644 --- a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp @@ -435,7 +435,7 @@ DataTypes MySQLDumpSchemaReader::readRowAndGetDataTypes() skipFieldDelimiter(in); readQuotedField(value, in); - auto type = determineDataTypeByEscapingRule(value, format_settings, FormatSettings::EscapingRule::Quoted); + auto type = tryInferDataTypeByEscapingRule(value, format_settings, FormatSettings::EscapingRule::Quoted); data_types.push_back(std::move(type)); } skipEndOfRow(in, table_name); diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index 58fd03a7a78..b0ce87f45da 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -3,7 +3,7 @@ #if USE_ORC #include -#include +#include #include #include #include @@ -101,7 +101,7 @@ static size_t countIndicesForType(std::shared_ptr type) if (type->id() == arrow::Type::MAP) { auto * map_type = static_cast(type.get()); - return countIndicesForType(map_type->key_type()) + countIndicesForType(map_type->item_type()); + return countIndicesForType(map_type->key_type()) + countIndicesForType(map_type->item_type()) + 1; } return 1; diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index c2253fe4b20..85eed00a95f 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -4,7 +4,7 @@ #if USE_PARQUET #include -#include +#include #include #include #include diff --git a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp index 2ad2ad6f7a3..5c0192c1e4a 100644 --- a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -155,15 +156,15 @@ DataTypes RegexpSchemaReader::readRowAndGetDataTypes() for (size_t i = 0; i != field_extractor.getMatchedFieldsSize(); ++i) { String field(field_extractor.getField(i)); - data_types.push_back(determineDataTypeByEscapingRule(field, format_settings, format_settings.regexp.escaping_rule)); + data_types.push_back(tryInferDataTypeByEscapingRule(field, format_settings, format_settings.regexp.escaping_rule, &json_inference_info)); } return data_types; } -void RegexpSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type, size_t) +void RegexpSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) { - transformInferredTypesIfNeeded(type, new_type, format_settings, format_settings.regexp.escaping_rule); + transformInferredTypesByEscapingRuleIfNeeded(type, new_type, format_settings, format_settings.regexp.escaping_rule, &json_inference_info); } diff --git a/src/Processors/Formats/Impl/RegexpRowInputFormat.h b/src/Processors/Formats/Impl/RegexpRowInputFormat.h index 7fbb3fc320f..d6696ffe751 100644 --- a/src/Processors/Formats/Impl/RegexpRowInputFormat.h +++ b/src/Processors/Formats/Impl/RegexpRowInputFormat.h @@ -5,12 +5,13 @@ #include #include #include +#include #include #include #include #include -#include #include +#include namespace DB @@ -81,12 +82,13 @@ public: private: DataTypes readRowAndGetDataTypes() override; - void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & 
new_type, size_t) override; + void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override; using EscapingRule = FormatSettings::EscapingRule; RegexpFieldExtractor field_extractor; PeekableReadBuffer buf; + JSONInferenceInfo json_inference_info; }; } diff --git a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp index 838aba72e3d..f5f05453f25 100644 --- a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp @@ -249,7 +249,7 @@ NamesAndTypesList TSKVSchemaReader::readRowAndGetNamesAndDataTypes(bool & eof) if (has_value) { readEscapedString(value, in); - names_and_types.emplace_back(std::move(name), determineDataTypeByEscapingRule(value, format_settings, FormatSettings::EscapingRule::Escaped)); + names_and_types.emplace_back(std::move(name), tryInferDataTypeByEscapingRule(value, format_settings, FormatSettings::EscapingRule::Escaped)); } else { diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp index 5066d40cbae..174a41a8a59 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp @@ -268,7 +268,7 @@ DataTypes TabSeparatedSchemaReader::readRowAndGetDataTypes() return {}; auto fields = reader.readRow(); - return determineDataTypesByEscapingRule(fields, reader.getFormatSettings(), reader.getEscapingRule()); + return tryInferDataTypesByEscapingRule(fields, reader.getFormatSettings(), reader.getEscapingRule()); } void registerInputFormatTabSeparated(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp index 76fd0d2a907..ba6650c2887 100644 --- a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -25,6 +26,27 @@ namespace ErrorCodes ErrorCodes::CANNOT_READ_ALL_DATA); } +static void updateFormatSettingsIfNeeded(FormatSettings::EscapingRule escaping_rule, FormatSettings & settings, const ParsedTemplateFormatString & row_format, char default_csv_delimiter, size_t file_column) +{ + if (escaping_rule != FormatSettings::EscapingRule::CSV) + return; + + /// Clean custom_delimiter from the previous column. + settings.csv.custom_delimiter.clear(); + /// If the field delimiter is empty, we read until the default CSV delimiter. + if (row_format.delimiters[file_column + 1].empty()) + settings.csv.delimiter = default_csv_delimiter; + /// If the field delimiter has length 1, it is more efficient to use csv.delimiter. + else if (row_format.delimiters[file_column + 1].size() == 1) + settings.csv.delimiter = row_format.delimiters[file_column + 1].front(); + /// If we have some complex delimiter, normal CSV reading will not work properly if we + /// use only the first character of the delimiter (for example, if delimiter='||' and we have data 'abc|d||'). + /// We have a special implementation for such a case that uses a custom delimiter; it's not as efficient, + /// but it works properly.
+ else + settings.csv.custom_delimiter = row_format.delimiters[file_column + 1]; +} + TemplateRowInputFormat::TemplateRowInputFormat( const Block & header_, ReadBuffer & in_, @@ -129,10 +151,8 @@ bool TemplateRowInputFormat::deserializeField(const DataTypePtr & type, const SerializationPtr & serialization, IColumn & column, size_t file_column) { EscapingRule escaping_rule = row_format.escaping_rules[file_column]; - if (escaping_rule == EscapingRule::CSV) - /// Will read unquoted string until settings.csv.delimiter - settings.csv.delimiter = row_format.delimiters[file_column + 1].empty() ? default_csv_delimiter : - row_format.delimiters[file_column + 1].front(); + updateFormatSettingsIfNeeded(escaping_rule, settings, row_format, default_csv_delimiter, file_column); + try { return deserializeFieldByEscapingRule(type, serialization, column, *buf, escaping_rule, settings); @@ -466,6 +486,7 @@ TemplateSchemaReader::TemplateSchemaReader( , format(format_) , row_format(row_format_) , format_reader(buf, ignore_spaces_, format, row_format, row_between_delimiter, format_settings) + , default_csv_delimiter(format_settings_.csv.delimiter) { setColumnNames(row_format.column_names); } @@ -489,20 +510,18 @@ DataTypes TemplateSchemaReader::readRowAndGetDataTypes() for (size_t i = 0; i != row_format.columnsCount(); ++i) { format_reader.skipDelimiter(i); - if (row_format.escaping_rules[i] == FormatSettings::EscapingRule::CSV) - format_settings.csv.delimiter = row_format.delimiters[i + 1].empty() ? format_settings.csv.delimiter : row_format.delimiters[i + 1].front(); - + updateFormatSettingsIfNeeded(row_format.escaping_rules[i], format_settings, row_format, default_csv_delimiter, i); field = readFieldByEscapingRule(buf, row_format.escaping_rules[i], format_settings); - data_types.push_back(determineDataTypeByEscapingRule(field, format_settings, row_format.escaping_rules[i])); + data_types.push_back(tryInferDataTypeByEscapingRule(field, format_settings, row_format.escaping_rules[i], &json_inference_info)); } format_reader.skipRowEndDelimiter(); return data_types; } -void TemplateSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type, size_t column_idx) +void TemplateSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) { - transformInferredTypesIfNeeded(type, new_type, format_settings, row_format.escaping_rules[column_idx]); + transformInferredTypesByEscapingRuleIfNeeded(type, new_type, format_settings, row_format.escaping_rules[field_index], &json_inference_info); } static ParsedTemplateFormatString fillResultSetFormat(const FormatSettings & settings) diff --git a/src/Processors/Formats/Impl/TemplateRowInputFormat.h b/src/Processors/Formats/Impl/TemplateRowInputFormat.h index 740683ad95d..8f9088e2c47 100644 --- a/src/Processors/Formats/Impl/TemplateRowInputFormat.h +++ b/src/Processors/Formats/Impl/TemplateRowInputFormat.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -121,13 +122,15 @@ public: DataTypes readRowAndGetDataTypes() override; private: - void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type, size_t column_idx) override; + void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override; PeekableReadBuffer buf; const ParsedTemplateFormatString format; const ParsedTemplateFormatString row_format; TemplateFormatReader format_reader; bool first_row = true; + JSONInferenceInfo json_inference_info; + const char default_csv_delimiter; }; bool 
parseDelimiterWithDiagnosticInfo(WriteBuffer & out, ReadBuffer & buf, const String & delimiter, const String & description, bool skip_spaces); diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp index 108b4203e3e..85182682f1b 100644 --- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp @@ -593,14 +593,14 @@ DataTypes ValuesSchemaReader::readRowAndGetDataTypes() { if (!data_types.empty()) { - skipWhitespaceIfAny(buf); assertChar(',', buf); skipWhitespaceIfAny(buf); } readQuotedField(value, buf); - auto type = determineDataTypeByEscapingRule(value, format_settings, FormatSettings::EscapingRule::Quoted); + auto type = tryInferDataTypeByEscapingRule(value, format_settings, FormatSettings::EscapingRule::Quoted); data_types.push_back(std::move(type)); + skipWhitespaceIfAny(buf); } assertChar(')', buf); diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h index e7dda957b04..b3066f0bdbb 100644 --- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h +++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h @@ -111,7 +111,7 @@ public: protected: ReadBuffer * in; - const FormatSettings format_settings; + FormatSettings format_settings; }; /// Base class for schema inference for formats with -WithNames and -WithNamesAndTypes suffixes. diff --git a/src/Processors/QueryPlan/AggregatingStep.cpp b/src/Processors/QueryPlan/AggregatingStep.cpp index 86039342c49..4fd6e7c11dd 100644 --- a/src/Processors/QueryPlan/AggregatingStep.cpp +++ b/src/Processors/QueryPlan/AggregatingStep.cpp @@ -79,9 +79,9 @@ Block generateOutputHeader(const Block & input_header, const Names & keys, bool } -static Block appendGroupingColumn(Block block, const Names & keys, const GroupingSetsParamsList & params, bool use_nulls) +Block AggregatingStep::appendGroupingColumn(Block block, const Names & keys, bool has_grouping, bool use_nulls) { - if (params.empty()) + if (!has_grouping) return block; return generateOutputHeader(block, keys, use_nulls); @@ -104,7 +104,7 @@ AggregatingStep::AggregatingStep( bool memory_bound_merging_of_aggregation_results_enabled_) : ITransformingStep( input_stream_, - appendGroupingColumn(params_.getHeader(input_stream_.header, final_), params_.keys, grouping_sets_params_, group_by_use_nulls_), + appendGroupingColumn(params_.getHeader(input_stream_.header, final_), params_.keys, !grouping_sets_params_.empty(), group_by_use_nulls_), getTraits(should_produce_results_in_order_of_bucket_number_), false) , params(std::move(params_)) @@ -469,7 +469,7 @@ void AggregatingStep::updateOutputStream() { output_stream = createOutputStream( input_streams.front(), - appendGroupingColumn(params.getHeader(input_streams.front().header, final), params.keys, grouping_sets_params, group_by_use_nulls), + appendGroupingColumn(params.getHeader(input_streams.front().header, final), params.keys, !grouping_sets_params.empty(), group_by_use_nulls), getDataStreamTraits()); } diff --git a/src/Processors/QueryPlan/AggregatingStep.h b/src/Processors/QueryPlan/AggregatingStep.h index 9cb56432797..0dc06649d2d 100644 --- a/src/Processors/QueryPlan/AggregatingStep.h +++ b/src/Processors/QueryPlan/AggregatingStep.h @@ -42,6 +42,8 @@ public: bool should_produce_results_in_order_of_bucket_number_, bool memory_bound_merging_of_aggregation_results_enabled_); + static Block appendGroupingColumn(Block block, const Names & 
keys, bool has_grouping, bool use_nulls); + String getName() const override { return "Aggregating"; } void transformPipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override; diff --git a/src/Processors/QueryPlan/QueryPlan.h b/src/Processors/QueryPlan/QueryPlan.h index 7598bdff63e..19d87b101de 100644 --- a/src/Processors/QueryPlan/QueryPlan.h +++ b/src/Processors/QueryPlan/QueryPlan.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 085399e4941..1be05135fe4 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -255,7 +255,14 @@ Chain buildPushingToViewsChain( for (const auto & view_id : views) { - auto view = DatabaseCatalog::instance().getTable(view_id, context); + auto view = DatabaseCatalog::instance().tryGetTable(view_id, context); + if (view == nullptr) + { + LOG_WARNING( + &Poco::Logger::get("PushingToViews"), "Trying to access table {} but it doesn't exist", view_id.getFullTableName()); + continue; + } + auto view_metadata_snapshot = view->getInMemoryMetadataPtr(); ASTPtr query; @@ -299,8 +306,19 @@ Chain buildPushingToViewsChain( if (auto * materialized_view = dynamic_cast(view.get())) { + auto lock = materialized_view->tryLockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout); + + if (lock == nullptr) + { + // In case the materialized view is dropped at this point, we register a warning and ignore it + assert(materialized_view->is_dropped); + LOG_WARNING( + &Poco::Logger::get("PushingToViews"), "Trying to access table {} but it doesn't exist", view_id.getFullTableName()); + continue; + } + type = QueryViewsLogElement::ViewType::MATERIALIZED; - result_chain.addTableLock(materialized_view->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout)); + result_chain.addTableLock(lock); StoragePtr inner_table = materialized_view->getTargetTable(); auto inner_table_id = inner_table->getStorageID(); @@ -371,7 +389,7 @@ Chain buildPushingToViewsChain( } } - if (views_data) + if (views_data && !views_data->views.empty()) { size_t num_views = views_data->views.size(); const Settings & settings = context->getSettingsRef(); diff --git a/src/QueryPipeline/Chain.h b/src/QueryPipeline/Chain.h index 09fb5e54cd4..d6139281990 100644 --- a/src/QueryPipeline/Chain.h +++ b/src/QueryPipeline/Chain.h @@ -2,7 +2,7 @@ #include #include -#include +#include namespace DB { diff --git a/src/QueryPipeline/Pipe.h b/src/QueryPipeline/Pipe.h index 7e30d9c990e..2b61bfe7573 100644 --- a/src/QueryPipeline/Pipe.h +++ b/src/QueryPipeline/Pipe.h @@ -1,7 +1,7 @@ #pragma once #include -#include +#include #include #include diff --git a/src/QueryPipeline/QueryPipeline.h b/src/QueryPipeline/QueryPipeline.h index 63f444e6ec1..153bcc55b39 100644 --- a/src/QueryPipeline/QueryPipeline.h +++ b/src/QueryPipeline/QueryPipeline.h @@ -1,5 +1,5 @@ #pragma once -#include +#include #include #include #include diff --git a/src/QueryPipeline/PipelineResourcesHolder.cpp b/src/QueryPipeline/QueryPlanResourceHolder.cpp similarity index 94% rename from src/QueryPipeline/PipelineResourcesHolder.cpp rename to src/QueryPipeline/QueryPlanResourceHolder.cpp index 6ebb40086d3..2cd4dc42a83 100644 --- a/src/QueryPipeline/PipelineResourcesHolder.cpp +++ b/src/QueryPipeline/QueryPlanResourceHolder.cpp @@ -1,4 +1,4 @@ 
-#include +#include #include #include diff --git a/src/QueryPipeline/PipelineResourcesHolder.h b/src/QueryPipeline/QueryPlanResourceHolder.h similarity index 100% rename from src/QueryPipeline/PipelineResourcesHolder.h rename to src/QueryPipeline/QueryPlanResourceHolder.h diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 1c7028b49f5..c9d57a8e3be 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -741,14 +742,20 @@ void TCPHandler::processOrdinaryQueryWithProcessors() auto & pipeline = state.io.pipeline; if (query_context->getSettingsRef().allow_experimental_query_deduplication) + { + std::lock_guard lock(task_callback_mutex); sendPartUUIDs(); + } /// Send header-block, to allow client to prepare output format for data to send. { const auto & header = pipeline.getHeader(); if (header) + { + std::lock_guard lock(task_callback_mutex); sendData(header); + } } { @@ -1211,6 +1218,17 @@ void TCPHandler::sendHello() writeStringBinary(server_display_name, *out); if (client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_VERSION_PATCH) writeVarUInt(DBMS_VERSION_PATCH, *out); + if (client_tcp_protocol_version >= DBMS_MIN_PROTOCOL_VERSION_WITH_PASSWORD_COMPLEXITY_RULES) + { + auto rules = server.context()->getAccessControl().getPasswordComplexityRules(); + + writeVarUInt(rules.size(), *out); + for (const auto & [original_pattern, exception_message] : rules) + { + writeStringBinary(original_pattern, *out); + writeStringBinary(exception_message, *out); + } + } out->next(); } diff --git a/src/Storages/FileLog/ReadBufferFromFileLog.h b/src/Storages/FileLog/ReadBufferFromFileLog.h index d581ead951e..5991fe29b70 100644 --- a/src/Storages/FileLog/ReadBufferFromFileLog.h +++ b/src/Storages/FileLog/ReadBufferFromFileLog.h @@ -7,11 +7,6 @@ #include #include -namespace Poco -{ - class Logger; -} - namespace DB { class ReadBufferFromFileLog : public ReadBuffer diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index 0f4563b6f35..64b82eb4000 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -17,11 +18,11 @@ #include #include #include -#include #include #include #include #include +#include #include @@ -37,7 +38,6 @@ namespace ErrorCodes extern const int CANNOT_READ_ALL_DATA; extern const int LOGICAL_ERROR; extern const int TABLE_METADATA_ALREADY_EXISTS; - extern const int DIRECTORY_DOESNT_EXIST; extern const int CANNOT_SELECT; extern const int QUERY_NOT_ALLOWED; } @@ -64,6 +64,7 @@ StorageFileLog::StorageFileLog( , metadata_base_path(std::filesystem::path(metadata_base_path_) / "metadata") , format_name(format_name_) , log(&Poco::Logger::get("StorageFileLog (" + table_id_.table_name + ")")) + , disk(getContext()->getStoragePolicy("default")->getDisks().at(0)) , milliseconds_to_wait(filelog_settings->poll_directory_watch_events_backoff_init.totalMilliseconds()) { StorageInMemoryMetadata storage_metadata; @@ -75,21 +76,14 @@ StorageFileLog::StorageFileLog( { if (!attach) { - std::error_code ec; - std::filesystem::create_directories(metadata_base_path, ec); - - if (ec) + if (disk->exists(metadata_base_path)) { - if (ec == std::make_error_code(std::errc::file_exists)) - { - throw Exception(ErrorCodes::TABLE_METADATA_ALREADY_EXISTS, - "Metadata files already exist by path: {}, remove them manually if it is intended", - 
metadata_base_path); - } - else - throw Exception(ErrorCodes::DIRECTORY_DOESNT_EXIST, - "Could not create directory {}, reason: {}", metadata_base_path, ec.message()); + throw Exception( + ErrorCodes::TABLE_METADATA_ALREADY_EXISTS, + "Metadata files already exist by path: {}, remove them manually if it is intended", + metadata_base_path); } + disk->createDirectories(metadata_base_path); } loadMetaFiles(attach); @@ -117,19 +111,8 @@ void StorageFileLog::loadMetaFiles(bool attach) /// Attach table if (attach) { - const auto & storage = getStorageID(); - - auto metadata_path_exist = std::filesystem::exists(metadata_base_path); - auto previous_path = std::filesystem::path(getContext()->getPath()) / ".filelog_storage_metadata" / storage.getDatabaseName() / storage.getTableName(); - - /// For compatibility with the previous path version. - if (std::filesystem::exists(previous_path) && !metadata_path_exist) - { - std::filesystem::copy(previous_path, metadata_base_path, std::filesystem::copy_options::recursive); - std::filesystem::remove_all(previous_path); - } /// Meta file may lost, log and create directory - else if (!metadata_path_exist) + if (!disk->exists(metadata_base_path)) { /// Create metadata_base_path directory when store meta data LOG_ERROR(log, "Metadata files of table {} are lost.", getStorageID().getTableName()); @@ -189,7 +172,7 @@ void StorageFileLog::loadFiles() /// data file have been renamed, need update meta file's name if (it->second.file_name != file) { - std::filesystem::rename(getFullMetaPath(it->second.file_name), getFullMetaPath(file)); + disk->replaceFile(getFullMetaPath(it->second.file_name), getFullMetaPath(file)); it->second.file_name = file; } } @@ -217,7 +200,7 @@ void StorageFileLog::loadFiles() valid_metas.emplace(inode, meta); /// Delete meta file from filesystem else - std::filesystem::remove(getFullMetaPath(meta.file_name)); + disk->removeFileIfExists(getFullMetaPath(meta.file_name)); } file_infos.meta_by_inode.swap(valid_metas); } @@ -228,70 +211,71 @@ void StorageFileLog::serialize() const for (const auto & [inode, meta] : file_infos.meta_by_inode) { auto full_name = getFullMetaPath(meta.file_name); - if (!std::filesystem::exists(full_name)) + if (!disk->exists(full_name)) { - FS::createFile(full_name); + disk->createFile(full_name); } else { checkOffsetIsValid(full_name, meta.last_writen_position); } - WriteBufferFromFile out(full_name); - writeIntText(inode, out); - writeChar('\n', out); - writeIntText(meta.last_writen_position, out); + auto out = disk->writeFile(full_name); + writeIntText(inode, *out); + writeChar('\n', *out); + writeIntText(meta.last_writen_position, *out); } } void StorageFileLog::serialize(UInt64 inode, const FileMeta & file_meta) const { auto full_name = getFullMetaPath(file_meta.file_name); - if (!std::filesystem::exists(full_name)) + if (!disk->exists(full_name)) { - FS::createFile(full_name); + disk->createFile(full_name); } else { checkOffsetIsValid(full_name, file_meta.last_writen_position); } - WriteBufferFromFile out(full_name); - writeIntText(inode, out); - writeChar('\n', out); - writeIntText(file_meta.last_writen_position, out); + auto out = disk->writeFile(full_name); + writeIntText(inode, *out); + writeChar('\n', *out); + writeIntText(file_meta.last_writen_position, *out); } void StorageFileLog::deserialize() { - if (!std::filesystem::exists(metadata_base_path)) + if (!disk->exists(metadata_base_path)) return; /// In case of single file (not a watched directory), /// iterated directory always has one file inside. 
- for (const auto & dir_entry : std::filesystem::directory_iterator{metadata_base_path}) + for (const auto dir_iter = disk->iterateDirectory(metadata_base_path); dir_iter->isValid(); dir_iter->next()) { - if (!dir_entry.is_regular_file()) + auto full_name = getFullMetaPath(dir_iter->name()); + if (!disk->isFile(full_name)) { throw Exception( ErrorCodes::BAD_FILE_TYPE, "The file {} under {} is not a regular file when deserializing meta files", - dir_entry.path().c_str(), + dir_iter->name(), metadata_base_path); } - ReadBufferFromFile in(dir_entry.path().c_str()); + auto in = disk->readFile(full_name); FileMeta meta; UInt64 inode, last_written_pos; - if (!tryReadIntText(inode, in)) + if (!tryReadIntText(inode, *in)) { - throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Read meta file {} failed", dir_entry.path().c_str()); + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Read meta file {} failed", dir_iter->path()); } - assertChar('\n', in); - if (!tryReadIntText(last_written_pos, in)) + assertChar('\n', *in); + if (!tryReadIntText(last_written_pos, *in)) { - throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Read meta file {} failed", dir_entry.path().c_str()); + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Read meta file {} failed", dir_iter->path()); } - meta.file_name = dir_entry.path().filename(); + meta.file_name = dir_iter->name(); meta.last_writen_position = last_written_pos; file_infos.meta_by_inode.emplace(inode, meta); @@ -506,17 +490,17 @@ void StorageFileLog::storeMetas(size_t start, size_t end) } } -void StorageFileLog::checkOffsetIsValid(const String & full_name, UInt64 offset) +void StorageFileLog::checkOffsetIsValid(const String & full_name, UInt64 offset) const { - ReadBufferFromFile in(full_name); + auto in = disk->readFile(full_name); UInt64 _, last_written_pos; - if (!tryReadIntText(_, in)) + if (!tryReadIntText(_, *in)) { throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Read meta file {} failed", full_name); } - assertChar('\n', in); - if (!tryReadIntText(last_written_pos, in)) + assertChar('\n', *in); + if (!tryReadIntText(last_written_pos, *in)) { throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Read meta file {} failed", full_name); } diff --git a/src/Storages/FileLog/StorageFileLog.h b/src/Storages/FileLog/StorageFileLog.h index 56f2d40ef5a..9737c31acb6 100644 --- a/src/Storages/FileLog/StorageFileLog.h +++ b/src/Storages/FileLog/StorageFileLog.h @@ -1,5 +1,7 @@ #pragma once +#include + #include #include #include @@ -147,6 +149,8 @@ private: const String format_name; Poco::Logger * log; + DiskPtr disk; + uint64_t milliseconds_to_wait; /// In order to avoid data race, using a naive trick to forbid execute two select @@ -198,7 +202,7 @@ private: void serialize(UInt64 inode, const FileMeta & file_meta) const; void deserialize(); - static void checkOffsetIsValid(const String & full_name, UInt64 offset); + void checkOffsetIsValid(const String & full_name, UInt64 offset) const; }; } diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index f018a711284..a0585e9c9a1 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -617,12 +617,12 @@ public: /// Number of rows INSERTed since server start. /// - /// Does not takes underlying Storage (if any) into account. + /// Does not take the underlying Storage (if any) into account. virtual std::optional lifetimeRows() const { return {}; } /// Number of bytes INSERTed since server start. /// - /// Does not takes underlying Storage (if any) into account. 
+ /// Does not take the underlying Storage (if any) into account. virtual std::optional lifetimeBytes() const { return {}; } /// Creates a storage snapshot from given metadata. diff --git a/src/Storages/LiveView/StorageLiveView.h b/src/Storages/LiveView/StorageLiveView.h index 31b1c425709..831d9115708 100644 --- a/src/Storages/LiveView/StorageLiveView.h +++ b/src/Storages/LiveView/StorageLiveView.h @@ -21,16 +21,12 @@ limitations under the License. */ namespace DB { -using Time = std::chrono::time_point; -using Seconds = std::chrono::seconds; -using MilliSeconds = std::chrono::milliseconds; - struct BlocksMetadata { String hash; UInt64 version; - Time time; + std::chrono::time_point time; }; struct MergeableBlocks @@ -54,6 +50,10 @@ friend class LiveViewSource; friend class LiveViewEventsSource; friend class LiveViewSink; +using Time = std::chrono::time_point; +using Seconds = std::chrono::seconds; +using MilliSeconds = std::chrono::milliseconds; + public: StorageLiveView( const StorageID & table_id_, diff --git a/src/Storages/MergeTree/DataPartsExchange.cpp b/src/Storages/MergeTree/DataPartsExchange.cpp index 4f9c9ffd596..66f91aa6cd2 100644 --- a/src/Storages/MergeTree/DataPartsExchange.cpp +++ b/src/Storages/MergeTree/DataPartsExchange.cpp @@ -794,8 +794,6 @@ void Fetcher::downloadBasePartOrProjectionPartToDiskRemoteMeta( /// NOTE The is_cancelled flag also makes sense to check every time you read over the network, /// performing a poll with a not very large timeout. /// And now we check it only between read chunks (in the `copyData` function). - data_part_storage->removeSharedRecursive(true); - data_part_storage->commitTransaction(); throw Exception("Fetching of part was cancelled", ErrorCodes::ABORTED); } @@ -855,7 +853,6 @@ void Fetcher::downloadBaseOrProjectionPartToDisk( /// NOTE The is_cancelled flag also makes sense to check every time you read over the network, /// performing a poll with a not very large timeout. /// And now we check it only between read chunks (in the `copyData` function). 
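+            /// Cleanup of a cancelled fetch is handled by the caller (downloadPartToDisk),
+            /// which removes the whole part directory in its catch block, so nothing is removed here.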
- data_part_storage->removeRecursive(); throw Exception("Fetching of part was cancelled", ErrorCodes::ABORTED); } @@ -934,22 +931,36 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDisk( CurrentMetrics::Increment metric_increment{CurrentMetrics::ReplicatedFetch}; - for (size_t i = 0; i < projections; ++i) + try { - String projection_name; - readStringBinary(projection_name, in); - MergeTreeData::DataPart::Checksums projection_checksum; + for (size_t i = 0; i < projections; ++i) + { + String projection_name; + readStringBinary(projection_name, in); + MergeTreeData::DataPart::Checksums projection_checksum; - auto projection_part_storage = data_part_storage->getProjection(projection_name + ".proj"); - projection_part_storage->createDirectories(); - downloadBaseOrProjectionPartToDisk( - replica_path, projection_part_storage, sync, in, projection_checksum, throttler); - checksums.addFile( - projection_name + ".proj", projection_checksum.getTotalSizeOnDisk(), projection_checksum.getTotalChecksumUInt128()); + auto projection_part_storage = data_part_storage->getProjection(projection_name + ".proj"); + projection_part_storage->createDirectories(); + downloadBaseOrProjectionPartToDisk( + replica_path, projection_part_storage, sync, in, projection_checksum, throttler); + checksums.addFile( + projection_name + ".proj", projection_checksum.getTotalSizeOnDisk(), projection_checksum.getTotalChecksumUInt128()); + } + + // Download the base part + downloadBaseOrProjectionPartToDisk(replica_path, data_part_storage, sync, in, checksums, throttler); + } + catch (const Exception & e) + { + /// Remove the whole part directory if fetch of base + /// part or fetch of any projection was stopped. + if (e.code() == ErrorCodes::ABORTED) + { + data_part_storage->removeRecursive(); + data_part_storage->commitTransaction(); + } + throw; } - - // Download the base part - downloadBaseOrProjectionPartToDisk(replica_path, data_part_storage, sync, in, checksums, throttler); assertEOF(in); data_part_storage->commitTransaction(); @@ -1007,23 +1018,37 @@ MergeTreeData::MutableDataPartPtr Fetcher::downloadPartToDiskRemoteMeta( data_part_storage->createDirectories(); - for (size_t i = 0; i < projections; ++i) + try { - String projection_name; - readStringBinary(projection_name, in); - MergeTreeData::DataPart::Checksums projection_checksum; + for (size_t i = 0; i < projections; ++i) + { + String projection_name; + readStringBinary(projection_name, in); + MergeTreeData::DataPart::Checksums projection_checksum; + + auto projection_part_storage = data_part_storage->getProjection(projection_name + ".proj"); + projection_part_storage->createDirectories(); + downloadBasePartOrProjectionPartToDiskRemoteMeta( + replica_path, projection_part_storage, in, projection_checksum, throttler); + + checksums.addFile( + projection_name + ".proj", projection_checksum.getTotalSizeOnDisk(), projection_checksum.getTotalChecksumUInt128()); + } - auto projection_part_storage = data_part_storage->getProjection(projection_name + ".proj"); - projection_part_storage->createDirectories(); downloadBasePartOrProjectionPartToDiskRemoteMeta( - replica_path, projection_part_storage, in, projection_checksum, throttler); - - checksums.addFile( - projection_name + ".proj", projection_checksum.getTotalSizeOnDisk(), projection_checksum.getTotalChecksumUInt128()); + replica_path, data_part_storage, in, checksums, throttler); + } + catch (const Exception & e) + { + if (e.code() == ErrorCodes::ABORTED) + { + /// Remove the whole part directory if fetch of 
base + /// part or fetch of any projection was stopped. + data_part_storage->removeSharedRecursive(true); + data_part_storage->commitTransaction(); + } + throw; } - - downloadBasePartOrProjectionPartToDiskRemoteMeta( - replica_path, data_part_storage, in, checksums, throttler); assertEOF(in); MergeTreeData::MutableDataPartPtr new_data_part; diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index e6c6f02b098..963e874b2a3 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -46,6 +46,17 @@ class MarkCache; class UncompressedCache; class MergeTreeTransaction; + +enum class DataPartRemovalState +{ + NOT_ATTEMPTED, + VISIBLE_TO_TRANSACTIONS, + NON_UNIQUE_OWNERSHIP, + NOT_REACHED_REMOVAL_TIME, + HAS_SKIPPED_MUTATION_PARENT, + REMOVED, +}; + /// Description of the data part. class IMergeTreeDataPart : public std::enable_shared_from_this, public DataPartStorageHolder { @@ -446,6 +457,10 @@ public: void removeDeleteOnDestroyMarker(); void removeVersionMetadata(); + mutable std::atomic removal_state = DataPartRemovalState::NOT_ATTEMPTED; + + mutable std::atomic last_removal_attemp_time = 0; + protected: /// Total size of all columns, calculated once in calcuateColumnSizesOnDisk diff --git a/src/Storages/MergeTree/MergeList.cpp b/src/Storages/MergeTree/MergeList.cpp index 76d69cc6b7d..a833da7064f 100644 --- a/src/Storages/MergeTree/MergeList.cpp +++ b/src/Storages/MergeTree/MergeList.cpp @@ -88,6 +88,10 @@ MergeListElement::MergeListElement( /// thread_group::memory_tracker, but MemoryTrackerThreadSwitcher will reset parent). memory_tracker.setProfilerStep(settings.memory_profiler_step); memory_tracker.setSampleProbability(settings.memory_profiler_sample_probability); + /// Specify sample probability also for current thread to track more deallocations. + if (auto * thread_memory_tracker = DB::CurrentThread::getMemoryTracker()) + thread_memory_tracker->setSampleProbability(settings.memory_profiler_sample_probability); + memory_tracker.setSoftLimit(settings.memory_overcommit_ratio_denominator); if (settings.memory_tracker_fault_probability > 0.0) memory_tracker.setFaultProbability(settings.memory_tracker_fault_probability); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 6b58e23b661..4dfa8b9a801 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -84,6 +84,7 @@ #include #include +#include #include #include #include @@ -1762,9 +1763,12 @@ MergeTreeData::DataPartsVector MergeTreeData::grabOldParts(bool force) { const DataPartPtr & part = *it; + part->last_removal_attemp_time.store(time_now, std::memory_order_relaxed); + /// Do not remove outdated part if it may be visible for some transaction if (!part->version.canBeRemoved()) { + part->removal_state.store(DataPartRemovalState::VISIBLE_TO_TRANSACTIONS, std::memory_order_relaxed); skipped_parts.push_back(part->info); continue; } @@ -1772,20 +1776,27 @@ MergeTreeData::DataPartsVector MergeTreeData::grabOldParts(bool force) /// Grab only parts that are not used by anyone (SELECTs for example). 
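+        /// part.unique() means the parts set holds the only remaining reference to this DataPartPtr,
+        /// i.e. no query or merge is still using the part.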
if (!part.unique()) { + part->removal_state.store(DataPartRemovalState::NON_UNIQUE_OWNERSHIP, std::memory_order_relaxed); skipped_parts.push_back(part->info); continue; } auto part_remove_time = part->remove_time.load(std::memory_order_relaxed); - if ((part_remove_time < time_now && time_now - part_remove_time > getSettings()->old_parts_lifetime.totalSeconds() && !has_skipped_mutation_parent(part)) + bool reached_removal_time = part_remove_time < time_now && time_now - part_remove_time > getSettings()->old_parts_lifetime.totalSeconds(); + if ((reached_removal_time && !has_skipped_mutation_parent(part)) || force || isInMemoryPart(part) /// Remove in-memory parts immediately to not store excessive data in RAM || (part->version.creation_csn == Tx::RolledBackCSN && getSettings()->remove_rolled_back_parts_immediately)) { + part->removal_state.store(DataPartRemovalState::REMOVED, std::memory_order_relaxed); parts_to_delete.emplace_back(it); } else { + if (!reached_removal_time) + part->removal_state.store(DataPartRemovalState::NOT_REACHED_REMOVAL_TIME, std::memory_order_relaxed); + else + part->removal_state.store(DataPartRemovalState::HAS_SKIPPED_MUTATION_PARENT, std::memory_order_relaxed); skipped_parts.push_back(part->info); continue; } @@ -2600,7 +2611,17 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, Context } } - dropped_columns.emplace(command.column_name); + if (old_metadata.columns.has(command.column_name)) + { + dropped_columns.emplace(command.column_name); + } + else + { + const auto & nested = old_metadata.columns.getNested(command.column_name); + for (const auto & nested_column : nested) + dropped_columns.emplace(nested_column.name); + } + } else if (command.type == AlterCommand::RESET_SETTING) { @@ -3884,9 +3905,9 @@ MergeTreeData::DataPartsVector MergeTreeData::getVisibleDataPartsVectorInPartiti return res; } -MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const MergeTreePartInfo & part_info, const MergeTreeData::DataPartStates & valid_states) +MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const MergeTreePartInfo & part_info, const MergeTreeData::DataPartStates & valid_states, DataPartsLock * acquired_lock) { - auto lock = lockParts(); + auto lock = (acquired_lock) ? 
DataPartsLock() : lockParts(); auto it = data_parts_by_info.find(part_info); if (it == data_parts_by_info.end()) @@ -3899,9 +3920,9 @@ MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const MergeTreePartInf return nullptr; } -MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const String & part_name, const MergeTreeData::DataPartStates & valid_states) +MergeTreeData::DataPartPtr MergeTreeData::getPartIfExists(const String & part_name, const MergeTreeData::DataPartStates & valid_states, DataPartsLock * acquired_lock) { - return getPartIfExists(MergeTreePartInfo::fromPartName(part_name, format_version), valid_states); + return getPartIfExists(MergeTreePartInfo::fromPartName(part_name, format_version), valid_states, acquired_lock); } @@ -4525,6 +4546,7 @@ void MergeTreeData::restorePartFromBackup(std::shared_ptr r auto read_buffer = backup_entry->getReadBuffer(); auto write_buffer = disk->writeFile(temp_part_dir / filename); copyData(*read_buffer, *write_buffer); + write_buffer->finalize(); reservation->update(reservation->getSize() - backup_entry->getSize()); } @@ -7343,6 +7365,12 @@ StorageSnapshotPtr MergeTreeData::getStorageSnapshot(const StorageMetadataPtr & return std::make_shared(*this, metadata_snapshot, object_columns, std::move(snapshot_data)); } +StorageSnapshotPtr MergeTreeData::getStorageSnapshotWithoutParts(const StorageMetadataPtr & metadata_snapshot) const +{ + auto lock = lockParts(); + return std::make_shared(*this, metadata_snapshot, object_columns, std::make_unique()); +} + void MergeTreeData::incrementInsertedPartsProfileEvent(MergeTreeDataPartType type) { switch (type.getValue()) diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 02303031baa..16e6e7aa809 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -450,6 +450,9 @@ public: StorageSnapshotPtr getStorageSnapshot(const StorageMetadataPtr & metadata_snapshot, ContextPtr query_context) const override; + /// The same as above but does not hold vector of data parts. + StorageSnapshotPtr getStorageSnapshotWithoutParts(const StorageMetadataPtr & metadata_snapshot) const; + /// Load the set of data parts from disk. Call once - immediately after the object is created. void loadDataParts(bool skip_sanity_checks); @@ -514,8 +517,8 @@ public: DataPartsVector getDataPartsVectorInPartitionForInternalUsage(const DataPartStates & affordable_states, const String & partition_id, DataPartsLock * acquired_lock = nullptr) const; /// Returns the part with the given name and state or nullptr if no such part. - DataPartPtr getPartIfExists(const String & part_name, const DataPartStates & valid_states); - DataPartPtr getPartIfExists(const MergeTreePartInfo & part_info, const DataPartStates & valid_states); + DataPartPtr getPartIfExists(const String & part_name, const DataPartStates & valid_states, DataPartsLock * acquired_lock = nullptr); + DataPartPtr getPartIfExists(const MergeTreePartInfo & part_info, const DataPartStates & valid_states, DataPartsLock * acquired_lock = nullptr); /// Total size of active parts in bytes. size_t getTotalActiveSizeInBytes() const; @@ -784,8 +787,6 @@ public: return column_sizes; } - const ColumnsDescription & getConcreteObjectColumns() const { return object_columns; } - /// Creates description of columns of data type Object from the range of data parts. 
static ColumnsDescription getConcreteObjectColumns( const DataPartsVector & parts, const ColumnsDescription & storage_columns); diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 13a72c24c59..0de71e94ea8 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -24,7 +24,7 @@ MergeTreeSink::MergeTreeSink( , metadata_snapshot(metadata_snapshot_) , max_parts_per_block(max_parts_per_block_) , context(context_) - , storage_snapshot(storage.getStorageSnapshot(metadata_snapshot, context)) + , storage_snapshot(storage.getStorageSnapshotWithoutParts(metadata_snapshot)) { } diff --git a/src/Storages/MergeTree/RPNBuilder.cpp b/src/Storages/MergeTree/RPNBuilder.cpp index d7ea68e7d64..b1d726335ae 100644 --- a/src/Storages/MergeTree/RPNBuilder.cpp +++ b/src/Storages/MergeTree/RPNBuilder.cpp @@ -172,7 +172,7 @@ ColumnWithTypeAndName RPNBuilderTreeNode::getConstantColumn() const if (ast_node) { - const auto * literal = assert_cast(ast_node); + const auto * literal = typeid_cast(ast_node); if (literal) { result.type = applyVisitor(FieldToDataType(), literal->value); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp index 47f10acb157..049d2c2adf5 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeAttachThread.cpp @@ -142,6 +142,9 @@ void ReplicatedMergeTreeAttachThread::runImpl() checkHasReplicaMetadataInZooKeeper(zookeeper, replica_path); + /// Just in case it was not removed earlier due to connection loss + zookeeper->tryRemove(replica_path + "/flags/force_restore_data"); + String replica_metadata_version; const bool replica_metadata_version_exists = zookeeper->tryGet(replica_path + "/metadata_version", replica_metadata_version); if (replica_metadata_version_exists) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 1f3aac57969..9ff022d5d57 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -1193,7 +1193,7 @@ bool ReplicatedMergeTreeQueue::isCoveredByFuturePartsImpl(const LogEntry & entry const LogEntry & another_entry = *entry_for_same_part_it->second; out_reason = fmt::format( "Not executing log entry {} of type {} for part {} " - "because another log entry {} of type {} for the same part ({}) is being processed. This shouldn't happen often.", + "because another log entry {} of type {} for the same part ({}) is being processed.", entry.znode_name, entry.type, entry.new_part_name, another_entry.znode_name, another_entry.type, another_entry.new_part_name); LOG_INFO(log, fmt::runtime(out_reason)); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp index 10ec4702b53..93724e4946d 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeRestartingThread.cpp @@ -386,8 +386,13 @@ void ReplicatedMergeTreeRestartingThread::setReadonly(bool on_shutdown) CurrentMetrics::add(CurrentMetrics::ReadonlyReplica); /// Replica was already readonly, but we should decrement the metric, because we are detaching/dropping table. 
- if (on_shutdown) + /// if first pass wasn't done we don't have to decrement because it wasn't incremented in the first place + /// the task should be deactivated if it's full shutdown so no race is present + if (!first_time && on_shutdown) + { CurrentMetrics::sub(CurrentMetrics::ReadonlyReplica); + assert(CurrentMetrics::get(CurrentMetrics::ReadonlyReplica) >= 0); + } } void ReplicatedMergeTreeRestartingThread::setNotReadonly() @@ -397,7 +402,10 @@ void ReplicatedMergeTreeRestartingThread::setNotReadonly() /// because we don't want to change this metric if replication is started successfully. /// So we should not decrement it when replica stopped being readonly on startup. if (storage.is_readonly.compare_exchange_strong(old_val, false) && !first_time) + { CurrentMetrics::sub(CurrentMetrics::ReadonlyReplica); + assert(CurrentMetrics::get(CurrentMetrics::ReadonlyReplica) >= 0); + } } } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 2d8bf28e700..3da71c61482 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -203,7 +203,7 @@ ReplicatedMergeTreeSinkImpl::ReplicatedMergeTreeSinkImpl( , deduplicate(deduplicate_) , log(&Poco::Logger::get(storage.getLogName() + " (Replicated OutputStream)")) , context(context_) - , storage_snapshot(storage.getStorageSnapshot(metadata_snapshot, context)) + , storage_snapshot(storage.getStorageSnapshotWithoutParts(metadata_snapshot)) { /// The quorum value `1` has the same meaning as if it is disabled. if (required_quorum_size == 1) @@ -485,7 +485,8 @@ void ReplicatedMergeTreeSinkImpl::finishDelayedChunk(const ZooKeeperWithFa auto conflict_block_ids = commitPart(zookeeper, partition.temp_part.part, partition.block_id, delayed_chunk->replicas_num, false); if (conflict_block_ids.empty()) break; - LOG_DEBUG(log, "Found depulicate block IDs: {}, retry times {}", toString(conflict_block_ids), ++retry_times); + ++retry_times; + LOG_DEBUG(log, "Found duplicate block IDs: {}, retry times {}", toString(conflict_block_ids), retry_times); /// partition clean conflict rewriteBlock(log, partition, conflict_block_ids); if (partition.block_id.empty()) diff --git a/src/Storages/NamedCollectionConfiguration.cpp b/src/Storages/NamedCollections/NamedCollectionConfiguration.cpp similarity index 88% rename from src/Storages/NamedCollectionConfiguration.cpp rename to src/Storages/NamedCollections/NamedCollectionConfiguration.cpp index b0e7bdce32a..6875458958b 100644 --- a/src/Storages/NamedCollectionConfiguration.cpp +++ b/src/Storages/NamedCollections/NamedCollectionConfiguration.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -35,19 +35,30 @@ template T getConfigValueOrDefault( return *default_value; } - if constexpr (std::is_same_v) - return config.getString(path); - else if constexpr (std::is_same_v) - return config.getUInt64(path); - else if constexpr (std::is_same_v) - return config.getInt64(path); - else if constexpr (std::is_same_v) - return config.getDouble(path); - else + try + { + if constexpr (std::is_same_v) + return config.getString(path); + else if constexpr (std::is_same_v) + return config.getUInt64(path); + else if constexpr (std::is_same_v) + return config.getInt64(path); + else if constexpr (std::is_same_v) + return config.getDouble(path); + else + throw Exception( + ErrorCodes::NOT_IMPLEMENTED, + "Unsupported type in getConfigValueOrDefault(). 
" + "Supported types are String, UInt64, Int64, Float64"); + } + catch (const Poco::SyntaxException &) + { throw Exception( - ErrorCodes::NOT_IMPLEMENTED, - "Unsupported type in getConfigValueOrDefault(). " - "Supported types are String, UInt64, Int64, Float64"); + ErrorCodes::BAD_ARGUMENTS, + "Cannot extract {} from {}", + toString(magic_enum::enum_name(Field::TypeToEnum>::value)), + path); + } } template void setConfigValue( diff --git a/src/Storages/NamedCollectionConfiguration.h b/src/Storages/NamedCollections/NamedCollectionConfiguration.h similarity index 100% rename from src/Storages/NamedCollectionConfiguration.h rename to src/Storages/NamedCollections/NamedCollectionConfiguration.h diff --git a/src/Storages/NamedCollectionUtils.cpp b/src/Storages/NamedCollections/NamedCollectionUtils.cpp similarity index 97% rename from src/Storages/NamedCollectionUtils.cpp rename to src/Storages/NamedCollections/NamedCollectionUtils.cpp index 75d5aace664..c4caa5c95f6 100644 --- a/src/Storages/NamedCollectionUtils.cpp +++ b/src/Storages/NamedCollections/NamedCollectionUtils.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -13,8 +13,8 @@ #include #include #include -#include -#include +#include +#include namespace fs = std::filesystem; @@ -78,7 +78,7 @@ public: /// (`enumerate_result` == ). const bool collection_is_empty = enumerate_result.size() == 1 && *enumerate_result.begin() == collection_prefix; - std::set keys; + std::set> keys; if (!collection_is_empty) { /// Skip collection prefix and add +1 to avoid '.' in the beginning. @@ -296,7 +296,7 @@ private: const auto config = NamedCollectionConfiguration::createConfiguration( collection_name, query.changes); - std::set keys; + std::set> keys; for (const auto & [name, _] : query.changes) keys.insert(name); diff --git a/src/Storages/NamedCollectionUtils.h b/src/Storages/NamedCollections/NamedCollectionUtils.h similarity index 100% rename from src/Storages/NamedCollectionUtils.h rename to src/Storages/NamedCollections/NamedCollectionUtils.h diff --git a/src/Storages/NamedCollections.cpp b/src/Storages/NamedCollections/NamedCollections.cpp similarity index 92% rename from src/Storages/NamedCollections.cpp rename to src/Storages/NamedCollections/NamedCollections.cpp index d90225547ac..03633bbd370 100644 --- a/src/Storages/NamedCollections.cpp +++ b/src/Storages/NamedCollections/NamedCollections.cpp @@ -3,8 +3,8 @@ #include #include #include -#include -#include +#include +#include #include #include @@ -234,6 +234,16 @@ public: return keys; } + Keys::const_iterator begin() const + { + return keys.begin(); + } + + Keys::const_iterator end() const + { + return keys.end(); + } + std::string dumpStructure() const { /// Convert a collection config like @@ -375,6 +385,22 @@ NamedCollection::Keys NamedCollection::getKeys() const return pimpl->getKeys(); } +template NamedCollection::const_iterator NamedCollection::begin() const +{ + std::unique_lock lock(mutex, std::defer_lock); + if constexpr (!Locked) + lock.lock(); + return pimpl->begin(); +} + +template NamedCollection::const_iterator NamedCollection::end() const +{ + std::unique_lock lock(mutex, std::defer_lock); + if constexpr (!Locked) + lock.lock(); + return pimpl->end(); +} + std::string NamedCollection::dumpStructure() const { std::lock_guard lock(mutex); @@ -417,4 +443,8 @@ template void NamedCollection::setOrUpdate(const NamedCollection template void NamedCollection::remove(const Key & key); template void NamedCollection::remove(const Key & key); +template 
NamedCollection::const_iterator NamedCollection::begin() const; +template NamedCollection::const_iterator NamedCollection::begin() const; +template NamedCollection::const_iterator NamedCollection::end() const; +template NamedCollection::const_iterator NamedCollection::end() const; } diff --git a/src/Storages/NamedCollections.h b/src/Storages/NamedCollections/NamedCollections.h similarity index 89% rename from src/Storages/NamedCollections.h rename to src/Storages/NamedCollections/NamedCollections.h index f7181c2b539..53b7a828a63 100644 --- a/src/Storages/NamedCollections.h +++ b/src/Storages/NamedCollections/NamedCollections.h @@ -1,7 +1,7 @@ #pragma once #include -#include -#include +#include +#include namespace Poco { namespace Util { class AbstractConfiguration; } } @@ -22,7 +22,7 @@ class NamedCollection { public: using Key = std::string; - using Keys = std::set; + using Keys = std::set>; using SourceId = NamedCollectionUtils::SourceId; static MutableNamedCollectionPtr create( @@ -49,6 +49,13 @@ public: Keys getKeys() const; + using iterator = typename Keys::iterator; + using const_iterator = typename Keys::const_iterator; + + template const_iterator begin() const; + + template const_iterator end() const; + std::string dumpStructure() const; bool isMutable() const { return is_mutable; } diff --git a/src/Storages/NamedCollections/NamedCollectionsHelpers.cpp b/src/Storages/NamedCollections/NamedCollectionsHelpers.cpp new file mode 100644 index 00000000000..cceabdfd7bf --- /dev/null +++ b/src/Storages/NamedCollections/NamedCollectionsHelpers.cpp @@ -0,0 +1,112 @@ +#include "NamedCollectionsHelpers.h" +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +namespace +{ + NamedCollectionPtr tryGetNamedCollectionFromASTs(ASTs asts) + { + if (asts.empty()) + return nullptr; + + const auto * identifier = asts[0]->as(); + if (!identifier) + return nullptr; + + const auto & collection_name = identifier->name(); + return NamedCollectionFactory::instance().tryGet(collection_name); + } + + std::optional> getKeyValueFromAST(ASTPtr ast) + { + const auto * function = ast->as(); + if (!function || function->name != "equals") + return std::nullopt; + + const auto * function_args_expr = assert_cast(function->arguments.get()); + const auto & function_args = function_args_expr->children; + + if (function_args.size() != 2) + return std::nullopt; + + auto literal_key = evaluateConstantExpressionOrIdentifierAsLiteral( + function_args[0], Context::getGlobalContextInstance()); + auto key = checkAndGetLiteralArgument(literal_key, "key"); + + auto literal_value = evaluateConstantExpressionOrIdentifierAsLiteral( + function_args[1], Context::getGlobalContextInstance()); + auto value = literal_value->as()->value; + + return std::pair{key, value}; + } +} + + +NamedCollectionPtr tryGetNamedCollectionWithOverrides(ASTs asts) +{ + if (asts.empty()) + return nullptr; + + auto collection = tryGetNamedCollectionFromASTs(asts); + if (!collection) + return nullptr; + + if (asts.size() == 1) + return collection; + + auto collection_copy = collection->duplicate(); + + for (const auto & ast : asts) + { + auto value_override = getKeyValueFromAST(ast); + if (!value_override) + continue; + + const auto & [key, value] = *value_override; + collection_copy->setOrUpdate(key, toString(value)); + } + + return collection_copy; +} + +void validateNamedCollection( + const NamedCollection & collection, + const std::unordered_set & required_keys, + const 
std::unordered_set & optional_keys) +{ + const auto & keys = collection.getKeys(); + for (const auto & key : keys) + { + if (!required_keys.contains(key) && !optional_keys.contains(key)) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Unexpected key `{}` in named collection. Required keys: {}, optional keys: {}", + key, fmt::join(required_keys, ", "), fmt::join(optional_keys, ", ")); + } + } + + for (const auto & key : required_keys) + { + if (!keys.contains(key)) + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Key `{}` is required, but not specified. Required keys: {}, optional keys: {}", + key, fmt::join(required_keys, ", "), fmt::join(optional_keys, ", ")); + } + } +} + +} diff --git a/src/Storages/NamedCollections/NamedCollectionsHelpers.h b/src/Storages/NamedCollections/NamedCollectionsHelpers.h new file mode 100644 index 00000000000..39baafa9039 --- /dev/null +++ b/src/Storages/NamedCollections/NamedCollectionsHelpers.h @@ -0,0 +1,18 @@ +#pragma once +#include +#include +#include +#include + + +namespace DB +{ + +NamedCollectionPtr tryGetNamedCollectionWithOverrides(ASTs asts); + +void validateNamedCollection( + const NamedCollection & collection, + const std::unordered_set & required_keys, + const std::unordered_set & optional_keys); + +} diff --git a/src/Storages/NamedCollections_fwd.h b/src/Storages/NamedCollections/NamedCollections_fwd.h similarity index 100% rename from src/Storages/NamedCollections_fwd.h rename to src/Storages/NamedCollections/NamedCollections_fwd.h diff --git a/src/Storages/ReadFromStorageProgress.cpp b/src/Storages/ReadFromStorageProgress.cpp new file mode 100644 index 00000000000..48e64d7968b --- /dev/null +++ b/src/Storages/ReadFromStorageProgress.cpp @@ -0,0 +1,53 @@ +#include +#include +#include + +namespace DB +{ + +void updateRowsProgressApprox( + ISource & source, + const Chunk & chunk, + UInt64 total_result_size, + UInt64 & total_rows_approx_accumulated, + size_t & total_rows_count_times, + UInt64 & total_rows_approx_max) +{ + if (!total_result_size) + return; + + const size_t num_rows = chunk.getNumRows(); + + if (!num_rows) + return; + + const auto progress = source.getReadProgress(); + if (progress && !progress->limits.empty()) + { + for (const auto & limit : progress->limits) + { + if (limit.leaf_limits.max_rows || limit.leaf_limits.max_bytes + || limit.local_limits.size_limits.max_rows || limit.local_limits.size_limits.max_bytes) + return; + } + } + + const auto bytes_per_row = std::ceil(static_cast(chunk.bytes()) / num_rows); + size_t total_rows_approx = static_cast(std::ceil(static_cast(total_result_size) / bytes_per_row)); + total_rows_approx_accumulated += total_rows_approx; + ++total_rows_count_times; + total_rows_approx = total_rows_approx_accumulated / total_rows_count_times; + + /// We need to add diff, because total_rows_approx is incremental value. + /// It would be more correct to send total_rows_approx as is (not a diff), + /// but incrementation of total_rows_to_read does not allow that. + /// A new counter can be introduced for that to be sent to client, but it does not worth it. 
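+    /// For example, if the previous reported estimate was 100 rows and this chunk raises it to 120,
+    /// only the difference of 20 is passed to source.addTotalRowsApprox().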
+ if (total_rows_approx > total_rows_approx_max) + { + size_t diff = total_rows_approx - total_rows_approx_max; + source.addTotalRowsApprox(diff); + total_rows_approx_max = total_rows_approx; + } +} + +} diff --git a/src/Storages/ReadFromStorageProgress.h b/src/Storages/ReadFromStorageProgress.h new file mode 100644 index 00000000000..9f45845ac6e --- /dev/null +++ b/src/Storages/ReadFromStorageProgress.h @@ -0,0 +1,18 @@ +#pragma once +#include + +namespace DB +{ + +class ISource; +class Chunk; + +void updateRowsProgressApprox( + ISource & source, + const Chunk & chunk, + UInt64 total_result_size, + UInt64 & total_rows_approx_accumulated, + size_t & total_rows_count_times, + UInt64 & total_rows_approx_max); + +} diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 6e032a47943..d355d021313 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -592,22 +593,8 @@ public: if (num_rows) { - auto bytes_per_row = std::ceil(static_cast(chunk.bytes()) / num_rows); - size_t total_rows_approx = static_cast(std::ceil(static_cast(files_info->total_bytes_to_read) / bytes_per_row)); - total_rows_approx_accumulated += total_rows_approx; - ++total_rows_count_times; - total_rows_approx = total_rows_approx_accumulated / total_rows_count_times; - - /// We need to add diff, because total_rows_approx is incremental value. - /// It would be more correct to send total_rows_approx as is (not a diff), - /// but incrementation of total_rows_to_read does not allow that. - /// A new field can be introduces for that to be sent to client, but it does not worth it. - if (total_rows_approx > total_rows_approx_prev) - { - size_t diff = total_rows_approx - total_rows_approx_prev; - addTotalRowsApprox(diff); - total_rows_approx_prev = total_rows_approx; - } + updateRowsProgressApprox( + *this, chunk, files_info->total_bytes_to_read, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max); } return chunk; } @@ -648,7 +635,7 @@ private: UInt64 total_rows_approx_accumulated = 0; size_t total_rows_count_times = 0; - UInt64 total_rows_approx_prev = 0; + UInt64 total_rows_approx_max = 0; }; diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index aec28b37928..86d56e43b25 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -174,6 +174,9 @@ HashJoinPtr StorageJoin::getJoinLocked(std::shared_ptr analyzed_join, "Table {} needs the same join_use_nulls setting as present in LEFT or FULL JOIN", getStorageID().getNameForLogs()); + if (analyzed_join->getClauses().size() != 1) + throw Exception(ErrorCodes::INCOMPATIBLE_TYPE_OF_JOIN, "JOIN keys should match to the Join engine keys [{}]", fmt::join(getKeyNames(), ", ")); + const auto & join_on = analyzed_join->getOnlyClause(); if (join_on.on_filter_condition_left || join_on.on_filter_condition_right) throw Exception(ErrorCodes::INCOMPATIBLE_TYPE_OF_JOIN, "ON section of JOIN with filter conditions is not implemented"); @@ -211,9 +214,9 @@ HashJoinPtr StorageJoin::getJoinLocked(std::shared_ptr analyzed_join, left_key_names_resorted.push_back(key_names_left[key_position]); } - /// Set names qualifiers: table.column -> column - /// It's required because storage join stores non-qualified names - /// Qualifies will be added by join implementation (HashJoin) + /// Set qualified identifiers to original names (table.column -> column). + /// It's required because storage join stores non-qualified names. 
+ /// Qualifies will be added by join implementation (TableJoin contains a rename mapping). analyzed_join->setRightKeys(key_names); analyzed_join->setLeftKeys(left_key_names_resorted); diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 8ed33220507..b8920647244 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -1032,6 +1032,7 @@ void StorageLog::restoreDataImpl(const BackupPtr & backup, const String & data_p auto in = backup_entry->getReadBuffer(); auto out = disk->writeFile(data_file.path, max_compress_block_size, WriteMode::Append); copyData(*in, *out); + out->finalize(); } if (use_marks_file) diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index bd57579505a..5eb30f404c1 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -118,7 +118,6 @@ StorageMergeTree::StorageMergeTree( void StorageMergeTree::startup() { - clearOldPartsFromFilesystem(); clearOldWriteAheadLogs(); clearEmptyParts(); @@ -185,26 +184,6 @@ void StorageMergeTree::shutdown() if (deduplication_log) deduplication_log->shutdown(); - - try - { - /// We clear all old parts after stopping all background operations. - /// It's important, because background operations can produce temporary - /// parts which will remove themselves in their destructors. If so, we - /// may have race condition between our remove call and background - /// process. - /// Do not clear old parts in case when server is shutting down because it failed to start due to some exception. - - if (Context::getGlobalContextInstance()->getApplicationType() == Context::ApplicationType::SERVER - && Context::getGlobalContextInstance()->isServerCompletelyStarted()) - clearOldPartsFromFilesystem(true); - } - catch (...) - { - /// Example: the case of readonly filesystem, we have failure removing old parts. - /// Should not prevent table shutdown. 
- tryLogCurrentException(log); - } } @@ -1391,19 +1370,21 @@ MergeTreeDataPartPtr StorageMergeTree::outdatePart(MergeTreeTransaction * txn, c { /// Forcefully stop merges and make part outdated auto merge_blocker = stopMergesAndWait(); - auto part = getPartIfExists(part_name, {MergeTreeDataPartState::Active}); + auto parts_lock = lockParts(); + auto part = getPartIfExists(part_name, {MergeTreeDataPartState::Active}, &parts_lock); if (!part) throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "Part {} not found, won't try to drop it.", part_name); - removePartsFromWorkingSet(txn, {part}, true); + removePartsFromWorkingSet(txn, {part}, true, &parts_lock); return part; } else { /// Wait merges selector std::unique_lock lock(currently_processing_in_background_mutex); + auto parts_lock = lockParts(); - auto part = getPartIfExists(part_name, {MergeTreeDataPartState::Active}); + auto part = getPartIfExists(part_name, {MergeTreeDataPartState::Active}, &parts_lock); /// It's okay, part was already removed if (!part) return nullptr; @@ -1413,7 +1394,7 @@ MergeTreeDataPartPtr StorageMergeTree::outdatePart(MergeTreeTransaction * txn, c if (currently_merging_mutating_parts.contains(part)) return nullptr; - removePartsFromWorkingSet(txn, {part}, true); + removePartsFromWorkingSet(txn, {part}, true, &parts_lock); return part; } } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index ff2cf14399c..99ceb1d90ae 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -357,25 +357,37 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( /// It does not make sense for CREATE query if (attach) { - if (current_zookeeper && current_zookeeper->exists(replica_path + "/host")) + try { - /// Check it earlier if we can (we don't want incompatible version to start). - /// If "/host" doesn't exist, then replica is probably dropped and there's nothing to check. - ReplicatedMergeTreeAttachThread::checkHasReplicaMetadataInZooKeeper(current_zookeeper, replica_path); + if (current_zookeeper && current_zookeeper->exists(replica_path + "/host")) + { + /// Check it earlier if we can (we don't want incompatible version to start). + /// If "/host" doesn't exist, then replica is probably dropped and there's nothing to check. 
+ ReplicatedMergeTreeAttachThread::checkHasReplicaMetadataInZooKeeper(current_zookeeper, replica_path); + } + + if (current_zookeeper && current_zookeeper->exists(replica_path + "/flags/force_restore_data")) + { + skip_sanity_checks = true; + current_zookeeper->remove(replica_path + "/flags/force_restore_data"); + + LOG_WARNING( + log, + "Skipping the limits on severity of changes to data parts and columns (flag {}/flags/force_restore_data).", + replica_path); + } + else if (has_force_restore_data_flag) + { + skip_sanity_checks = true; + + LOG_WARNING(log, "Skipping the limits on severity of changes to data parts and columns (flag force_restore_data)."); + } } - - if (current_zookeeper && current_zookeeper->exists(replica_path + "/flags/force_restore_data")) + catch (const Coordination::Exception & e) { - skip_sanity_checks = true; - current_zookeeper->remove(replica_path + "/flags/force_restore_data"); - - LOG_WARNING(log, "Skipping the limits on severity of changes to data parts and columns (flag {}/flags/force_restore_data).", replica_path); - } - else if (has_force_restore_data_flag) - { - skip_sanity_checks = true; - - LOG_WARNING(log, "Skipping the limits on severity of changes to data parts and columns (flag force_restore_data)."); + if (!Coordination::isHardwareError(e.code)) + throw; + LOG_ERROR(log, "Caught exception while checking table metadata in ZooKeeper, will recheck later: {}", e.displayText()); } } @@ -621,6 +633,8 @@ void StorageReplicatedMergeTree::createNewZooKeeperNodes() futures.push_back(zookeeper->asyncTryCreateNoThrow(zookeeper_path + "/pinned_part_uuids", getPinnedPartUUIDs()->toString(), zkutil::CreateMode::Persistent)); /// For ALTER PARTITION with multi-leaders futures.push_back(zookeeper->asyncTryCreateNoThrow(zookeeper_path + "/alter_partition_version", String(), zkutil::CreateMode::Persistent)); + /// For deduplication of async inserts + futures.push_back(zookeeper->asyncTryCreateNoThrow(zookeeper_path + "/async_blocks", String(), zkutil::CreateMode::Persistent)); /// As for now, "/temp" node must exist, but we want to be able to remove it in future if (zookeeper->exists(zookeeper_path + "/temp")) @@ -4523,7 +4537,7 @@ SinkToStoragePtr StorageReplicatedMergeTree::write(const ASTPtr & /*query*/, con const auto storage_settings_ptr = getSettings(); const Settings & query_settings = local_context->getSettingsRef(); bool deduplicate = storage_settings_ptr->replicated_deduplication_window != 0 && query_settings.insert_deduplicate; - bool async_deduplicate = query_settings.async_insert && storage_settings_ptr->replicated_deduplication_window_for_async_inserts != 0 && query_settings.insert_deduplicate; + bool async_deduplicate = query_settings.async_insert && query_settings.async_insert_deduplicate && storage_settings_ptr->replicated_deduplication_window_for_async_inserts != 0 && query_settings.insert_deduplicate; if (async_deduplicate) return std::make_shared( *this, metadata_snapshot, query_settings.insert_quorum.valueOr(0), @@ -6550,7 +6564,7 @@ void StorageReplicatedMergeTree::getClearBlocksInPartitionOpsImpl( { Strings blocks; if (Coordination::Error::ZOK != zookeeper.tryGetChildren(fs::path(zookeeper_path) / blocks_dir_name, blocks)) - throw Exception(zookeeper_path + "/" + blocks_dir_name + "blocks doesn't exist", ErrorCodes::NOT_FOUND_NODE); + throw Exception(ErrorCodes::NOT_FOUND_NODE, "Node {}/{} doesn't exist", zookeeper_path, blocks_dir_name); String partition_prefix = partition_id + "_"; Strings paths_to_get; diff --git a/src/Storages/StorageS3.cpp 
b/src/Storages/StorageS3.cpp index 22ce95c35e7..9b50b41ddaf 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -14,7 +14,6 @@ #include #include -#include #include #include @@ -28,6 +27,13 @@ #include #include #include +#include +#include +#include + +#include +#include +#include #include #include @@ -41,7 +47,6 @@ #include #include -#include #include @@ -63,8 +68,6 @@ namespace fs = std::filesystem; -static const String PARTITION_ID_WILDCARD = "{_partition_id}"; - namespace ProfileEvents { extern const Event S3DeleteObjects; @@ -74,6 +77,28 @@ namespace ProfileEvents namespace DB { +static const String PARTITION_ID_WILDCARD = "{_partition_id}"; + +static const std::unordered_set required_configuration_keys = { + "url", +}; +static std::unordered_set optional_configuration_keys = { + "format", + "compression", + "compression_method", + "structure", + "access_key_id", + "secret_access_key", + "filename", + "use_environment_credentials", + "max_single_read_retries", + "min_upload_part_size", + "upload_part_size_multiply_factor", + "upload_part_size_multiply_parts_count_threshold", + "max_single_part_upload_size", + "max_connections", +}; + namespace ErrorCodes { extern const int CANNOT_PARSE_TEXT; @@ -89,9 +114,25 @@ namespace ErrorCodes class IOutputFormat; using OutputFormatPtr = std::shared_ptr; +static void addPathToVirtualColumns(Block & block, const String & path, size_t idx) +{ + if (block.has("_path")) + block.getByName("_path").column->assumeMutableRef().insert(path); + + if (block.has("_file")) + { + auto pos = path.find_last_of('/'); + assert(pos != std::string::npos); + + auto file = path.substr(pos + 1); + block.getByName("_file").column->assumeMutableRef().insert(file); + } + + block.getByName("_idx").column->assumeMutableRef().insert(idx); +} + class StorageS3Source::DisclosedGlobIterator::Impl : WithContext { - public: Impl( const Aws::S3::S3Client & client_, @@ -99,7 +140,7 @@ public: ASTPtr & query_, const Block & virtual_header_, ContextPtr context_, - std::unordered_map * object_infos_, + ObjectInfos * object_infos_, Strings * read_keys_, const S3Settings::RequestSettings & request_settings_) : WithContext(context_) @@ -110,6 +151,8 @@ public: , object_infos(object_infos_) , read_keys(read_keys_) , request_settings(request_settings_) + , list_objects_pool(1) + , list_objects_scheduler(threadPoolCallbackRunner(list_objects_pool, "ListObjects")) { if (globbed_uri.bucket.find_first_of("*?{") != globbed_uri.bucket.npos) throw Exception("Expression can not have wildcards inside bucket name", ErrorCodes::UNEXPECTED_EXPRESSION); @@ -119,43 +162,42 @@ public: /// We don't have to list bucket, because there is no asterisks. 
if (key_prefix.size() == globbed_uri.key.size()) { - buffer.emplace_back(globbed_uri.key); + buffer.emplace_back(globbed_uri.key, std::nullopt); buffer_iter = buffer.begin(); is_finished = true; return; } - /// Create a virtual block with one row to construct filter - if (query && virtual_header) - { - /// Append "key" column as the filter result - virtual_header.insert({ColumnString::create(), std::make_shared(), "_key"}); - - auto block = virtual_header.cloneEmpty(); - MutableColumns columns = block.mutateColumns(); - for (auto & column : columns) - column->insertDefault(); - block.setColumns(std::move(columns)); - VirtualColumnUtils::prepareFilterBlockWithQuery(query, getContext(), block, filter_ast); - } - request.SetBucket(globbed_uri.bucket); request.SetPrefix(key_prefix); + outcome_future = listObjectsAsync(); + matcher = std::make_unique(makeRegexpPatternFromGlobs(globbed_uri.key)); recursive = globbed_uri.key == "/**" ? true : false; fillInternalBufferAssumeLocked(); } - String next() + KeyWithInfo next() { std::lock_guard lock(mutex); return nextAssumeLocked(); } -private: + size_t getTotalSize() const + { + return total_size; + } - String nextAssumeLocked() + ~Impl() + { + list_objects_pool.wait(); + } + +private: + using ListObjectsOutcome = Aws::S3::Model::ListObjectsV2Outcome; + + KeyWithInfo nextAssumeLocked() { if (buffer_iter != buffer.end()) { @@ -168,7 +210,6 @@ private: return {}; fillInternalBufferAssumeLocked(); - return nextAssumeLocked(); } @@ -176,8 +217,9 @@ private: { buffer.clear(); - ProfileEvents::increment(ProfileEvents::S3ListObjects); - outcome = client.ListObjectsV2(request); + assert(outcome_future.valid()); + auto outcome = outcome_future.get(); + if (!outcome.IsSuccess()) throw Exception(ErrorCodes::S3_ERROR, "Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}", quoteString(request.GetBucket()), quoteString(request.GetPrefix()), @@ -185,82 +227,137 @@ private: const auto & result_batch = outcome.GetResult().GetContents(); + /// It returns false when all objects were returned + is_finished = !outcome.GetResult().GetIsTruncated(); + + if (!is_finished) + { + /// Even if task is finished the thread may be not freed in pool. + /// So wait until it will be freed before scheduling a new task. 
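// NOTE (illustrative aside, not part of the patch): the overlap pattern used
// here, reduced to standard-library primitives. While one listing page is
// being consumed, the request for the next page is already in flight.
// Page, fetch_page and consume_all are hypothetical placeholders for the
// ListObjectsV2 machinery above.

#include <functional>
#include <future>
#include <string>
#include <vector>

struct Page
{
    std::vector<std::string> keys;
    bool is_last = false;
};

void consume_all(
    const std::function<Page()> & fetch_page,
    const std::function<void(const std::string &)> & on_key)
{
    /// Kick off the first request before consuming anything.
    auto in_flight = std::async(std::launch::async, fetch_page);

    while (true)
    {
        Page page = in_flight.get();   /// wait for the in-flight request

        if (!page.is_last)             /// overlap the next fetch...
            in_flight = std::async(std::launch::async, fetch_page);

        for (const auto & key : page.keys)   /// ...with consumption of this page
            on_key(key);

        if (page.is_last)
            break;
    }
}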
+ list_objects_pool.wait(); + outcome_future = listObjectsAsync(); + } + + KeysWithInfo temp_buffer; + temp_buffer.reserve(result_batch.size()); + + for (const auto & row : result_batch) + { + String key = row.GetKey(); + if (recursive || re2::RE2::FullMatch(key, *matcher)) + { + S3::ObjectInfo info = + { + .size = size_t(row.GetSize()), + .last_modification_time = row.GetLastModified().Millis() / 1000, + }; + + if (object_infos) + (*object_infos)[fs::path(globbed_uri.bucket) / key] = info; + + temp_buffer.emplace_back(std::move(key), std::move(info)); + } + } + + if (temp_buffer.empty()) + { + buffer_iter = buffer.begin(); + return; + } + + if (!is_initialized) + { + createFilterAST(temp_buffer.front().key); + is_initialized = true; + } + if (filter_ast) { auto block = virtual_header.cloneEmpty(); - MutableColumnPtr path_column; - MutableColumnPtr file_column; - MutableColumnPtr key_column = block.getByName("_key").column->assumeMutable(); - - if (block.has("_path")) - path_column = block.getByName("_path").column->assumeMutable(); - - if (block.has("_file")) - file_column = block.getByName("_file").column->assumeMutable(); - - for (const auto & row : result_batch) - { - const String & key = row.GetKey(); - if (recursive || re2::RE2::FullMatch(key, *matcher)) - { - String path = fs::path(globbed_uri.bucket) / key; - if (object_infos) - (*object_infos)[path] = {.size = size_t(row.GetSize()), .last_modification_time = row.GetLastModified().Millis() / 1000}; - String file = path.substr(path.find_last_of('/') + 1); - if (path_column) - path_column->insert(path); - if (file_column) - file_column->insert(file); - key_column->insert(key); - } - } + for (size_t i = 0; i < temp_buffer.size(); ++i) + addPathToVirtualColumns(block, fs::path(globbed_uri.bucket) / temp_buffer[i].key, i); VirtualColumnUtils::filterBlockWithQuery(query, block, getContext(), filter_ast); - const ColumnString & keys = typeid_cast(*block.getByName("_key").column); - size_t rows = block.rows(); - buffer.reserve(rows); - for (size_t i = 0; i < rows; ++i) - buffer.emplace_back(keys.getDataAt(i).toString()); + const auto & idxs = typeid_cast(*block.getByName("_idx").column); + + buffer.reserve(block.rows()); + for (UInt64 idx : idxs.getData()) + { + total_size += temp_buffer[idx].info->size; + buffer.emplace_back(std::move(temp_buffer[idx])); + } } else { - buffer.reserve(result_batch.size()); - for (const auto & row : result_batch) - { - String key = row.GetKey(); - if (recursive || re2::RE2::FullMatch(key, *matcher)) - buffer.emplace_back(std::move(key)); - } + buffer = std::move(temp_buffer); + for (const auto & [_, info] : buffer) + total_size += info->size; } /// Set iterator only after the whole batch is processed buffer_iter = buffer.begin(); - request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken()); - - /// It returns false when all objects were returned - is_finished = !outcome.GetResult().GetIsTruncated(); - if (read_keys) - read_keys->insert(read_keys->end(), buffer.begin(), buffer.end()); + { + read_keys->reserve(read_keys->size() + buffer.size()); + for (const auto & [key, _] : buffer) + read_keys->push_back(key); + } + } + + void createFilterAST(const String & any_key) + { + if (!query || !virtual_header) + return; + + /// Create a virtual block with one row to construct filter + /// Append "idx" column as the filter result + virtual_header.insert({ColumnUInt64::create(), std::make_shared(), "_idx"}); + + auto block = virtual_header.cloneEmpty(); + addPathToVirtualColumns(block, 
fs::path(globbed_uri.bucket) / any_key, 0); + VirtualColumnUtils::prepareFilterBlockWithQuery(query, getContext(), block, filter_ast); + } + + std::future listObjectsAsync() + { + return list_objects_scheduler([this] + { + ProfileEvents::increment(ProfileEvents::S3ListObjects); + auto outcome = client.ListObjectsV2(request); + + /// Outcome failure will be handled on the caller side. + if (outcome.IsSuccess()) + request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken()); + + return outcome; + }, 0); } std::mutex mutex; - Strings buffer; - Strings::iterator buffer_iter; + + KeysWithInfo buffer; + KeysWithInfo::iterator buffer_iter; + Aws::S3::S3Client client; S3::URI globbed_uri; ASTPtr query; Block virtual_header; + bool is_initialized{false}; ASTPtr filter_ast; - Aws::S3::Model::ListObjectsV2Request request; - Aws::S3::Model::ListObjectsV2Outcome outcome; std::unique_ptr matcher; bool recursive{false}; bool is_finished{false}; - std::unordered_map * object_infos; + ObjectInfos * object_infos; Strings * read_keys; + + Aws::S3::Model::ListObjectsV2Request request; S3Settings::RequestSettings request_settings; + + ThreadPool list_objects_pool; + ThreadPoolCallbackRunner list_objects_scheduler; + std::future outcome_future; + size_t total_size = 0; }; StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator( @@ -269,36 +366,51 @@ StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator( ASTPtr query, const Block & virtual_header, ContextPtr context, - std::unordered_map * object_infos_, + ObjectInfos * object_infos_, Strings * read_keys_, const S3Settings::RequestSettings & request_settings_) : pimpl(std::make_shared(client_, globbed_uri_, query, virtual_header, context, object_infos_, read_keys_, request_settings_)) { } -String StorageS3Source::DisclosedGlobIterator::next() +StorageS3Source::KeyWithInfo StorageS3Source::DisclosedGlobIterator::next() { return pimpl->next(); } +size_t StorageS3Source::DisclosedGlobIterator::getTotalSize() const +{ + return pimpl->getTotalSize(); +} + class StorageS3Source::KeysIterator::Impl : WithContext { public: explicit Impl( - const std::vector & keys_, const String & bucket_, ASTPtr query_, const Block & virtual_header_, ContextPtr context_) - : WithContext(context_), keys(keys_), bucket(bucket_), query(query_), virtual_header(virtual_header_) + const Aws::S3::S3Client & client_, + const std::string & version_id_, + const std::vector & keys_, + const String & bucket_, + ASTPtr query_, + const Block & virtual_header_, + ContextPtr context_, + ObjectInfos * object_infos_, + Strings * read_keys_) + : WithContext(context_) + , bucket(bucket_) + , query(query_) + , virtual_header(virtual_header_) { + Strings all_keys = keys_; + /// Create a virtual block with one row to construct filter - if (query && virtual_header) + if (query && virtual_header && !all_keys.empty()) { - /// Append "key" column as the filter result - virtual_header.insert({ColumnString::create(), std::make_shared(), "_key"}); + /// Append "idx" column as the filter result + virtual_header.insert({ColumnUInt64::create(), std::make_shared(), "_idx"}); auto block = virtual_header.cloneEmpty(); - MutableColumns columns = block.mutateColumns(); - for (auto & column : columns) - column->insertDefault(); - block.setColumns(std::move(columns)); + addPathToVirtualColumns(block, fs::path(bucket) / all_keys.front(), 0); ASTPtr filter_ast; VirtualColumnUtils::prepareFilterBlockWithQuery(query, getContext(), block, filter_ast); @@ -306,68 +418,94 @@ public: if (filter_ast) { 
block = virtual_header.cloneEmpty(); - MutableColumnPtr path_column; - MutableColumnPtr file_column; - MutableColumnPtr key_column = block.getByName("_key").column->assumeMutable(); - - if (block.has("_path")) - path_column = block.getByName("_path").column->assumeMutable(); - - if (block.has("_file")) - file_column = block.getByName("_file").column->assumeMutable(); - - for (const auto & key : keys) - { - String path = fs::path(bucket) / key; - String file = path.substr(path.find_last_of('/') + 1); - if (path_column) - path_column->insert(path); - if (file_column) - file_column->insert(file); - key_column->insert(key); - } + for (size_t i = 0; i < all_keys.size(); ++i) + addPathToVirtualColumns(block, fs::path(bucket) / all_keys[i], i); VirtualColumnUtils::filterBlockWithQuery(query, block, getContext(), filter_ast); - const ColumnString & keys_col = typeid_cast(*block.getByName("_key").column); - size_t rows = block.rows(); - Strings filtered_keys; - filtered_keys.reserve(rows); - for (size_t i = 0; i < rows; ++i) - filtered_keys.emplace_back(keys_col.getDataAt(i).toString()); + const auto & idxs = typeid_cast(*block.getByName("_idx").column); - keys = std::move(filtered_keys); + Strings filtered_keys; + filtered_keys.reserve(block.rows()); + for (UInt64 idx : idxs.getData()) + filtered_keys.emplace_back(std::move(all_keys[idx])); + + all_keys = std::move(filtered_keys); } } + + if (read_keys_) + *read_keys_ = all_keys; + + for (auto && key : all_keys) + { + std::optional info; + + /// To avoid extra requests update total_size only if object_infos != nullptr + /// (which means we eventually need this info anyway, so it should be ok to do it now) + if (object_infos_) + { + info = S3::getObjectInfo(client_, bucket, key, version_id_, true, false); + total_size += info->size; + + String path = fs::path(bucket) / key; + (*object_infos_)[std::move(path)] = *info; + } + + keys.emplace_back(std::move(key), std::move(info)); + } } - String next() + KeyWithInfo next() { size_t current_index = index.fetch_add(1, std::memory_order_relaxed); if (current_index >= keys.size()) - return ""; + return {}; + return keys[current_index]; } + size_t getTotalSize() const + { + return total_size; + } + private: - Strings keys; + KeysWithInfo keys; std::atomic_size_t index = 0; String bucket; ASTPtr query; Block virtual_header; + + size_t total_size = 0; }; StorageS3Source::KeysIterator::KeysIterator( - const std::vector & keys_, const String & bucket_, ASTPtr query, const Block & virtual_header, ContextPtr context) - : pimpl(std::make_shared(keys_, bucket_, query, virtual_header, context)) + const Aws::S3::S3Client & client_, + const std::string & version_id_, + const std::vector & keys_, + const String & bucket_, + ASTPtr query, + const Block & virtual_header, + ContextPtr context, + ObjectInfos * object_infos, + Strings * read_keys) + : pimpl(std::make_shared( + client_, version_id_, keys_, bucket_, query, + virtual_header, context, object_infos, read_keys)) { } -String StorageS3Source::KeysIterator::next() +StorageS3Source::KeyWithInfo StorageS3Source::KeysIterator::next() { return pimpl->next(); } +size_t StorageS3Source::KeysIterator::getTotalSize() const +{ + return pimpl->getTotalSize(); +} + Block StorageS3Source::getHeader(Block sample_block, const std::vector & requested_virtual_columns) { for (const auto & virtual_column : requested_virtual_columns) @@ -390,9 +528,8 @@ StorageS3Source::StorageS3Source( const std::shared_ptr & client_, const String & bucket_, const String & version_id_, - 
std::shared_ptr file_iterator_, - const size_t download_thread_num_, - const std::unordered_map & object_infos_) + std::shared_ptr file_iterator_, + const size_t download_thread_num_) : ISource(getHeader(sample_block_, requested_virtual_columns_)) , WithContext(context_) , name(std::move(name_)) @@ -409,9 +546,12 @@ StorageS3Source::StorageS3Source( , requested_virtual_columns(requested_virtual_columns_) , file_iterator(file_iterator_) , download_thread_num(download_thread_num_) - , object_infos(object_infos_) + , create_reader_pool(1) + , create_reader_scheduler(threadPoolCallbackRunner(create_reader_pool, "CreateS3Reader")) { - initialize(); + reader = createReader(); + if (reader) + reader_future = createReaderAsync(); } @@ -423,17 +563,21 @@ void StorageS3Source::onCancel() } -bool StorageS3Source::initialize() +StorageS3Source::ReaderHolder StorageS3Source::createReader() { - String current_key = (*file_iterator)(); + auto [current_key, info] = (*file_iterator)(); if (current_key.empty()) - return false; + return {}; - file_path = fs::path(bucket) / current_key; + size_t object_size = info + ? info->size + : S3::getObjectSize(*client, bucket, current_key, version_id, true, false); int zstd_window_log_max = static_cast(getContext()->getSettingsRef().zstd_window_log_max); - read_buf = wrapReadBufferWithCompressionMethod( - createS3ReadBuffer(current_key), chooseCompressionMethod(current_key, compression_hint), zstd_window_log_max); + auto read_buf = wrapReadBufferWithCompressionMethod( + createS3ReadBuffer(current_key, object_size), + chooseCompressionMethod(current_key, compression_hint), + zstd_window_log_max); auto input_format = getContext()->getInputFormat(format, *read_buf, sample_block, max_block_size, format_settings); QueryPipelineBuilder builder; @@ -446,32 +590,36 @@ bool StorageS3Source::initialize() { return std::make_shared(header, columns_desc, *input_format, getContext()); }); } - pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); - reader = std::make_unique(*pipeline); + auto pipeline = std::make_unique(QueryPipelineBuilder::getPipeline(std::move(builder))); + auto current_reader = std::make_unique(*pipeline); - return true; + return ReaderHolder{fs::path(bucket) / current_key, std::move(read_buf), std::move(pipeline), std::move(current_reader)}; } -std::unique_ptr StorageS3Source::createS3ReadBuffer(const String & key) +std::future StorageS3Source::createReaderAsync() { - size_t object_size; - auto it = object_infos.find(fs::path(bucket) / key); - if (it != object_infos.end()) - object_size = it->second.size; - else - object_size = DB::S3::getObjectSize(client, bucket, key, version_id, false, false); + return create_reader_scheduler([this] { return createReader(); }, 0); +} + +std::unique_ptr StorageS3Source::createS3ReadBuffer(const String & key, size_t object_size) +{ + auto read_settings = getContext()->getReadSettings().adjustBufferSize(object_size); + read_settings.enable_filesystem_cache = false; auto download_buffer_size = getContext()->getSettings().max_download_buffer_size; const bool use_parallel_download = download_buffer_size > 0 && download_thread_num > 1; const bool object_too_small = object_size < download_thread_num * download_buffer_size; + if (!use_parallel_download || object_too_small) { LOG_TRACE(log, "Downloading object of size {} from S3 in single thread", object_size); - return std::make_unique(client, bucket, key, version_id, request_settings, getContext()->getReadSettings()); + if (read_settings.remote_fs_method 
== RemoteFSReadMethod::threadpool) + return createAsyncS3ReadBuffer(key, read_settings, object_size); + + return std::make_unique(client, bucket, key, version_id, request_settings, read_settings); } assert(object_size > 0); - if (download_buffer_size < DBMS_DEFAULT_BUFFER_SIZE) { LOG_WARNING(log, "Downloading buffer {} bytes too small, set at least {} bytes", download_buffer_size, DBMS_DEFAULT_BUFFER_SIZE); @@ -479,13 +627,55 @@ std::unique_ptr StorageS3Source::createS3ReadBuffer(const String & k } auto factory = std::make_unique( - client, bucket, key, version_id, download_buffer_size, object_size, request_settings, getContext()->getReadSettings()); - LOG_TRACE( - log, "Downloading from S3 in {} threads. Object size: {}, Range size: {}.", download_thread_num, object_size, download_buffer_size); + client, bucket, key, version_id, download_buffer_size, object_size, request_settings, read_settings); + + LOG_TRACE(log, + "Downloading from S3 in {} threads. Object size: {}, Range size: {}.", + download_thread_num, object_size, download_buffer_size); return std::make_unique(std::move(factory), threadPoolCallbackRunner(IOThreadPool::get(), "S3ParallelRead"), download_thread_num); } +std::unique_ptr StorageS3Source::createAsyncS3ReadBuffer( + const String & key, const ReadSettings & read_settings, size_t object_size) +{ + auto read_buffer_creator = + [this, read_settings] + (const std::string & path, size_t read_until_position) -> std::shared_ptr + { + return std::make_shared( + client, + bucket, + path, + version_id, + request_settings, + read_settings, + /* use_external_buffer */true, + /* offset */0, + read_until_position, + /* restricted_seek */true); + }; + + auto s3_impl = std::make_unique( + std::move(read_buffer_creator), + StoredObjects{StoredObject{key, object_size}}, + read_settings); + + auto & pool_reader = getContext()->getThreadPoolReader(Context::FilesystemReaderType::ASYNCHRONOUS_REMOTE_FS_READER); + auto async_reader = std::make_unique(pool_reader, read_settings, std::move(s3_impl)); + + async_reader->setReadUntilEnd(); + if (read_settings.remote_fs_prefetch) + async_reader->prefetch(); + + return async_reader; +} + +StorageS3Source::~StorageS3Source() +{ + create_reader_pool.wait(); +} + String StorageS3Source::getName() const { return name; @@ -503,6 +693,14 @@ Chunk StorageS3Source::generate() { UInt64 num_rows = chunk.getNumRows(); + const auto & file_path = reader.getPath(); + size_t total_size = file_iterator->getTotalSize(); + if (num_rows && total_size) + { + updateRowsProgressApprox( + *this, chunk, total_size, total_rows_approx_accumulated, total_rows_count_times, total_rows_approx_max); + } + for (const auto & virtual_column : requested_virtual_columns) { if (virtual_column.name == "_path") @@ -522,52 +720,22 @@ Chunk StorageS3Source::generate() { std::lock_guard lock(reader_mutex); - reader.reset(); - pipeline.reset(); - read_buf.reset(); - if (!initialize()) + assert(reader_future.valid()); + reader = reader_future.get(); + + if (!reader) break; + + /// Even if task is finished the thread may be not freed in pool. + /// So wait until it will be freed before scheduling a new task. 
+ create_reader_pool.wait(); + reader_future = createReaderAsync(); } } return {}; } -static bool checkIfObjectExists(const std::shared_ptr & client, const String & bucket, const String & key) -{ - bool is_finished = false; - Aws::S3::Model::ListObjectsV2Request request; - Aws::S3::Model::ListObjectsV2Outcome outcome; - - request.SetBucket(bucket); - request.SetPrefix(key); - while (!is_finished) - { - ProfileEvents::increment(ProfileEvents::S3ListObjects); - outcome = client->ListObjectsV2(request); - if (!outcome.IsSuccess()) - throw Exception( - ErrorCodes::S3_ERROR, - "Could not list objects in bucket {} with key {}, S3 exception: {}, message: {}", - quoteString(bucket), - quoteString(key), - backQuote(outcome.GetError().GetExceptionName()), - quoteString(outcome.GetError().GetMessage())); - - const auto & result_batch = outcome.GetResult().GetContents(); - for (const auto & obj : result_batch) - { - if (obj.GetKey() == key) - return true; - } - - request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken()); - is_finished = !outcome.GetResult().GetIsTruncated(); - } - - return false; -} - class StorageS3Sink : public SinkToStorage { public: @@ -777,7 +945,9 @@ StorageS3::StorageS3( distributed_processing_, is_key_with_globs, format_settings, - context_); + context_, + &object_infos); + storage_metadata.setColumns(columns); } else @@ -797,7 +967,7 @@ StorageS3::StorageS3( virtual_block.insert({column.type->createColumn(), column.type, column.name}); } -std::shared_ptr StorageS3::createFileIterator( +std::shared_ptr StorageS3::createFileIterator( const S3Configuration & s3_configuration, const std::vector & keys, bool is_key_with_globs, @@ -805,30 +975,26 @@ std::shared_ptr StorageS3::createFileIterator( ContextPtr local_context, ASTPtr query, const Block & virtual_block, - std::unordered_map * object_infos, + ObjectInfos * object_infos, Strings * read_keys) { if (distributed_processing) { - return std::make_shared( - [callback = local_context->getReadTaskCallback()]() -> String { - return callback(); - }); + return std::make_shared(local_context->getReadTaskCallback()); } else if (is_key_with_globs) { /// Iterate through disclosed globs and make a source for each file - auto glob_iterator = std::make_shared( - *s3_configuration.client, s3_configuration.uri, query, virtual_block, local_context, object_infos, read_keys, s3_configuration.request_settings); - return std::make_shared([glob_iterator]() { return glob_iterator->next(); }); + return std::make_shared( + *s3_configuration.client, s3_configuration.uri, query, virtual_block, + local_context, object_infos, read_keys, s3_configuration.request_settings); } else { - auto keys_iterator - = std::make_shared(keys, s3_configuration.uri.bucket, query, virtual_block, local_context); - if (read_keys) - *read_keys = keys; - return std::make_shared([keys_iterator]() { return keys_iterator->next(); }); + return std::make_shared( + *s3_configuration.client, s3_configuration.uri.version_id, keys, + s3_configuration.uri.bucket, query, virtual_block, local_context, + object_infos, read_keys); } } @@ -869,7 +1035,7 @@ Pipe StorageS3::read( requested_virtual_columns.push_back(virtual_column); } - std::shared_ptr iterator_wrapper = createFileIterator( + std::shared_ptr iterator_wrapper = createFileIterator( s3_configuration, keys, is_key_with_globs, @@ -920,9 +1086,9 @@ Pipe StorageS3::read( s3_configuration.uri.bucket, s3_configuration.uri.version_id, iterator_wrapper, - max_download_threads, - object_infos)); + max_download_threads)); 
} + auto pipe = Pipe::unitePipes(std::move(pipes)); narrowPipe(pipe, num_streams); @@ -961,7 +1127,7 @@ SinkToStoragePtr StorageS3::write(const ASTPtr & query, const StorageMetadataPtr bool truncate_in_insert = local_context->getSettingsRef().s3_truncate_on_insert; - if (!truncate_in_insert && checkIfObjectExists(s3_configuration.client, s3_configuration.uri.bucket, keys.back())) + if (!truncate_in_insert && S3::objectExists(*s3_configuration.client, s3_configuration.uri.bucket, keys.back(), s3_configuration.uri.version_id)) { if (local_context->getSettingsRef().s3_create_new_file_on_insert) { @@ -973,7 +1139,7 @@ SinkToStoragePtr StorageS3::write(const ASTPtr & query, const StorageMetadataPtr new_key = keys[0].substr(0, pos) + "." + std::to_string(index) + (pos == std::string::npos ? "" : keys[0].substr(pos)); ++index; } - while (checkIfObjectExists(s3_configuration.client, s3_configuration.uri.bucket, new_key)); + while (S3::objectExists(*s3_configuration.client, s3_configuration.uri.bucket, new_key, s3_configuration.uri.version_id)); keys.push_back(new_key); } else @@ -997,7 +1163,6 @@ SinkToStoragePtr StorageS3::write(const ASTPtr & query, const StorageMetadataPtr } } - void StorageS3::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, ContextPtr local_context, TableExclusiveLockHolder &) { updateS3Configuration(local_context, s3_configuration); @@ -1031,10 +1196,8 @@ void StorageS3::truncate(const ASTPtr & /* query */, const StorageMetadataPtr &, void StorageS3::updateS3Configuration(ContextPtr ctx, StorageS3::S3Configuration & upd) { auto settings = ctx->getStorageS3Settings().getSettings(upd.uri.uri.toString()); - if (upd.request_settings != settings.request_settings) - upd.request_settings = settings.request_settings; - - upd.request_settings.updateFromSettingsIfEmpty(ctx->getSettings()); + upd.request_settings = settings.request_settings; + upd.request_settings.updateFromSettings(ctx->getSettings()); if (upd.client) { @@ -1075,48 +1238,43 @@ void StorageS3::updateS3Configuration(ContextPtr ctx, StorageS3::S3Configuration upd.auth_settings.use_insecure_imds_request.value_or(ctx->getConfigRef().getBool("s3.use_insecure_imds_request", false))); } - -void StorageS3::processNamedCollectionResult(StorageS3Configuration & configuration, const std::vector> & key_value_args) +void StorageS3::processNamedCollectionResult(StorageS3Configuration & configuration, const NamedCollection & collection) { - for (const auto & [arg_name, arg_value] : key_value_args) - { - if (arg_name == "access_key_id") - configuration.auth_settings.access_key_id = checkAndGetLiteralArgument(arg_value, "access_key_id"); - else if (arg_name == "secret_access_key") - configuration.auth_settings.secret_access_key = checkAndGetLiteralArgument(arg_value, "secret_access_key"); - else if (arg_name == "filename") - configuration.url = std::filesystem::path(configuration.url) / checkAndGetLiteralArgument(arg_value, "filename"); - else if (arg_name == "use_environment_credentials") - configuration.auth_settings.use_environment_credentials = checkAndGetLiteralArgument(arg_value, "use_environment_credentials"); - else if (arg_name == "max_single_read_retries") - configuration.request_settings.max_single_read_retries = checkAndGetLiteralArgument(arg_value, "max_single_read_retries"); - else if (arg_name == "min_upload_part_size") - configuration.request_settings.min_upload_part_size = checkAndGetLiteralArgument(arg_value, "min_upload_part_size"); - else if (arg_name == "upload_part_size_multiply_factor") - 
configuration.request_settings.upload_part_size_multiply_factor = checkAndGetLiteralArgument(arg_value, "upload_part_size_multiply_factor"); - else if (arg_name == "upload_part_size_multiply_parts_count_threshold") - configuration.request_settings.upload_part_size_multiply_parts_count_threshold = checkAndGetLiteralArgument(arg_value, "upload_part_size_multiply_parts_count_threshold"); - else if (arg_name == "max_single_part_upload_size") - configuration.request_settings.max_single_part_upload_size = checkAndGetLiteralArgument(arg_value, "max_single_part_upload_size"); - else if (arg_name == "max_connections") - configuration.request_settings.max_connections = checkAndGetLiteralArgument(arg_value, "max_connections"); - else - throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, - "Unknown key-value argument `{}` for StorageS3, expected: url, [access_key_id, secret_access_key], name of used format and [compression_method].", - arg_name); - } -} + validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys); + std::string filename; + configuration.request_settings = S3Settings::RequestSettings(collection); + + for (const auto & key : collection) + { + if (key == "url") + configuration.url = collection.get(key); + else if (key == "access_key_id") + configuration.auth_settings.access_key_id = collection.get(key); + else if (key == "secret_access_key") + configuration.auth_settings.secret_access_key = collection.get(key); + else if (key == "filename") + filename = collection.get(key); + else if (key == "format") + configuration.format = collection.get(key); + else if (key == "compression" || key == "compression_method") + configuration.compression_method = collection.get(key); + else if (key == "structure") + configuration.structure = collection.get(key); + else if (key == "use_environment_credentials") + configuration.auth_settings.use_environment_credentials = collection.get(key); + } + if (!filename.empty()) + configuration.url = std::filesystem::path(configuration.url) / filename; +} StorageS3Configuration StorageS3::getConfiguration(ASTs & engine_args, ContextPtr local_context) { StorageS3Configuration configuration; - if (auto named_collection = getURLBasedDataSourceConfiguration(engine_args, local_context)) + if (auto named_collection = tryGetNamedCollectionWithOverrides(engine_args)) { - auto [common_configuration, storage_specific_args] = named_collection.value(); - configuration.set(common_configuration); - processNamedCollectionResult(configuration, storage_specific_args); + processNamedCollectionResult(configuration, *named_collection); } else { @@ -1170,7 +1328,7 @@ ColumnsDescription StorageS3::getTableStructureFromData( bool distributed_processing, const std::optional & format_settings, ContextPtr ctx, - std::unordered_map * object_infos) + ObjectInfos * object_infos) { S3Configuration s3_configuration{ configuration.url, @@ -1193,12 +1351,17 @@ ColumnsDescription StorageS3::getTableStructureFromDataImpl( bool is_key_with_globs, const std::optional & format_settings, ContextPtr ctx, - std::unordered_map * object_infos) + ObjectInfos * object_infos) { std::vector read_keys; - auto file_iterator - = createFileIterator(s3_configuration, {s3_configuration.uri.key}, is_key_with_globs, distributed_processing, ctx, nullptr, {}, object_infos, &read_keys); + auto file_iterator = createFileIterator( + s3_configuration, + {s3_configuration.uri.key}, + is_key_with_globs, + distributed_processing, + ctx, nullptr, + {}, object_infos, &read_keys); 
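// NOTE (illustrative aside, not part of the patch): the key-set validation
// that processNamedCollectionResult above relies on, sketched with plain
// standard containers. Keys outside required+optional are rejected and every
// required key must be present. validate_keys and the exception type are
// placeholders, not the real validateNamedCollection.

#include <set>
#include <stdexcept>
#include <string>

void validate_keys(
    const std::set<std::string> & provided,
    const std::set<std::string> & required,
    const std::set<std::string> & optional)
{
    for (const auto & key : provided)
        if (!required.count(key) && !optional.count(key))
            throw std::invalid_argument("Unexpected key in named collection: " + key);

    for (const auto & key : required)
        if (!provided.count(key))
            throw std::invalid_argument("Required key is not specified: " + key);
}

/// For the s3 collection above this would be called with required = {"url"}
/// and optional = {"format", "compression", ..., "max_connections"}.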
std::optional columns_from_cache; size_t prev_read_keys_size = read_keys.size(); @@ -1207,7 +1370,7 @@ ColumnsDescription StorageS3::getTableStructureFromDataImpl( ReadBufferIterator read_buffer_iterator = [&, first = true](ColumnsDescription & cached_columns) mutable -> std::unique_ptr { - auto key = (*file_iterator)(); + auto [key, _] = (*file_iterator)(); if (key.empty()) { @@ -1348,7 +1511,7 @@ std::optional StorageS3::tryGetColumnsFromCache( const Strings::const_iterator & begin, const Strings::const_iterator & end, const S3Configuration & s3_configuration, - std::unordered_map * object_infos, + ObjectInfos * object_infos, const String & format_name, const std::optional & format_settings, const ContextPtr & ctx) @@ -1369,13 +1532,14 @@ std::optional StorageS3::tryGetColumnsFromCache( /// Note that in case of exception in getObjectInfo returned info will be empty, /// but schema cache will handle this case and won't return columns from cache /// because we can't say that it's valid without last modification time. - info = S3::getObjectInfo(s3_configuration.client, s3_configuration.uri.bucket, *it, s3_configuration.uri.version_id, false, false); + info = S3::getObjectInfo(*s3_configuration.client, s3_configuration.uri.bucket, *it, s3_configuration.uri.version_id, false, false); if (object_infos) (*object_infos)[path] = info; } if (info.last_modification_time) return info.last_modification_time; + return std::nullopt; }; diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 2add41d4f95..671610173bd 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -12,11 +12,13 @@ #include #include +#include #include #include #include #include #include +#include #include #include @@ -30,10 +32,37 @@ namespace DB class PullingPipelineExecutor; class StorageS3SequentialSource; +class NamedCollection; + class StorageS3Source : public ISource, WithContext { public: - class DisclosedGlobIterator + + struct KeyWithInfo + { + KeyWithInfo() = default; + KeyWithInfo(String key_, std::optional info_) + : key(std::move(key_)), info(std::move(info_)) + { + } + + String key; + std::optional info; + }; + + using KeysWithInfo = std::vector; + using ObjectInfos = std::unordered_map; + class IIterator + { + public: + virtual ~IIterator() = default; + virtual KeyWithInfo next() = 0; + virtual size_t getTotalSize() const = 0; + + KeyWithInfo operator ()() { return next(); } + }; + + class DisclosedGlobIterator : public IIterator { public: DisclosedGlobIterator( @@ -42,11 +71,12 @@ public: ASTPtr query, const Block & virtual_header, ContextPtr context, - std::unordered_map * object_infos = nullptr, + ObjectInfos * object_infos = nullptr, Strings * read_keys_ = nullptr, const S3Settings::RequestSettings & request_settings_ = {}); - String next(); + KeyWithInfo next() override; + size_t getTotalSize() const override; private: class Impl; @@ -54,12 +84,22 @@ public: std::shared_ptr pimpl; }; - class KeysIterator + class KeysIterator : public IIterator { public: explicit KeysIterator( - const std::vector & keys_, const String & bucket_, ASTPtr query, const Block & virtual_header, ContextPtr context); - String next(); + const Aws::S3::S3Client & client_, + const std::string & version_id_, + const std::vector & keys_, + const String & bucket_, + ASTPtr query, + const Block & virtual_header, + ContextPtr context, + ObjectInfos * object_infos = nullptr, + Strings * read_keys = nullptr); + + KeyWithInfo next() override; + size_t getTotalSize() const override; private: class Impl; @@ -67,7 
+107,18 @@ public: std::shared_ptr pimpl; }; - using IteratorWrapper = std::function; + class ReadTaskIterator : public IIterator + { + public: + explicit ReadTaskIterator(const ReadTaskCallback & callback_) : callback(callback_) {} + + KeyWithInfo next() override { return {callback(), {}}; } + + size_t getTotalSize() const override { return 0; } + + private: + ReadTaskCallback callback; + }; static Block getHeader(Block sample_block, const std::vector & requested_virtual_columns); @@ -85,9 +136,10 @@ public: const std::shared_ptr & client_, const String & bucket, const String & version_id, - std::shared_ptr file_iterator_, - size_t download_thread_num, - const std::unordered_map & object_infos_); + std::shared_ptr file_iterator_, + size_t download_thread_num); + + ~StorageS3Source() override; String getName() const override; @@ -99,7 +151,6 @@ private: String name; String bucket; String version_id; - String file_path; String format; ColumnsDescription columns_desc; UInt64 max_block_size; @@ -109,24 +160,59 @@ private: Block sample_block; std::optional format_settings; + struct ReaderHolder + { + public: + ReaderHolder( + String path_, + std::unique_ptr read_buf_, + std::unique_ptr pipeline_, + std::unique_ptr reader_) + : path(std::move(path_)) + , read_buf(std::move(read_buf_)) + , pipeline(std::move(pipeline_)) + , reader(std::move(reader_)) + { + } + + ReaderHolder() = default; + + explicit operator bool() const { return reader != nullptr; } + PullingPipelineExecutor * operator->() { return reader.get(); } + const PullingPipelineExecutor * operator->() const { return reader.get(); } + const String & getPath() const { return path; } + + private: + String path; + std::unique_ptr read_buf; + std::unique_ptr pipeline; + std::unique_ptr reader; + }; + + ReaderHolder reader; - std::unique_ptr read_buf; - std::unique_ptr pipeline; - std::unique_ptr reader; /// onCancel and generate can be called concurrently std::mutex reader_mutex; std::vector requested_virtual_columns; - std::shared_ptr file_iterator; + std::shared_ptr file_iterator; size_t download_thread_num = 1; Poco::Logger * log = &Poco::Logger::get("StorageS3Source"); - std::unordered_map object_infos; + ThreadPool create_reader_pool; + ThreadPoolCallbackRunner create_reader_scheduler; + std::future reader_future; + + UInt64 total_rows_approx_max = 0; + size_t total_rows_count_times = 0; + UInt64 total_rows_approx_accumulated = 0; /// Recreate ReadBuffer and Pipeline for each file. 
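// NOTE (illustrative aside, not part of the patch): how the IIterator
// abstraction above is meant to be consumed - the glob, explicit-keys and
// read-task iterators all yield KeyWithInfo values until an empty key signals
// exhaustion. drain() is a hypothetical helper and assumes this header
// (StorageS3.h) is included.

#include <functional>

inline void drain(
    DB::StorageS3Source::IIterator & iterator,
    const std::function<void(const DB::StorageS3Source::KeyWithInfo &)> & process)
{
    while (true)
    {
        auto key_with_info = iterator();   /// operator() forwards to next()
        if (key_with_info.key.empty())     /// empty key means no more objects
            break;
        process(key_with_info);
    }
}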
- bool initialize(); + ReaderHolder createReader(); + std::future createReaderAsync(); - std::unique_ptr createS3ReadBuffer(const String & key); + std::unique_ptr createS3ReadBuffer(const String & key, size_t object_size); + std::unique_ptr createAsyncS3ReadBuffer(const String & key, const ReadSettings & read_settings, size_t object_size); }; /** @@ -172,14 +258,16 @@ public: static StorageS3Configuration getConfiguration(ASTs & engine_args, ContextPtr local_context); + using ObjectInfos = StorageS3Source::ObjectInfos; + static ColumnsDescription getTableStructureFromData( const StorageS3Configuration & configuration, bool distributed_processing, const std::optional & format_settings, ContextPtr ctx, - std::unordered_map * object_infos = nullptr); + ObjectInfos * object_infos = nullptr); - static void processNamedCollectionResult(StorageS3Configuration & configuration, const std::vector> & key_value_args); + static void processNamedCollectionResult(StorageS3Configuration & configuration, const NamedCollection & collection); struct S3Configuration { @@ -229,11 +317,11 @@ private: ASTPtr partition_by; bool is_key_with_globs = false; - std::unordered_map object_infos; + ObjectInfos object_infos; static void updateS3Configuration(ContextPtr, S3Configuration &); - static std::shared_ptr createFileIterator( + static std::shared_ptr createFileIterator( const S3Configuration & s3_configuration, const std::vector & keys, bool is_key_with_globs, @@ -241,7 +329,7 @@ private: ContextPtr local_context, ASTPtr query, const Block & virtual_block, - std::unordered_map * object_infos = nullptr, + ObjectInfos * object_infos = nullptr, Strings * read_keys = nullptr); static ColumnsDescription getTableStructureFromDataImpl( @@ -252,7 +340,7 @@ private: bool is_key_with_globs, const std::optional & format_settings, ContextPtr ctx, - std::unordered_map * object_infos = nullptr); + ObjectInfos * object_infos = nullptr); bool supportsSubcolumns() const override; @@ -262,7 +350,7 @@ private: const Strings::const_iterator & begin, const Strings::const_iterator & end, const S3Configuration & s3_configuration, - std::unordered_map * object_infos, + ObjectInfos * object_infos, const String & format_name, const std::optional & format_settings, const ContextPtr & ctx); diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp index b10f3c65ebf..86eb5a2702c 100644 --- a/src/Storages/StorageS3Cluster.cpp +++ b/src/Storages/StorageS3Cluster.cpp @@ -102,7 +102,8 @@ Pipe StorageS3Cluster::read( auto iterator = std::make_shared( *s3_configuration.client, s3_configuration.uri, query_info.query, virtual_block, context); - auto callback = std::make_shared([iterator]() mutable -> String { return iterator->next(); }); + + auto callback = std::make_shared>([iterator]() mutable -> String { return iterator->next().key; }); /// Calculate the header. This is significant, because some columns could be thrown away in some cases like query with count(*) auto interpreter = InterpreterSelectQuery(query_info.query, context, SelectQueryOptions(processed_stage).analyze()); diff --git a/src/Storages/StorageS3Settings.cpp b/src/Storages/StorageS3Settings.cpp index 962f7c113bd..8c1974527b6 100644 --- a/src/Storages/StorageS3Settings.cpp +++ b/src/Storages/StorageS3Settings.cpp @@ -3,23 +3,220 @@ #include #include +#include #include #include #include -#include #include namespace DB { -namespace +namespace ErrorCodes { - /// An object up to 5 GB can be copied in a single atomic operation. 
- constexpr UInt64 DEFAULT_MAX_SINGLE_OPERATION_COPY_SIZE = 5_GiB; + extern const int INVALID_SETTING_VALUE; +} - /// The maximum size of an uploaded part. - constexpr UInt64 DEFAULT_MAX_UPLOAD_PART_SIZE = 5_GiB; +S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings(const Settings & settings) +{ + updateFromSettingsImpl(settings, false); + validate(); +} + +S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings( + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix, + const Settings & settings, + String setting_name_prefix) + : PartUploadSettings(settings) +{ + String key = config_prefix + "." + setting_name_prefix; + min_upload_part_size = config.getUInt64(key + "min_upload_part_size", min_upload_part_size); + max_upload_part_size = config.getUInt64(key + "max_upload_part_size", max_upload_part_size); + upload_part_size_multiply_factor = config.getUInt64(key + "upload_part_size_multiply_factor", upload_part_size_multiply_factor); + upload_part_size_multiply_parts_count_threshold = config.getUInt64(key + "upload_part_size_multiply_parts_count_threshold", upload_part_size_multiply_parts_count_threshold); + max_part_number = config.getUInt64(key + "max_part_number", max_part_number); + max_single_part_upload_size = config.getUInt64(key + "max_single_part_upload_size", max_single_part_upload_size); + max_single_operation_copy_size = config.getUInt64(key + "max_single_operation_copy_size", max_single_operation_copy_size); + + validate(); +} + +S3Settings::RequestSettings::PartUploadSettings::PartUploadSettings(const NamedCollection & collection) +{ + min_upload_part_size = collection.getOrDefault("min_upload_part_size", min_upload_part_size); + upload_part_size_multiply_factor = collection.getOrDefault("upload_part_size_multiply_factor", upload_part_size_multiply_factor); + upload_part_size_multiply_parts_count_threshold = collection.getOrDefault("upload_part_size_multiply_parts_count_threshold", upload_part_size_multiply_parts_count_threshold); + max_single_part_upload_size = collection.getOrDefault("max_single_part_upload_size", max_single_part_upload_size); + + validate(); +} + +void S3Settings::RequestSettings::PartUploadSettings::updateFromSettingsImpl(const Settings & settings, bool if_changed) +{ + if (!if_changed || settings.s3_min_upload_part_size.changed) + min_upload_part_size = settings.s3_min_upload_part_size; + + if (!if_changed || settings.s3_max_upload_part_size.changed) + max_upload_part_size = settings.s3_max_upload_part_size; + + if (!if_changed || settings.s3_upload_part_size_multiply_factor.changed) + upload_part_size_multiply_factor = settings.s3_upload_part_size_multiply_factor; + + if (!if_changed || settings.s3_upload_part_size_multiply_parts_count_threshold.changed) + upload_part_size_multiply_parts_count_threshold = settings.s3_upload_part_size_multiply_parts_count_threshold; + + if (!if_changed || settings.s3_max_single_part_upload_size.changed) + max_single_part_upload_size = settings.s3_max_single_part_upload_size; +} + +void S3Settings::RequestSettings::PartUploadSettings::validate() +{ + static constexpr size_t min_upload_part_size_limit = 5 * 1024 * 1024; + if (min_upload_part_size < min_upload_part_size_limit) + throw Exception( + ErrorCodes::INVALID_SETTING_VALUE, + "Setting min_upload_part_size has invalid value {} which is less than the s3 API limit {}", + ReadableSize(min_upload_part_size), ReadableSize(min_upload_part_size_limit)); + + static constexpr size_t max_upload_part_size_limit = 5ull * 1024 
* 1024 * 1024; + if (max_upload_part_size > max_upload_part_size_limit) + throw Exception( + ErrorCodes::INVALID_SETTING_VALUE, + "Setting max_upload_part_size has invalid value {} which is grater than the s3 API limit {}", + ReadableSize(max_upload_part_size), ReadableSize(max_upload_part_size_limit)); + + if (max_single_part_upload_size > max_upload_part_size_limit) + throw Exception( + ErrorCodes::INVALID_SETTING_VALUE, + "Setting max_single_part_upload_size has invalid value {} which is grater than the s3 API limit {}", + ReadableSize(max_single_part_upload_size), ReadableSize(max_upload_part_size_limit)); + + if (max_single_operation_copy_size > max_upload_part_size_limit) + throw Exception( + ErrorCodes::INVALID_SETTING_VALUE, + "Setting max_single_operation_copy_size has invalid value {} which is grater than the s3 API limit {}", + ReadableSize(max_single_operation_copy_size), ReadableSize(max_upload_part_size_limit)); + + if (max_upload_part_size < min_upload_part_size) + throw Exception( + ErrorCodes::INVALID_SETTING_VALUE, + "Setting max_upload_part_size ({}) can't be less than setting min_upload_part_size {}", + ReadableSize(max_upload_part_size), ReadableSize(min_upload_part_size)); + + if (!upload_part_size_multiply_factor) + throw Exception( + ErrorCodes::INVALID_SETTING_VALUE, + "Setting upload_part_size_multiply_factor cannot be zero", + upload_part_size_multiply_factor); + + if (!upload_part_size_multiply_parts_count_threshold) + throw Exception( + ErrorCodes::INVALID_SETTING_VALUE, + "Setting upload_part_size_multiply_parts_count_threshold cannot be zero", + upload_part_size_multiply_parts_count_threshold); + + if (!max_part_number) + throw Exception( + ErrorCodes::INVALID_SETTING_VALUE, + "Setting max_part_number cannot be zero", + max_part_number); + + static constexpr size_t max_part_number_limit = 10000; + if (max_part_number > max_part_number_limit) + throw Exception( + ErrorCodes::INVALID_SETTING_VALUE, + "Setting max_part_number has invalid value {} which is grater than the s3 API limit {}", + ReadableSize(max_part_number), ReadableSize(max_part_number_limit)); + + size_t maybe_overflow; + if (common::mulOverflow(max_upload_part_size, upload_part_size_multiply_factor, maybe_overflow)) + throw Exception( + ErrorCodes::INVALID_SETTING_VALUE, + "Setting upload_part_size_multiply_factor is too big ({}). Multiplication to max_upload_part_size ({}) will cause integer overflow", + ReadableSize(max_part_number), ReadableSize(max_part_number_limit)); + + /// TODO: it's possible to set too small limits. We can check that max possible object size is not too small. +} + + +S3Settings::RequestSettings::RequestSettings(const Settings & settings) + : upload_settings(settings) +{ + updateFromSettingsImpl(settings, false); +} + +S3Settings::RequestSettings::RequestSettings(const NamedCollection & collection) + : upload_settings(collection) +{ + max_single_read_retries = collection.getOrDefault("max_single_read_retries", max_single_read_retries); + max_connections = collection.getOrDefault("max_connections", max_connections); +} + +S3Settings::RequestSettings::RequestSettings( + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix, + const Settings & settings, + String setting_name_prefix) + : upload_settings(config, config_prefix, settings, setting_name_prefix) +{ + String key = config_prefix + "." 
+ setting_name_prefix; + max_single_read_retries = config.getUInt64(key + "max_single_read_retries", settings.s3_max_single_read_retries); + max_connections = config.getUInt64(key + "max_connections", settings.s3_max_connections); + check_objects_after_upload = config.getBool(key + "check_objects_after_upload", settings.s3_check_objects_after_upload); + + /// NOTE: it would be better to reuse old throttlers to avoid losing token bucket state on every config reload, + /// which could lead to exceeding limit for short time. But it is good enough unless very high `burst` values are used. + if (UInt64 max_get_rps = config.getUInt64(key + "max_get_rps", settings.s3_max_get_rps)) + { + size_t default_max_get_burst = settings.s3_max_get_burst + ? settings.s3_max_get_burst + : (Throttler::default_burst_seconds * max_get_rps); + + size_t max_get_burst = config.getUInt64(key + "max_get_burst", default_max_get_burst); + + get_request_throttler = std::make_shared(max_get_rps, max_get_burst); + } + if (UInt64 max_put_rps = config.getUInt64(key + "max_put_rps", settings.s3_max_put_rps)) + { + size_t default_max_put_burst = settings.s3_max_put_burst + ? settings.s3_max_put_burst + : (Throttler::default_burst_seconds * max_put_rps); + + size_t max_put_burst = config.getUInt64(key + "max_put_burst", default_max_put_burst); + + put_request_throttler = std::make_shared(max_put_rps, max_put_burst); + } +} + +void S3Settings::RequestSettings::updateFromSettingsImpl(const Settings & settings, bool if_changed) +{ + if (!if_changed || settings.s3_max_single_read_retries.changed) + max_single_read_retries = settings.s3_max_single_read_retries; + + if (!if_changed || settings.s3_max_connections.changed) + max_connections = settings.s3_max_connections; + + if (!if_changed || settings.s3_check_objects_after_upload.changed) + check_objects_after_upload = settings.s3_check_objects_after_upload; + + if (!if_changed || settings.s3_max_unexpected_write_error_retries.changed) + max_unexpected_write_error_retries = settings.s3_max_unexpected_write_error_retries; + + if ((!if_changed || settings.s3_max_get_rps.changed || settings.s3_max_get_burst.changed) && settings.s3_max_get_rps) + get_request_throttler = std::make_shared( + settings.s3_max_get_rps, settings.s3_max_get_burst ? settings.s3_max_get_burst : Throttler::default_burst_seconds * settings.s3_max_get_rps); + + if ((!if_changed || settings.s3_max_put_rps.changed || settings.s3_max_put_burst.changed) && settings.s3_max_put_rps) + put_request_throttler = std::make_shared( + settings.s3_max_put_rps, settings.s3_max_put_burst ? settings.s3_max_put_burst : Throttler::default_burst_seconds * settings.s3_max_put_rps); +} + +void S3Settings::RequestSettings::updateFromSettings(const Settings & settings) +{ + updateFromSettingsImpl(settings, true); + upload_settings.updateFromSettings(settings); } @@ -33,49 +230,13 @@ void StorageS3Settings::loadFromConfig(const String & config_elem, const Poco::U Poco::Util::AbstractConfiguration::Keys config_keys; config.keys(config_elem, config_keys); - auto get_string_for_key = [&](const String & key, const String & elem, bool with_default = true, const String & default_value = "") - { - return with_default ? config.getString(config_elem + "." + key + "." + elem, default_value) : config.getString(config_elem + "." + key + "." + elem); - }; - - auto get_uint_for_key = [&](const String & key, const String & elem, bool with_default = true, UInt64 default_value = 0) - { - return with_default ? config.getUInt64(config_elem + "." + key + "." 
+ elem, default_value) : config.getUInt64(config_elem + "." + key + "." + elem); - }; - - - auto get_bool_for_key = [&](const String & key, const String & elem, bool with_default = true, bool default_value = false) - { - return with_default ? config.getBool(config_elem + "." + key + "." + elem, default_value) : config.getBool(config_elem + "." + key + "." + elem); - }; - - for (const String & key : config_keys) { if (config.has(config_elem + "." + key + ".endpoint")) { - auto endpoint = get_string_for_key(key, "endpoint", false); - + auto endpoint = config.getString(config_elem + "." + key + ".endpoint"); auto auth_settings = S3::AuthSettings::loadFromConfig(config_elem + "." + key, config); - - S3Settings::RequestSettings request_settings; - request_settings.max_single_read_retries = get_uint_for_key(key, "max_single_read_retries", true, settings.s3_max_single_read_retries); - request_settings.min_upload_part_size = get_uint_for_key(key, "min_upload_part_size", true, settings.s3_min_upload_part_size); - request_settings.max_upload_part_size = get_uint_for_key(key, "max_upload_part_size", true, DEFAULT_MAX_UPLOAD_PART_SIZE); - request_settings.upload_part_size_multiply_factor = get_uint_for_key(key, "upload_part_size_multiply_factor", true, settings.s3_upload_part_size_multiply_factor); - request_settings.upload_part_size_multiply_parts_count_threshold = get_uint_for_key(key, "upload_part_size_multiply_parts_count_threshold", true, settings.s3_upload_part_size_multiply_parts_count_threshold); - request_settings.max_single_part_upload_size = get_uint_for_key(key, "max_single_part_upload_size", true, settings.s3_max_single_part_upload_size); - request_settings.max_single_operation_copy_size = get_uint_for_key(key, "max_single_operation_copy_size", true, DEFAULT_MAX_SINGLE_OPERATION_COPY_SIZE); - request_settings.max_connections = get_uint_for_key(key, "max_connections", true, settings.s3_max_connections); - request_settings.check_objects_after_upload = get_bool_for_key(key, "check_objects_after_upload", true, false); - - // NOTE: it would be better to reuse old throttlers to avoid losing token bucket state on every config reload, which could lead to exceeding limit for short time. But it is good enough unless very high `burst` values are used. - if (UInt64 max_get_rps = get_uint_for_key(key, "max_get_rps", true, settings.s3_max_get_rps)) - request_settings.get_request_throttler = std::make_shared( - max_get_rps, get_uint_for_key(key, "max_get_burst", true, settings.s3_max_get_burst ? settings.s3_max_get_burst : Throttler::default_burst_seconds * max_get_rps)); - if (UInt64 max_put_rps = get_uint_for_key(key, "max_put_rps", true, settings.s3_max_put_rps)) - request_settings.put_request_throttler = std::make_shared( - max_put_rps, get_uint_for_key(key, "max_put_burst", true, settings.s3_max_put_burst ? settings.s3_max_put_burst : Throttler::default_burst_seconds * max_put_rps)); + S3Settings::RequestSettings request_settings(config, config_elem + "." 
+ key, settings); s3_settings.emplace(endpoint, S3Settings{std::move(auth_settings), std::move(request_settings)}); } @@ -98,51 +259,4 @@ S3Settings StorageS3Settings::getSettings(const String & endpoint) const return {}; } -S3Settings::RequestSettings::RequestSettings(const Settings & settings) -{ - max_single_read_retries = settings.s3_max_single_read_retries; - min_upload_part_size = settings.s3_min_upload_part_size; - upload_part_size_multiply_factor = settings.s3_upload_part_size_multiply_factor; - upload_part_size_multiply_parts_count_threshold = settings.s3_upload_part_size_multiply_parts_count_threshold; - max_single_part_upload_size = settings.s3_max_single_part_upload_size; - max_connections = settings.s3_max_connections; - check_objects_after_upload = settings.s3_check_objects_after_upload; - max_unexpected_write_error_retries = settings.s3_max_unexpected_write_error_retries; - if (settings.s3_max_get_rps) - get_request_throttler = std::make_shared( - settings.s3_max_get_rps, settings.s3_max_get_burst ? settings.s3_max_get_burst : Throttler::default_burst_seconds * settings.s3_max_get_rps); - if (settings.s3_max_put_rps) - put_request_throttler = std::make_shared( - settings.s3_max_put_rps, settings.s3_max_put_burst ? settings.s3_max_put_burst : Throttler::default_burst_seconds * settings.s3_max_put_rps); -} - -void S3Settings::RequestSettings::updateFromSettingsIfEmpty(const Settings & settings) -{ - if (!max_single_read_retries) - max_single_read_retries = settings.s3_max_single_read_retries; - if (!min_upload_part_size) - min_upload_part_size = settings.s3_min_upload_part_size; - if (!max_upload_part_size) - max_upload_part_size = DEFAULT_MAX_UPLOAD_PART_SIZE; - if (!upload_part_size_multiply_factor) - upload_part_size_multiply_factor = settings.s3_upload_part_size_multiply_factor; - if (!upload_part_size_multiply_parts_count_threshold) - upload_part_size_multiply_parts_count_threshold = settings.s3_upload_part_size_multiply_parts_count_threshold; - if (!max_single_part_upload_size) - max_single_part_upload_size = settings.s3_max_single_part_upload_size; - if (!max_single_operation_copy_size) - max_single_operation_copy_size = DEFAULT_MAX_SINGLE_OPERATION_COPY_SIZE; - if (!max_connections) - max_connections = settings.s3_max_connections; - if (!max_unexpected_write_error_retries) - max_unexpected_write_error_retries = settings.s3_max_unexpected_write_error_retries; - check_objects_after_upload = settings.s3_check_objects_after_upload; - if (!get_request_throttler && settings.s3_max_get_rps) - get_request_throttler = std::make_shared( - settings.s3_max_get_rps, settings.s3_max_get_burst ? settings.s3_max_get_burst : Throttler::default_burst_seconds * settings.s3_max_get_rps); - if (!put_request_throttler && settings.s3_max_put_rps) - put_request_throttler = std::make_shared( - settings.s3_max_put_rps, settings.s3_max_put_burst ? 
settings.s3_max_put_burst : Throttler::default_burst_seconds * settings.s3_max_put_rps); -} - } diff --git a/src/Storages/StorageS3Settings.h b/src/Storages/StorageS3Settings.h index 955cd2d025b..368fcfaf469 100644 --- a/src/Storages/StorageS3Settings.h +++ b/src/Storages/StorageS3Settings.h @@ -21,53 +21,78 @@ namespace DB { struct Settings; +class NamedCollection; struct S3Settings { struct RequestSettings { - size_t max_single_read_retries = 0; - size_t min_upload_part_size = 0; - size_t max_upload_part_size = 0; - size_t upload_part_size_multiply_factor = 0; - size_t upload_part_size_multiply_parts_count_threshold = 0; - size_t max_single_part_upload_size = 0; - size_t max_single_operation_copy_size = 0; - size_t max_connections = 0; + struct PartUploadSettings + { + size_t min_upload_part_size = 16 * 1024 * 1024; + size_t max_upload_part_size = 5ULL * 1024 * 1024 * 1024; + size_t upload_part_size_multiply_factor = 2; + size_t upload_part_size_multiply_parts_count_threshold = 500; + size_t max_part_number = 10000; + size_t max_single_part_upload_size = 32 * 1024 * 1024; + size_t max_single_operation_copy_size = 5ULL * 1024 * 1024 * 1024; + + void updateFromSettings(const Settings & settings) { updateFromSettingsImpl(settings, true); } + void validate(); + + private: + PartUploadSettings() = default; + explicit PartUploadSettings(const Settings & settings); + explicit PartUploadSettings(const NamedCollection & collection); + PartUploadSettings( + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix, + const Settings & settings, + String setting_name_prefix = {}); + + void updateFromSettingsImpl(const Settings & settings, bool if_changed); + + friend struct RequestSettings; + }; + + private: + PartUploadSettings upload_settings = {}; + + public: + size_t max_single_read_retries = 4; + size_t max_connections = 1024; bool check_objects_after_upload = false; - size_t max_unexpected_write_error_retries = 0; + size_t max_unexpected_write_error_retries = 4; ThrottlerPtr get_request_throttler; ThrottlerPtr put_request_throttler; + const PartUploadSettings & getUploadSettings() const { return upload_settings; } + RequestSettings() = default; explicit RequestSettings(const Settings & settings); + explicit RequestSettings(const NamedCollection & collection); - inline bool operator==(const RequestSettings & other) const - { - return max_single_read_retries == other.max_single_read_retries - && min_upload_part_size == other.min_upload_part_size - && max_upload_part_size == other.max_upload_part_size - && upload_part_size_multiply_factor == other.upload_part_size_multiply_factor - && upload_part_size_multiply_parts_count_threshold == other.upload_part_size_multiply_parts_count_threshold - && max_single_part_upload_size == other.max_single_part_upload_size - && max_single_operation_copy_size == other.max_single_operation_copy_size - && max_connections == other.max_connections - && check_objects_after_upload == other.check_objects_after_upload - && max_unexpected_write_error_retries == other.max_unexpected_write_error_retries - && get_request_throttler == other.get_request_throttler - && put_request_throttler == other.put_request_throttler; - } + /// What's the setting_name_prefix, and why do we need it? 
+ /// There are (at least) two config sections where s3 settings can be specified: + /// * settings for s3 disk (clickhouse/storage_configuration/disks) + /// * settings for s3 storage (clickhouse/s3), which are also used for backups + /// Even though settings are the same, in case of s3 disk they are prefixed with "s3_" + /// ("s3_max_single_part_upload_size"), but in case of s3 storage they are not + /// ( "max_single_part_upload_size"). Why this happened is a complete mystery to me. + RequestSettings( + const Poco::Util::AbstractConfiguration & config, + const String & config_prefix, + const Settings & settings, + String setting_name_prefix = {}); - void updateFromSettingsIfEmpty(const Settings & settings); + void updateFromSettings(const Settings & settings); + + private: + void updateFromSettingsImpl(const Settings & settings, bool if_changed); }; S3::AuthSettings auth_settings; RequestSettings request_settings; - - inline bool operator==(const S3Settings & other) const - { - return auth_settings == other.auth_settings && request_settings == other.request_settings; - } }; /// Settings for the StorageS3. diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index 92d53ffc1ac..62823f6dabd 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -626,6 +626,7 @@ void StorageStripeLog::restoreDataImpl(const BackupPtr & backup, const String & auto in = backup_entry->getReadBuffer(); auto out = disk->writeFile(data_file_path, max_compress_block_size, WriteMode::Append); copyData(*in, *out); + out->finalize(); } /// Append the index. diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index a55d7ad3c09..4f3003e68b0 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -123,9 +124,19 @@ void StorageView::read( } auto options = SelectQueryOptions(QueryProcessingStage::Complete, 0, false, query_info.settings_limit_offset_done); - InterpreterSelectWithUnionQuery interpreter(current_inner_query, context, options, column_names); - interpreter.addStorageLimits(*query_info.storage_limits); - interpreter.buildQueryPlan(query_plan); + + if (context->getSettingsRef().allow_experimental_analyzer) + { + InterpreterSelectQueryAnalyzer interpreter(current_inner_query, options, context); + interpreter.addStorageLimits(*query_info.storage_limits); + query_plan = std::move(interpreter).extractQueryPlan(); + } + else + { + InterpreterSelectWithUnionQuery interpreter(current_inner_query, context, options, column_names); + interpreter.addStorageLimits(*query_info.storage_limits); + interpreter.buildQueryPlan(query_plan); + } /// It's expected that the columns read from storage are not constant. /// Because method 'getSampleBlockForColumns' is used to obtain a structure of result in InterpreterSelectQuery. 
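The bounds enforced by PartUploadSettings::validate above correspond to the documented S3 multipart upload limits (part sizes between 5 MiB and 5 GiB, at most 10000 parts per upload), plus a guard against the part-size growth factor overflowing a 64-bit size. A minimal standalone sketch of the same checks, with illustrative names (PartUploadLimits, validatePartUploadLimits) rather than the real ClickHouse types, might look like this:

#include <cstdint>
#include <limits>
#include <stdexcept>

// Illustrative stand-in for the part-upload settings validated above;
// the field names mirror the settings but this is not the real ClickHouse struct.
struct PartUploadLimits
{
    uint64_t min_upload_part_size = 16ULL * 1024 * 1024;        // 16 MiB default
    uint64_t max_upload_part_size = 5ULL * 1024 * 1024 * 1024;  // 5 GiB default
    uint64_t upload_part_size_multiply_factor = 2;
    uint64_t max_part_number = 10000;
};

void validatePartUploadLimits(const PartUploadLimits & s)
{
    constexpr uint64_t s3_min_part = 5ULL * 1024 * 1024;         // S3 API: minimum part size
    constexpr uint64_t s3_max_part = 5ULL * 1024 * 1024 * 1024;  // S3 API: maximum part size
    constexpr uint64_t s3_max_parts = 10000;                     // S3 API: maximum number of parts

    if (s.min_upload_part_size < s3_min_part)
        throw std::invalid_argument("min_upload_part_size is below the S3 API limit of 5 MiB");
    if (s.max_upload_part_size > s3_max_part)
        throw std::invalid_argument("max_upload_part_size is above the S3 API limit of 5 GiB");
    if (s.max_upload_part_size < s.min_upload_part_size)
        throw std::invalid_argument("max_upload_part_size can't be less than min_upload_part_size");
    if (s.max_part_number == 0 || s.max_part_number > s3_max_parts)
        throw std::invalid_argument("max_part_number must be between 1 and 10000");

    // Growing the part size by the multiply factor must not overflow a 64-bit size.
    if (s.upload_part_size_multiply_factor == 0
        || s.max_upload_part_size > std::numeric_limits<uint64_t>::max() / s.upload_part_size_multiply_factor)
        throw std::invalid_argument("upload_part_size_multiply_factor is zero or would overflow");
}

int main()
{
    validatePartUploadLimits(PartUploadLimits{});  // the defaults above pass validation
    return 0;
}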
diff --git a/src/Storages/System/StorageSystemContributors.generated.cpp b/src/Storages/System/StorageSystemContributors.generated.cpp index f5b6829c7ef..f69f9f8ee7f 100644 --- a/src/Storages/System/StorageSystemContributors.generated.cpp +++ b/src/Storages/System/StorageSystemContributors.generated.cpp @@ -94,6 +94,7 @@ const char * auto_contributors[] { "Aliaksandr Shylau", "Alina Terekhova", "Amesaru", + "Amila Welihinda", "Amir Vaza", "Amos Bird", "Amr Alaa", @@ -174,6 +175,7 @@ const char * auto_contributors[] { "Avogar", "Azat Khuzhin", "BSD_Conqueror", + "BSWaterB", "Babacar Diassé", "Bakhtiyor Ruziev", "BanyRule", @@ -186,6 +188,7 @@ const char * auto_contributors[] { "Bharat Nallan", "Bharat Nallan Chakravarthy", "Big Elephant", + "BigRedEye", "Bill", "BiteTheDDDDt", "BlahGeek", @@ -203,6 +206,7 @@ const char * auto_contributors[] { "Brett Hoerner", "Brian Hunter", "Bulat Gaifullin", + "Camden Cheek", "Camilo Sierra", "Carbyn", "Carlos Rodríguez Hernández", @@ -291,6 +295,7 @@ const char * auto_contributors[] { "Eldar Zaitov", "Elena", "Elena Baskakova", + "Elena Torró", "Elghazal Ahmed", "Elizaveta Mironyuk", "Elykov Alexandr", @@ -525,6 +530,7 @@ const char * auto_contributors[] { "Maksim Kita", "Mallik Hassan", "Malte", + "Manuel de la Peña", "Marat IDRISOV", "Marcelo Rodriguez", "Marek Vavrusa", @@ -534,6 +540,7 @@ const char * auto_contributors[] { "Mark Andreev", "Mark Frost", "Mark Papadakis", + "Mark Polokhov", "Maroun Maroun", "Marquitos", "Marsel Arduanov", @@ -709,6 +716,7 @@ const char * auto_contributors[] { "Quanfa Fu", "Quid37", "Radistka-75", + "Raevsky Rudolf", "Rafael Acevedo", "Rafael David Tinoco", "Rajkumar", @@ -779,6 +787,7 @@ const char * auto_contributors[] { "Sergey Mirvoda", "Sergey Ryzhkov", "Sergey Shtykov", + "Sergey Skvortsov", "Sergey Tulentsev", "Sergey V. 
Galtsev", "Sergey Zaikin", @@ -790,6 +799,7 @@ const char * auto_contributors[] { "Sherry Wang", "Shoh Jahon", "SiderZhang", + "Sidorov Pavel", "Silviu Caragea", "Simeon Emanuilov", "Simon Liu", @@ -878,6 +888,7 @@ const char * auto_contributors[] { "Viktor Taranenko", "Vincent Bernat", "Vitalii S", + "Vitaliy", "Vitaliy Fedorchenko", "Vitaliy Karnienko", "Vitaliy Kozlovskiy", @@ -922,6 +933,7 @@ const char * auto_contributors[] { "Weiqing Xu", "William Shallum", "Winter Zhang", + "Xbitz29", "XenoAmess", "Xianda Ke", "Xiang Zhou", @@ -1013,6 +1025,7 @@ const char * auto_contributors[] { "benbiti", "bgranvea", "bharatnc", + "bit-ranger", "bkuschel", "blazerer", "bluebirddm", @@ -1238,6 +1251,7 @@ const char * auto_contributors[] { "luc1ph3r", "lulichao", "luocongkai", + "lzydmxy", "m-ves", "madianjun", "maiha", @@ -1313,6 +1327,7 @@ const char * auto_contributors[] { "peter279k", "philip.han", "pingyu", + "pkubaj", "potya", "presto53", "proller", @@ -1378,6 +1393,7 @@ const char * auto_contributors[] { "taiyang-li", "tangjiangling", "tao jiang", + "taofengliu", "taojiatao", "tavplubix", "tchepavel", @@ -1394,6 +1410,7 @@ const char * auto_contributors[] { "turbo jason", "tyrionhuang", "ubuntu", + "unbyte", "unegare", "unknown", "urgordeadbeef", @@ -1481,6 +1498,7 @@ const char * auto_contributors[] { "Дмитрий Канатников", "Иванов Евгений", "Илья Исаев", + "Коренберг ☢️ Марк", "Павел Литвиненко", "Смитюх Вячеслав", "Сундуков Алексей", diff --git a/src/Storages/System/StorageSystemDatabases.cpp b/src/Storages/System/StorageSystemDatabases.cpp index fb7a1bc59b8..432d2c4ac64 100644 --- a/src/Storages/System/StorageSystemDatabases.cpp +++ b/src/Storages/System/StorageSystemDatabases.cpp @@ -4,6 +4,8 @@ #include #include #include +#include +#include namespace DB @@ -17,6 +19,7 @@ NamesAndTypesList StorageSystemDatabases::getNamesAndTypes() {"data_path", std::make_shared()}, {"metadata_path", std::make_shared()}, {"uuid", std::make_shared()}, + {"engine_full", std::make_shared()}, {"comment", std::make_shared()} }; } @@ -28,6 +31,43 @@ NamesAndAliases StorageSystemDatabases::getNamesAndAliases() }; } +static String getEngineFull(const DatabasePtr & database) +{ + DDLGuardPtr guard; + while (true) + { + String name = database->getDatabaseName(); + guard = DatabaseCatalog::instance().getDDLGuard(name, ""); + + /// Ensure that the database was not renamed before we acquired the lock + auto locked_database = DatabaseCatalog::instance().tryGetDatabase(name); + + if (locked_database.get() == database.get()) + break; + + /// Database was dropped + if (name == database->getDatabaseName()) + return {}; + + guard.reset(); + LOG_TRACE(&Poco::Logger::get("StorageSystemDatabases"), "Failed to lock database {} ({}), will retry", name, database->getUUID()); + } + + ASTPtr ast = database->getCreateDatabaseQuery(); + auto * ast_create = ast->as(); + + if (!ast_create || !ast_create->storage) + return {}; + + String engine_full = ast_create->storage->formatWithSecretsHidden(); + static const char * const extra_head = " ENGINE = "; + + if (startsWith(engine_full, extra_head)) + engine_full = engine_full.substr(strlen(extra_head)); + + return engine_full; +} + void StorageSystemDatabases::fillData(MutableColumns & res_columns, ContextPtr context, const SelectQueryInfo &) const { const auto access = context->getAccess(); @@ -47,7 +87,8 @@ void StorageSystemDatabases::fillData(MutableColumns & res_columns, ContextPtr c res_columns[2]->insert(context->getPath() + database->getDataPath()); 
res_columns[3]->insert(database->getMetadataPath()); res_columns[4]->insert(database->getUUID()); - res_columns[5]->insert(database->getDatabaseComment()); + res_columns[5]->insert(getEngineFull(database)); + res_columns[6]->insert(database->getDatabaseComment()); } } diff --git a/src/Storages/System/StorageSystemNamedCollections.cpp b/src/Storages/System/StorageSystemNamedCollections.cpp index 6f4078369d2..16c259796e6 100644 --- a/src/Storages/System/StorageSystemNamedCollections.cpp +++ b/src/Storages/System/StorageSystemNamedCollections.cpp @@ -8,7 +8,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Storages/System/StorageSystemParts.cpp b/src/Storages/System/StorageSystemParts.cpp index 0be44219c7d..b205b7c224d 100644 --- a/src/Storages/System/StorageSystemParts.cpp +++ b/src/Storages/System/StorageSystemParts.cpp @@ -1,4 +1,7 @@ #include "StorageSystemParts.h" +#include +#include +#include #include #include @@ -15,6 +18,29 @@ #include #include +namespace +{ +std::string_view getRemovalStateDescription(DB::DataPartRemovalState state) +{ + switch (state) + { + case DB::DataPartRemovalState::NOT_ATTEMPTED: + return "Cleanup thread hasn't seen this part yet"; + case DB::DataPartRemovalState::VISIBLE_TO_TRANSACTIONS: + return "Part may be visible to transactions"; + case DB::DataPartRemovalState::NON_UNIQUE_OWNERSHIP: + return "Part ownership is not unique"; + case DB::DataPartRemovalState::NOT_REACHED_REMOVAL_TIME: + return "Part hasn't reached removal time yet"; + case DB::DataPartRemovalState::HAS_SKIPPED_MUTATION_PARENT: + return "Waiting for the mutation parent to be removed"; + case DB::DataPartRemovalState::REMOVED: + return "Part was selected to be removed"; + } +} + +} + namespace DB { @@ -92,6 +118,9 @@ StorageSystemParts::StorageSystemParts(const StorageID & table_id_) {"removal_csn", std::make_shared()}, {"has_lightweight_delete", std::make_shared()}, + + {"last_removal_attemp_time", std::make_shared()}, + {"removal_state", std::make_shared()}, } ) { @@ -310,6 +339,10 @@ void StorageSystemParts::processNextStorage( columns[res_index++]->insert(part->version.removal_csn.load(std::memory_order_relaxed)); if (columns_mask[src_index++]) columns[res_index++]->insert(part->hasLightweightDelete()); + if (columns_mask[src_index++]) + columns[res_index++]->insert(static_cast(part->last_removal_attemp_time.load(std::memory_order_relaxed))); + if (columns_mask[src_index++]) + columns[res_index++]->insert(getRemovalStateDescription(part->removal_state.load(std::memory_order_relaxed))); /// _state column should be the latest. 
/// Do not use part->getState*, it can be changed from a different thread diff --git a/src/Storages/VirtualColumnUtils.cpp b/src/Storages/VirtualColumnUtils.cpp index 43e31b8e4f4..2ada0fa3323 100644 --- a/src/Storages/VirtualColumnUtils.cpp +++ b/src/Storages/VirtualColumnUtils.cpp @@ -144,6 +144,7 @@ bool prepareFilterBlockWithQuery(const ASTPtr & query, ContextPtr context, Block else const_columns[i] = ColumnConst::create(columns[i]->cloneResized(1), 1); } + block.setColumns(const_columns); bool unmodified = true; @@ -163,6 +164,7 @@ bool prepareFilterBlockWithQuery(const ASTPtr & query, ContextPtr context, Block ActionsVisitor::Data visitor_data( context, SizeLimits{}, 1, source_columns, std::move(actions), prepared_sets, true, true, true, false, { aggregation_keys, grouping_set_keys, GroupByKind::NONE }); + ActionsVisitor(visitor_data).visit(node); actions = visitor_data.getActions(); auto expression_actions = std::make_shared(actions); diff --git a/src/Storages/tests/gtest_named_collections.cpp b/src/Storages/tests/gtest_named_collections.cpp index 369e8ec44f6..d5fe5010991 100644 --- a/src/Storages/tests/gtest_named_collections.cpp +++ b/src/Storages/tests/gtest_named_collections.cpp @@ -1,6 +1,6 @@ #include -#include -#include +#include +#include #include #include #include diff --git a/src/TableFunctions/ITableFunction.h b/src/TableFunctions/ITableFunction.h index 21c26062566..9d490105b17 100644 --- a/src/TableFunctions/ITableFunction.h +++ b/src/TableFunctions/ITableFunction.h @@ -64,6 +64,12 @@ public: /// This hint could be used not to repeat schema in function arguments. virtual void setStructureHint(const ColumnsDescription &) {} + /// Used for table functions that can use a structure hint during INSERT INTO ... SELECT ... FROM table_function(...) + /// It returns the possible virtual column names of the corresponding storage. If the select query contains + /// one of these columns, the structure of the insertion table won't be used as a structure hint, + /// because we cannot determine which column of the table corresponds to this virtual column. + virtual std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const { return {}; } + virtual bool supportsReadingSubsetOfColumns() { return true; } /// Create storage according to the query. 
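The getVirtualsToCheckBeforeUsingStructureHint() hook added to ITableFunction above is meant to be consulted when INSERT INTO ... SELECT ... FROM table_function(...) considers reusing the insertion table's structure as a hint. A hedged sketch of that decision, using invented names (canUseStructureHint, plain std::string containers) rather than the actual ClickHouse call sites:

#include <string>
#include <unordered_set>
#include <vector>

// Illustrative only: the hint is safe to use when the SELECT list does not
// reference any of the table function's virtual columns (e.g. "_path", "_file"),
// because a virtual column has no counterpart in the insertion table.
bool canUseStructureHint(
    const std::vector<std::string> & select_columns,
    const std::unordered_set<std::string> & virtuals_to_check)
{
    for (const auto & name : select_columns)
        if (virtuals_to_check.count(name))
            return false;
    return true;
}

// Example: with virtuals {"_path", "_file"}, a query selecting {"x", "_file"}
// would skip the hint, while one selecting {"x", "y"} would keep it.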
diff --git a/src/TableFunctions/TableFunctionFile.h b/src/TableFunctions/TableFunctionFile.h index 20ecdb6222c..797948cad03 100644 --- a/src/TableFunctions/TableFunctionFile.h +++ b/src/TableFunctions/TableFunctionFile.h @@ -22,6 +22,11 @@ public: ColumnsDescription getActualTableStructure(ContextPtr context) const override; + std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override + { + return {"_path", "_file"}; + } + protected: int fd = -1; void parseFirstArguments(const ASTPtr & arg, const ContextPtr & context) override; diff --git a/src/TableFunctions/TableFunctionHDFS.h b/src/TableFunctions/TableFunctionHDFS.h index a391673e04d..c4c111de6e5 100644 --- a/src/TableFunctions/TableFunctionHDFS.h +++ b/src/TableFunctions/TableFunctionHDFS.h @@ -26,6 +26,11 @@ public: ColumnsDescription getActualTableStructure(ContextPtr context) const override; + std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override + { + return {"_path", "_file"}; + } + private: StoragePtr getStorage( const String & source, const String & format_, const ColumnsDescription & columns, ContextPtr global_context, diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index 3b68a0766aa..23822486c29 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include "registerTableFunctions.h" #include @@ -30,11 +31,9 @@ namespace ErrorCodes /// This is needed to avoid copy-paste. Because s3Cluster arguments only differ in additional argument (first) - cluster name void TableFunctionS3::parseArgumentsImpl(const String & error_message, ASTs & args, ContextPtr context, StorageS3Configuration & s3_configuration) { - if (auto named_collection = getURLBasedDataSourceConfiguration(args, context)) + if (auto named_collection = tryGetNamedCollectionWithOverrides(args)) { - auto [common_configuration, storage_specific_args] = named_collection.value(); - s3_configuration.set(common_configuration); - StorageS3::processNamedCollectionResult(s3_configuration, storage_specific_args); + StorageS3::processNamedCollectionResult(s3_configuration, *named_collection); } else { diff --git a/src/TableFunctions/TableFunctionS3.h b/src/TableFunctions/TableFunctionS3.h index b2eb03e8839..125238fa7db 100644 --- a/src/TableFunctions/TableFunctionS3.h +++ b/src/TableFunctions/TableFunctionS3.h @@ -32,6 +32,11 @@ public: bool supportsReadingSubsetOfColumns() override; + std::unordered_set getVirtualsToCheckBeforeUsingStructureHint() const override + { + return {"_path", "_file"}; + } + protected: friend class TableFunctionS3Cluster; diff --git a/src/configure_config.cmake b/src/configure_config.cmake index d7cdb769525..58cb34b7d67 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -22,6 +22,9 @@ endif() if (TARGET ch_rust::blake3) set(USE_BLAKE3 1) endif() +if (TARGET ch_rust::skim) + set(USE_SKIM 1) +endif() if (TARGET OpenSSL::SSL) set(USE_SSL 1) endif() diff --git a/tests/ci/build_check.py b/tests/ci/build_check.py index c9e8dac2c00..a718bd53418 100644 --- a/tests/ci/build_check.py +++ b/tests/ci/build_check.py @@ -122,7 +122,8 @@ def check_for_success_run( build_name: str, build_config: BuildConfig, ) -> None: - logged_prefix = os.path.join(S3_BUILDS_BUCKET, s3_prefix) + # the final empty argument is necessary to distinguish build and build_suffix + logged_prefix = os.path.join(S3_BUILDS_BUCKET, s3_prefix, "") logging.info("Checking for 
artifacts in %s", logged_prefix) try: # TODO: theoretically, it would miss performance artifact for pr==0, diff --git a/tests/ci/cherry_pick.py b/tests/ci/cherry_pick.py index b3e90feef2a..f7c69445eed 100644 --- a/tests/ci/cherry_pick.py +++ b/tests/ci/cherry_pick.py @@ -488,7 +488,7 @@ def main(): logging.getLogger("git_helper").setLevel(logging.DEBUG) token = args.token or get_best_robot_token() - gh = GitHub(token, per_page=100) + gh = GitHub(token, create_cache_dir=False, per_page=100) bp = Backport(gh, args.repo, args.dry_run) # https://github.com/python/mypy/issues/3004 bp.gh.cache_path = f"{TEMP_PATH}/gh_cache" # type: ignore diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index bfc7e45812b..753da25f300 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -171,12 +171,12 @@ CI_CONFIG = { "tidy": "disable", "with_coverage": False, }, - "binary_amd64sse2": { - "compiler": "clang-15-amd64sse2", + "binary_amd64_compat": { + "compiler": "clang-15-amd64-compat", "build_type": "", "sanitizer": "", "package_type": "binary", - "static_binary_name": "amd64sse2", + "static_binary_name": "amd64compat", "libraries": "static", "tidy": "disable", "with_coverage": False, @@ -203,7 +203,7 @@ CI_CONFIG = { "binary_freebsd", "binary_darwin_aarch64", "binary_ppc64le", - "binary_amd64sse2", + "binary_amd64_compat", ], }, "tests_config": { diff --git a/tests/ci/github_helper.py b/tests/ci/github_helper.py index bd740827b34..1b8861b92a6 100644 --- a/tests/ci/github_helper.py +++ b/tests/ci/github_helper.py @@ -30,9 +30,11 @@ Issues = List[Issue] class GitHub(github.Github): - def __init__(self, *args, **kwargs): - # Define meta attribute + def __init__(self, *args, create_cache_dir=True, **kwargs): + # Define meta attribute and apply setter logic self._cache_path = Path(CACHE_PATH) + if create_cache_dir: + self.cache_path = self.cache_path # And set Path super().__init__(*args, **kwargs) self._retries = 0 diff --git a/tests/ci/mark_release_ready.py b/tests/ci/mark_release_ready.py index 57ddb166693..2a10208d2e7 100644 --- a/tests/ci/mark_release_ready.py +++ b/tests/ci/mark_release_ready.py @@ -10,7 +10,7 @@ from release import RELEASE_READY_STATUS def main(): pr_info = PRInfo() - gh = GitHub(get_best_robot_token(), per_page=100) + gh = GitHub(get_best_robot_token(), create_cache_dir=False, per_page=100) commit = get_commit(gh, pr_info.sha) commit.create_status( context=RELEASE_READY_STATUS, diff --git a/tests/ci/release.py b/tests/ci/release.py index 502efd79173..57d5c4cdd6e 100755 --- a/tests/ci/release.py +++ b/tests/ci/release.py @@ -32,8 +32,6 @@ from version_helper import ( RELEASE_READY_STATUS = "Ready for release" -git = Git() - class Repo: VALID = ("ssh", "https", "origin") @@ -79,7 +77,7 @@ class Release: self.release_commit = release_commit assert release_type in self.BIG + self.SMALL self.release_type = release_type - self._git = git + self._git = Git() self._version = get_version_from_repo(git=self._git) self._release_branch = "" self._rollback_stack = [] # type: List[str] diff --git a/tests/ci/stress_check.py b/tests/ci/stress_check.py index c02128d114f..b7f74c5aeb7 100644 --- a/tests/ci/stress_check.py +++ b/tests/ci/stress_check.py @@ -145,7 +145,7 @@ if __name__ == "__main__": ) logging.info("Going to run func tests: %s", run_command) - with TeePopen(run_command, run_log_path) as process: + with TeePopen(run_command, run_log_path, timeout=60 * 150) as process: retcode = process.wait() if retcode == 0: logging.info("Run successfully") diff --git 
a/tests/ci/style_check.py b/tests/ci/style_check.py index 70bf1cd4d17..78c98813a72 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -148,7 +148,7 @@ if __name__ == "__main__": if args.push: checkout_head(pr_info) - gh = GitHub(get_best_robot_token()) + gh = GitHub(get_best_robot_token(), per_page=100, create_cache_dir=False) atexit.register(update_mergeable_check, gh, pr_info, NAME) diff --git a/tests/ci/tee_popen.py b/tests/ci/tee_popen.py index 61404847bff..b74069c16ab 100644 --- a/tests/ci/tee_popen.py +++ b/tests/ci/tee_popen.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 +from io import TextIOWrapper from subprocess import Popen, PIPE, STDOUT from threading import Thread from time import sleep @@ -14,15 +15,23 @@ import sys # it finishes. stderr and stdout will be redirected both to specified file and # stdout. class TeePopen: - # pylint: disable=W0102 - def __init__(self, command, log_file, env=os.environ.copy(), timeout=None): + def __init__( + self, + command: str, + log_file: str, + env: Optional[dict] = None, + timeout: Optional[int] = None, + ): self.command = command - self.log_file = log_file - self.env = env + self._log_file_name = log_file + self._log_file = None # type: Optional[TextIOWrapper] + self.env = env or os.environ.copy() self._process = None # type: Optional[Popen] self.timeout = timeout - def _check_timeout(self): + def _check_timeout(self) -> None: + if self.timeout is None: + return sleep(self.timeout) while self.process.poll() is None: logging.warning( @@ -33,7 +42,7 @@ class TeePopen: os.killpg(self.process.pid, 9) sleep(10) - def __enter__(self): + def __enter__(self) -> "TeePopen": self.process = Popen( self.command, shell=True, @@ -44,25 +53,21 @@ class TeePopen: stdout=PIPE, bufsize=1, ) - self.log_file = open(self.log_file, "w", encoding="utf-8") if self.timeout is not None and self.timeout > 0: t = Thread(target=self._check_timeout) t.daemon = True # does not block the program from exit t.start() return self - def __exit__(self, t, value, traceback): - for line in self.process.stdout: # type: ignore - sys.stdout.write(line) - self.log_file.write(line) - - self.process.wait() + def __exit__(self, exc_type, exc_value, traceback): + self.wait() self.log_file.close() def wait(self): - for line in self.process.stdout: # type: ignore - sys.stdout.write(line) - self.log_file.write(line) + if self.process.stdout is not None: + for line in self.process.stdout: + sys.stdout.write(line) + self.log_file.write(line) return self.process.wait() @@ -75,3 +80,9 @@ class TeePopen: @process.setter def process(self, process: Popen) -> None: self._process = process + + @property + def log_file(self) -> TextIOWrapper: + if self._log_file is None: + self._log_file = open(self._log_file_name, "w", encoding="utf-8") + return self._log_file diff --git a/tests/integration/test_backup_restore_new/test.py b/tests/integration/test_backup_restore_new/test.py index 7eeabde1380..90e8acc702d 100644 --- a/tests/integration/test_backup_restore_new/test.py +++ b/tests/integration/test_backup_restore_new/test.py @@ -1180,6 +1180,9 @@ def test_tables_dependency(): t4 = random_table_names[3] t5 = random_table_names[4] t6 = random_table_names[5] + t7 = random_table_names[6] + t8 = random_table_names[7] + t9 = random_table_names[8] # Create a materialized view and a dictionary with a local table as source. 
instance.query( @@ -1193,7 +1196,7 @@ def test_tables_dependency(): instance.query(f"CREATE MATERIALIZED VIEW {t3} TO {t2} AS SELECT x, y FROM {t1}") instance.query( - f"CREATE DICTIONARY {t4} (x Int64, y String) PRIMARY KEY x SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() TABLE '{t1.split('.')[1]}' DB '{t1.split('.')[0]}')) LAYOUT(FLAT()) LIFETIME(0)" + f"CREATE DICTIONARY {t4} (x Int64, y String) PRIMARY KEY x SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() TABLE '{t1.split('.')[1]}' DB '{t1.split('.')[0]}')) LAYOUT(FLAT()) LIFETIME(4)" ) instance.query(f"CREATE TABLE {t5} AS dictionary({t4})") @@ -1202,12 +1205,25 @@ def test_tables_dependency(): f"CREATE TABLE {t6}(x Int64, y String DEFAULT dictGet({t4}, 'y', x)) ENGINE=MergeTree ORDER BY tuple()" ) + instance.query(f"CREATE VIEW {t7} AS SELECT sum(x) FROM (SELECT x FROM {t6})") + + instance.query( + f"CREATE TABLE {t8} AS {t2} ENGINE = Buffer({t2.split('.')[0]}, {t2.split('.')[1]}, 16, 10, 100, 10000, 1000000, 10000000, 100000000)" + ) + + instance.query( + f"CREATE DICTIONARY {t9} (x Int64, y String) PRIMARY KEY x SOURCE(CLICKHOUSE(TABLE '{t1.split('.')[1]}' DB '{t1.split('.')[0]}')) LAYOUT(FLAT()) LIFETIME(9)" + ) + # Make backup. backup_name = new_backup_name() instance.query(f"BACKUP DATABASE test, DATABASE test2 TO {backup_name}") # Drop everything in reversive order. def drop(): + instance.query(f"DROP DICTIONARY {t9}") + instance.query(f"DROP TABLE {t8} NO DELAY") + instance.query(f"DROP TABLE {t7} NO DELAY") instance.query(f"DROP TABLE {t6} NO DELAY") instance.query(f"DROP TABLE {t5} NO DELAY") instance.query(f"DROP DICTIONARY {t4}") @@ -1219,11 +1235,36 @@ def test_tables_dependency(): drop() - # Restore everything and check. + # Restore everything. instance.query(f"RESTORE ALL FROM {backup_name}") + # Check everything is restored. assert instance.query( "SELECT concat(database, '.', name) AS c FROM system.tables WHERE database IN ['test', 'test2'] ORDER BY c" - ) == TSV(sorted([t1, t2, t3, t4, t5, t6])) + ) == TSV(sorted([t1, t2, t3, t4, t5, t6, t7, t8, t9])) + + # Check logs. 
+ instance.query("SYSTEM FLUSH LOGS") + expect_in_logs = [ + f"Table {t1} has no dependencies (level 0)", + f"Table {t2} has no dependencies (level 0)", + ( + f"Table {t3} has 2 dependencies: {t1}, {t2} (level 1)", + f"Table {t3} has 2 dependencies: {t2}, {t1} (level 1)", + ), + f"Table {t4} has 1 dependencies: {t1} (level 1)", + f"Table {t5} has 1 dependencies: {t4} (level 2)", + f"Table {t6} has 1 dependencies: {t4} (level 2)", + f"Table {t7} has 1 dependencies: {t6} (level 3)", + f"Table {t8} has 1 dependencies: {t2} (level 1)", + f"Table {t9} has 1 dependencies: {t1} (level 1)", + ] + for expect in expect_in_logs: + assert any( + [ + instance.contains_in_log(f"RestorerFromBackup: {x}") + for x in tuple(expect) + ] + ) drop() diff --git a/tests/integration/test_backup_restore_on_cluster/test.py b/tests/integration/test_backup_restore_on_cluster/test.py index 09915c8e789..27448b95b51 100644 --- a/tests/integration/test_backup_restore_on_cluster/test.py +++ b/tests/integration/test_backup_restore_on_cluster/test.py @@ -796,6 +796,84 @@ def test_mutation(): node1.query(f"RESTORE TABLE tbl ON CLUSTER 'cluster' FROM {backup_name}") +def test_tables_dependency(): + node1.query("CREATE DATABASE mydb ON CLUSTER 'cluster3'") + + node1.query( + "CREATE TABLE mydb.src ON CLUSTER 'cluster' (x Int64, y String) ENGINE=MergeTree ORDER BY tuple()" + ) + + node1.query( + "CREATE DICTIONARY mydb.dict ON CLUSTER 'cluster' (x Int64, y String) PRIMARY KEY x " + "SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() DB 'mydb' TABLE 'src')) LAYOUT(FLAT()) LIFETIME(0)" + ) + + node1.query( + "CREATE TABLE mydb.dist1 (x Int64) ENGINE=Distributed('cluster', 'mydb', 'src')" + ) + + node3.query( + "CREATE TABLE mydb.dist2 (x Int64) ENGINE=Distributed(cluster, 'mydb', 'src')" + ) + + node1.query("CREATE TABLE mydb.clusterfunc1 AS cluster('cluster', 'mydb.src')") + node1.query("CREATE TABLE mydb.clusterfunc2 AS cluster(cluster, mydb.src)") + node1.query("CREATE TABLE mydb.clusterfunc3 AS cluster(cluster, 'mydb', 'src')") + node1.query( + "CREATE TABLE mydb.clusterfunc4 AS cluster(cluster, dictionary(mydb.dict))" + ) + node1.query( + "CREATE TABLE mydb.clusterfunc5 AS clusterAllReplicas(cluster, dictionary(mydb.dict))" + ) + + node3.query("CREATE TABLE mydb.clusterfunc6 AS cluster('cluster', 'mydb.src')") + node3.query("CREATE TABLE mydb.clusterfunc7 AS cluster(cluster, mydb.src)") + node3.query("CREATE TABLE mydb.clusterfunc8 AS cluster(cluster, 'mydb', 'src')") + node3.query( + "CREATE TABLE mydb.clusterfunc9 AS cluster(cluster, dictionary(mydb.dict))" + ) + node3.query( + "CREATE TABLE mydb.clusterfunc10 AS clusterAllReplicas(cluster, dictionary(mydb.dict))" + ) + + backup_name = new_backup_name() + node3.query(f"BACKUP DATABASE mydb ON CLUSTER 'cluster3' TO {backup_name}") + + node3.query("DROP DATABASE mydb") + + node3.query(f"RESTORE DATABASE mydb ON CLUSTER 'cluster3' FROM {backup_name}") + + node3.query("SYSTEM FLUSH LOGS ON CLUSTER 'cluster3'") + expect_in_logs_1 = [ + "Table mydb.src has no dependencies (level 0)", + "Table mydb.dict has 1 dependencies: mydb.src (level 1)", + "Table mydb.dist1 has 1 dependencies: mydb.src (level 1)", + "Table mydb.clusterfunc1 has 1 dependencies: mydb.src (level 1)", + "Table mydb.clusterfunc2 has 1 dependencies: mydb.src (level 1)", + "Table mydb.clusterfunc3 has 1 dependencies: mydb.src (level 1)", + "Table mydb.clusterfunc4 has 1 dependencies: mydb.dict (level 2)", + "Table mydb.clusterfunc5 has 1 dependencies: mydb.dict (level 2)", + ] + expect_in_logs_2 = [ + "Table 
mydb.src has no dependencies (level 0)", + "Table mydb.dict has 1 dependencies: mydb.src (level 1)", + ] + expect_in_logs_3 = [ + "Table mydb.dist2 has no dependencies (level 0)", + "Table mydb.clusterfunc6 has no dependencies (level 0)", + "Table mydb.clusterfunc7 has no dependencies (level 0)", + "Table mydb.clusterfunc8 has no dependencies (level 0)", + "Table mydb.clusterfunc9 has no dependencies (level 0)", + "Table mydb.clusterfunc10 has no dependencies (level 0)", + ] + for expect in expect_in_logs_1: + assert node1.contains_in_log(f"RestorerFromBackup: {expect}") + for expect in expect_in_logs_2: + assert node2.contains_in_log(f"RestorerFromBackup: {expect}") + for expect in expect_in_logs_3: + assert node3.contains_in_log(f"RestorerFromBackup: {expect}") + + def test_get_error_from_other_host(): node1.query("CREATE TABLE tbl (`x` UInt8) ENGINE = MergeTree ORDER BY x") node1.query("INSERT INTO tbl VALUES (3)") diff --git a/tests/integration/test_parts_removal/__init__.py b/tests/integration/test_create_query_constraints/__init__.py similarity index 100% rename from tests/integration/test_parts_removal/__init__.py rename to tests/integration/test_create_query_constraints/__init__.py diff --git a/tests/integration/test_create_query_constraints/test.py b/tests/integration/test_create_query_constraints/test.py new file mode 100644 index 00000000000..997671a6996 --- /dev/null +++ b/tests/integration/test_create_query_constraints/test.py @@ -0,0 +1,92 @@ +import pytest +import asyncio +import re +import random +import os.path +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import assert_eq_with_retry, TSV + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance("instance") + + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_create_query_const_constraints(): + + instance.query("CREATE USER u_const SETTINGS max_threads = 1 CONST") + instance.query("GRANT ALL ON *.* TO u_const") + + expected_error = "Setting max_threads should not be changed" + + assert expected_error in instance.query_and_get_error( + "CREATE USER inner_user SETTINGS max_threads = 1", user="u_const" + ) + assert expected_error in instance.query_and_get_error( + "CREATE USER inner_user SETTINGS max_threads MIN 0 MAX 2", user="u_const" + ) + assert expected_error in instance.query_and_get_error( + "CREATE USER inner_user SETTINGS max_threads WRITABLE", user="u_const" + ) + assert expected_error in instance.query_and_get_error( + "CREATE ROLE inner_role SETTINGS max_threads = 1", user="u_const" + ) + assert expected_error in instance.query_and_get_error( + "CREATE SETTINGS PROFILE inner_profile SETTINGS max_threads = 1", user="u_const" + ) + + instance.query( + "CREATE USER inner_user_1 SETTINGS max_threads CONST", user="u_const" + ) + instance.query( + "CREATE USER inner_user_2 SETTINGS max_threads = 1 CONST", user="u_const" + ) + instance.query("DROP USER u_const, inner_user_1, inner_user_2") + + +def test_create_query_minmax_constraints(): + + instance.query("CREATE USER u_minmax SETTINGS max_threads = 4 MIN 2 MAX 6") + instance.query("GRANT ALL ON *.* TO u_minmax") + + expected_error = "Setting max_threads shouldn't be less than" + + assert expected_error in instance.query_and_get_error( + "CREATE USER inner_user SETTINGS max_threads = 1", user="u_minmax" + ) + assert expected_error in instance.query_and_get_error( + "CREATE USER inner_user SETTINGS max_threads MIN 1 MAX 3", 
user="u_minmax" + ) + assert expected_error in instance.query_and_get_error( + "CREATE ROLE inner_role SETTINGS max_threads MIN 1 MAX 3", user="u_minmax" + ) + assert expected_error in instance.query_and_get_error( + "CREATE SETTINGS PROFILE inner_profile SETTINGS max_threads MIN 1 MAX 3", + user="u_minmax", + ) + + expected_error = "Setting max_threads shouldn't be greater than" + + assert expected_error in instance.query_and_get_error( + "CREATE USER inner_user SETTINGS max_threads = 8", user="u_minmax" + ) + assert expected_error in instance.query_and_get_error( + "CREATE USER inner_user SETTINGS max_threads MIN 4 MAX 8", user="u_minmax" + ) + assert expected_error in instance.query_and_get_error( + "CREATE ROLE inner_role SETTINGS max_threads MIN 4 MAX 8", user="u_minmax" + ) + assert expected_error in instance.query_and_get_error( + "CREATE SETTINGS PROFILE inner_profile SETTINGS max_threads MIN 4 MAX 8", + user="u_minmax", + ) + + instance.query("CREATE USER inner_user SETTINGS max_threads = 3", user="u_minmax") + instance.query("DROP USER u_minmax, inner_user") diff --git a/tests/integration/test_keeper_zookeeper_converter/test.py b/tests/integration/test_keeper_zookeeper_converter/test.py index aa2e435ce36..3c48e9cec22 100644 --- a/tests/integration/test_keeper_zookeeper_converter/test.py +++ b/tests/integration/test_keeper_zookeeper_converter/test.py @@ -5,6 +5,7 @@ import helpers.keeper_utils as keeper_utils from kazoo.client import KazooClient from kazoo.retry import KazooRetry from kazoo.security import make_acl +from kazoo.handlers.threading import KazooTimeoutError import os import time @@ -35,6 +36,11 @@ def restart_and_clear_zookeeper(): start_zookeeper() +def restart_zookeeper(): + stop_zookeeper() + start_zookeeper() + + def clear_clickhouse_data(): node.exec_in_container( [ @@ -93,13 +99,25 @@ def get_fake_zk(timeout=60.0): def get_genuine_zk(timeout=60.0): - _genuine_zk_instance = KazooClient( - hosts=cluster.get_instance_ip("node") + ":2181", - timeout=timeout, - connection_retry=KazooRetry(max_tries=20), - ) - _genuine_zk_instance.start() - return _genuine_zk_instance + CONNECTION_RETRIES = 100 + for i in range(CONNECTION_RETRIES): + try: + _genuine_zk_instance = KazooClient( + hosts=cluster.get_instance_ip("node") + ":2181", + timeout=timeout, + connection_retry=KazooRetry(max_tries=20), + ) + _genuine_zk_instance.start() + return _genuine_zk_instance + except KazooTimeoutError: + if i == CONNECTION_RETRIES - 1: + raise + + print( + "Failed to connect to ZK cluster because of timeout. Restarting cluster and trying again." 
+ ) + time.sleep(0.2) + restart_zookeeper() def compare_stats(stat1, stat2, path, ignore_pzxid=False): diff --git a/tests/integration/test_merge_tree_hdfs/test.py b/tests/integration/test_merge_tree_hdfs/test.py index 9edb71ec15a..8a6703be2dc 100644 --- a/tests/integration/test_merge_tree_hdfs/test.py +++ b/tests/integration/test_merge_tree_hdfs/test.py @@ -211,8 +211,8 @@ def test_attach_detach_partition(cluster): node.query("ALTER TABLE hdfs_test DETACH PARTITION '2020-01-03'") assert node.query("SELECT count(*) FROM hdfs_test FORMAT Values") == "(4096)" - wait_for_delete_inactive_parts(node, "hdfs_test") wait_for_delete_empty_parts(node, "hdfs_test") + wait_for_delete_inactive_parts(node, "hdfs_test") hdfs_objects = fs.listdir("/clickhouse") assert len(hdfs_objects) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 @@ -225,8 +225,8 @@ def test_attach_detach_partition(cluster): node.query("ALTER TABLE hdfs_test DROP PARTITION '2020-01-03'") assert node.query("SELECT count(*) FROM hdfs_test FORMAT Values") == "(4096)" - wait_for_delete_inactive_parts(node, "hdfs_test") wait_for_delete_empty_parts(node, "hdfs_test") + wait_for_delete_inactive_parts(node, "hdfs_test") hdfs_objects = fs.listdir("/clickhouse") assert len(hdfs_objects) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE @@ -237,8 +237,8 @@ def test_attach_detach_partition(cluster): settings={"allow_drop_detached": 1}, ) assert node.query("SELECT count(*) FROM hdfs_test FORMAT Values") == "(0)" - wait_for_delete_inactive_parts(node, "hdfs_test") wait_for_delete_empty_parts(node, "hdfs_test") + wait_for_delete_inactive_parts(node, "hdfs_test") hdfs_objects = fs.listdir("/clickhouse") assert len(hdfs_objects) == FILES_OVERHEAD @@ -305,8 +305,8 @@ def test_table_manipulations(cluster): node.query("TRUNCATE TABLE hdfs_test") assert node.query("SELECT count(*) FROM hdfs_test FORMAT Values") == "(0)" - wait_for_delete_inactive_parts(node, "hdfs_test") wait_for_delete_empty_parts(node, "hdfs_test") + wait_for_delete_inactive_parts(node, "hdfs_test") hdfs_objects = fs.listdir("/clickhouse") assert len(hdfs_objects) == FILES_OVERHEAD diff --git a/tests/integration/test_merge_tree_s3/test.py b/tests/integration/test_merge_tree_s3/test.py index 002bc8ec9d7..417fa436471 100644 --- a/tests/integration/test_merge_tree_s3/test.py +++ b/tests/integration/test_merge_tree_s3/test.py @@ -323,8 +323,8 @@ def test_attach_detach_partition(cluster, node_name): ) node.query("ALTER TABLE s3_test DETACH PARTITION '2020-01-03'") - wait_for_delete_inactive_parts(node, "s3_test") wait_for_delete_empty_parts(node, "s3_test") + wait_for_delete_inactive_parts(node, "s3_test") assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(4096)" assert ( len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) @@ -339,8 +339,8 @@ def test_attach_detach_partition(cluster, node_name): ) node.query("ALTER TABLE s3_test DROP PARTITION '2020-01-03'") - wait_for_delete_inactive_parts(node, "s3_test") wait_for_delete_empty_parts(node, "s3_test") + wait_for_delete_inactive_parts(node, "s3_test") assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(4096)" assert ( len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) @@ -348,8 +348,8 @@ def test_attach_detach_partition(cluster, node_name): ) node.query("ALTER TABLE s3_test DETACH PARTITION '2020-01-04'") - wait_for_delete_inactive_parts(node, "s3_test") wait_for_delete_empty_parts(node, "s3_test") + wait_for_delete_inactive_parts(node, "s3_test") assert 
node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(0)" assert ( len(list(minio.list_objects(cluster.minio_bucket, "data/"))) @@ -431,8 +431,8 @@ def test_table_manipulations(cluster, node_name): ) node.query("TRUNCATE TABLE s3_test") - wait_for_delete_inactive_parts(node, "s3_test") wait_for_delete_empty_parts(node, "s3_test") + wait_for_delete_inactive_parts(node, "s3_test") assert node.query("SELECT count(*) FROM s3_test FORMAT Values") == "(0)" assert ( len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) @@ -546,8 +546,8 @@ def test_freeze_unfreeze(cluster, node_name): node.query("ALTER TABLE s3_test FREEZE WITH NAME 'backup2'") node.query("TRUNCATE TABLE s3_test") - wait_for_delete_inactive_parts(node, "s3_test") wait_for_delete_empty_parts(node, "s3_test") + wait_for_delete_inactive_parts(node, "s3_test") assert ( len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) == FILES_OVERHEAD + FILES_OVERHEAD_PER_PART_WIDE * 2 @@ -586,8 +586,8 @@ def test_freeze_system_unfreeze(cluster, node_name): node.query("ALTER TABLE s3_test_removed FREEZE WITH NAME 'backup3'") node.query("TRUNCATE TABLE s3_test") - wait_for_delete_inactive_parts(node, "s3_test") wait_for_delete_empty_parts(node, "s3_test") + wait_for_delete_inactive_parts(node, "s3_test") node.query("DROP TABLE s3_test_removed NO DELAY") assert ( len(list(minio.list_objects(cluster.minio_bucket, "data/", recursive=True))) diff --git a/tests/integration/test_partition/test.py b/tests/integration/test_partition/test.py index 6bd224851e7..8a457eddd9d 100644 --- a/tests/integration/test_partition/test.py +++ b/tests/integration/test_partition/test.py @@ -201,8 +201,8 @@ def attach_check_all_parts_table(started_cluster): def test_attach_check_all_parts(attach_check_all_parts_table): q("ALTER TABLE test.attach_partition DETACH PARTITION 0") - wait_for_delete_inactive_parts(instance, "test.attach_partition") wait_for_delete_empty_parts(instance, "test.attach_partition") + wait_for_delete_inactive_parts(instance, "test.attach_partition") path_to_detached = path_to_data + "data/test/attach_partition/detached/" instance.exec_in_container(["mkdir", "{}".format(path_to_detached + "0_5_5_0")]) @@ -470,11 +470,20 @@ def test_detached_part_dir_exists(started_cluster): ) # will move all_1_1_0 to detached/all_1_1_0 and create all_1_1_1 wait_for_delete_empty_parts(instance, "detached_part_dir_exists") + wait_for_delete_inactive_parts(instance, "detached_part_dir_exists") q("detach table detached_part_dir_exists") q("attach table detached_part_dir_exists") + q("insert into detached_part_dir_exists select 1") # will create all_1_1_0 q("insert into detached_part_dir_exists select 1") # will create all_2_2_0 - q("insert into detached_part_dir_exists select 1") # will create all_3_3_0 + + assert ( + q( + "select name from system.parts where table='detached_part_dir_exists' and active order by name" + ) + == "all_1_1_0\nall_2_2_0\n" + ) + instance.exec_in_container( [ "bash", @@ -511,7 +520,7 @@ def test_make_clone_in_detached(started_cluster): path = path_to_data + "data/default/clone_in_detached/" # broken part already detached - q("insert into clone_in_detached values (42, '¯\_(ツ)_/¯')") + q("insert into clone_in_detached values (42, '¯-_(ツ)_-¯')") instance.exec_in_container(["rm", path + "all_0_0_0/data.bin"]) instance.exec_in_container( ["cp", "-r", path + "all_0_0_0", path + "detached/broken_all_0_0_0"] @@ -522,7 +531,7 @@ def test_make_clone_in_detached(started_cluster): ) # there's a directory 
with the same name, but different content - q("insert into clone_in_detached values (43, '¯\_(ツ)_/¯')") + q("insert into clone_in_detached values (43, '¯-_(ツ)_-¯')") instance.exec_in_container(["rm", path + "all_1_1_0/data.bin"]) instance.exec_in_container( ["cp", "-r", path + "all_1_1_0", path + "detached/broken_all_1_1_0"] @@ -551,7 +560,7 @@ def test_make_clone_in_detached(started_cluster): ) # there are directories with the same name, but different content, and part already detached - q("insert into clone_in_detached values (44, '¯\_(ツ)_/¯')") + q("insert into clone_in_detached values (44, '¯-_(ツ)_-¯')") instance.exec_in_container(["rm", path + "all_2_2_0/data.bin"]) instance.exec_in_container( ["cp", "-r", path + "all_2_2_0", path + "detached/broken_all_2_2_0"] diff --git a/tests/integration/test_parts_removal/test.py b/tests/integration/test_parts_removal/test.py deleted file mode 100644 index 4772178d63b..00000000000 --- a/tests/integration/test_parts_removal/test.py +++ /dev/null @@ -1,71 +0,0 @@ -# pylint: disable=unused-argument -# pylint: disable=redefined-outer-name -# pylint: disable=line-too-long - -import pytest - -from helpers.cluster import ClickHouseCluster - -cluster = ClickHouseCluster(__file__) - -node = cluster.add_instance("node", stay_alive=True) - - -def query_split(node, query): - return list( - map(lambda x: x.strip().split("\t"), node.query(query).strip().split("\n")) - ) - - -@pytest.fixture(scope="module") -def start_cluster(): - try: - cluster.start() - yield cluster - finally: - cluster.shutdown() - - -def test_parts_removal_on_abnormal_exit(start_cluster): - node.query( - """ - create table test_parts_removal (key Int) engine=MergeTree order by key; - insert into test_parts_removal values (1); -- all_1_1_0 - insert into test_parts_removal values (2); -- all_1_2_0 - optimize table test_parts_removal; -- all_2_2_0 - """ - ) - - parts = query_split( - node, "select name, _state from system.parts where table = 'test_parts_removal'" - ) - assert parts == [ - ["all_1_1_0", "Outdated"], - ["all_1_2_1", "Active"], - ["all_2_2_0", "Outdated"], - ] - - node.restart_clickhouse(kill=True) - - parts = query_split( - node, "select name, _state from system.parts where table = 'test_parts_removal'" - ) - assert parts == [ - ["all_1_1_0", "Outdated"], - ["all_1_2_1", "Active"], - ["all_2_2_0", "Outdated"], - ] - - node.query( - """ - detach table test_parts_removal; - attach table test_parts_removal; - """ - ) - - parts = query_split( - node, "select name, _state from system.parts where table = 'test_parts_removal'" - ) - assert parts == [ - ["all_1_2_1", "Active"], - ] diff --git a/tests/integration/test_password_constraints/__init__.py b/tests/integration/test_password_constraints/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_password_constraints/configs/complexity_rules.xml b/tests/integration/test_password_constraints/configs/complexity_rules.xml new file mode 100644 index 00000000000..fd387297abf --- /dev/null +++ b/tests/integration/test_password_constraints/configs/complexity_rules.xml @@ -0,0 +1,25 @@ + + + + .{12} + be at least 12 characters long + + + \p{N} + contain at least 1 numeric character + + + \p{Ll} + contain at least 1 lowercase character + + + \p{Lu} + contain at least 1 uppercase character + + + [^\p{L}\p{N}] + contain at least 1 special character + + + + diff --git a/tests/integration/test_password_constraints/test.py b/tests/integration/test_password_constraints/test.py new file mode 100644 index 
00000000000..e3628861b28 --- /dev/null +++ b/tests/integration/test_password_constraints/test.py @@ -0,0 +1,42 @@ +import pytest + +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +node = cluster.add_instance("node", main_configs=["configs/complexity_rules.xml"]) + + +@pytest.fixture(scope="module") +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_complexity_rules(start_cluster): + + error_message = "DB::Exception: Invalid password. The password should: be at least 12 characters long, contain at least 1 numeric character, contain at least 1 lowercase character, contain at least 1 uppercase character, contain at least 1 special character" + assert error_message in node.query_and_get_error( + "CREATE USER u_1 IDENTIFIED WITH plaintext_password BY ''" + ) + + error_message = "DB::Exception: Invalid password. The password should: contain at least 1 lowercase character, contain at least 1 uppercase character, contain at least 1 special character" + assert error_message in node.query_and_get_error( + "CREATE USER u_2 IDENTIFIED WITH sha256_password BY '000000000000'" + ) + + error_message = "DB::Exception: Invalid password. The password should: contain at least 1 uppercase character, contain at least 1 special character" + assert error_message in node.query_and_get_error( + "CREATE USER u_3 IDENTIFIED WITH double_sha1_password BY 'a00000000000'" + ) + + error_message = "DB::Exception: Invalid password. The password should: contain at least 1 special character" + assert error_message in node.query_and_get_error( + "CREATE USER u_4 IDENTIFIED WITH plaintext_password BY 'aA0000000000'" + ) + + node.query("CREATE USER u_5 IDENTIFIED WITH plaintext_password BY 'aA!000000000'") + node.query("DROP USER u_5") diff --git a/tests/integration/test_profile_settings_and_constraints_order/__init__.py b/tests/integration/test_profile_settings_and_constraints_order/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_profile_settings_and_constraints_order/configs/constraints_first.xml b/tests/integration/test_profile_settings_and_constraints_order/configs/constraints_first.xml new file mode 100644 index 00000000000..bfc6d504a08 --- /dev/null +++ b/tests/integration/test_profile_settings_and_constraints_order/configs/constraints_first.xml @@ -0,0 +1,22 @@ + + + + + + 1 + + + 1 + + + + + + + ::/0 + + default + default + + + diff --git a/tests/integration/test_profile_settings_and_constraints_order/configs/constraints_last.xml b/tests/integration/test_profile_settings_and_constraints_order/configs/constraints_last.xml new file mode 100644 index 00000000000..3242ecf963e --- /dev/null +++ b/tests/integration/test_profile_settings_and_constraints_order/configs/constraints_last.xml @@ -0,0 +1,22 @@ + + + + 1 + + + 1 + + + + + + + + + ::/0 + + default + default + + + diff --git a/tests/integration/test_profile_settings_and_constraints_order/test.py b/tests/integration/test_profile_settings_and_constraints_order/test.py new file mode 100644 index 00000000000..09a4e5dcf24 --- /dev/null +++ b/tests/integration/test_profile_settings_and_constraints_order/test.py @@ -0,0 +1,40 @@ +import pytest + +from helpers.client import QueryRuntimeException +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV + +cluster = ClickHouseCluster(__file__) +node1 = cluster.add_instance("node1", user_configs=["configs/constraints_first.xml"]) +node2 = 
cluster.add_instance("node2", user_configs=["configs/constraints_last.xml"]) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + yield cluster + + finally: + cluster.shutdown() + + +def test_profile_settings_and_constraints_order(started_cluster): + query = "SELECT name, readonly FROM system.settings WHERE name == 'log_queries'" + expected = """\ +log_queries 1""" + + settings = node1.query( + query, + user="test_profile_settings_and_constraints_order", + ) + + assert TSV(settings) == TSV(expected) + + settings = node2.query( + query, + user="test_profile_settings_and_constraints_order", + ) + + assert TSV(settings) == TSV(expected) diff --git a/tests/integration/test_server_reload/test.py b/tests/integration/test_server_reload/test.py index ad632dc64da..1323285b17f 100644 --- a/tests/integration/test_server_reload/test.py +++ b/tests/integration/test_server_reload/test.py @@ -112,11 +112,15 @@ def get_pgsql_client(cluster, port): time.sleep(0.1) +@contextlib.contextmanager def get_grpc_channel(cluster, port): host_port = cluster.get_instance_ip("instance") + f":{port}" channel = grpc.insecure_channel(host_port) grpc.channel_ready_future(channel).result(timeout=10) - return channel + try: + yield channel + finally: + channel.close() def grpc_query(channel, query_text): @@ -146,7 +150,7 @@ def configure_from_zk(zk, querier=None): zk.create(path=path, value=value, makepath=True) has_changed = True except NodeExistsError: - if zk.get(path) != value: + if zk.get(path)[0] != value: zk.set(path=path, value=value) has_changed = True if has_changed and querier is not None: @@ -238,16 +242,17 @@ def test_change_postgresql_port(cluster, zk): def test_change_grpc_port(cluster, zk): with default_client(cluster, zk) as client: - grpc_channel = get_grpc_channel(cluster, port=9100) - assert grpc_query(grpc_channel, "SELECT 1") == "1\n" - with sync_loaded_config(client.query): - zk.set("/clickhouse/ports/grpc", b"9090") - with pytest.raises( - grpc._channel._InactiveRpcError, match="StatusCode.UNAVAILABLE" - ): - grpc_query(grpc_channel, "SELECT 1") - grpc_channel_on_new_port = get_grpc_channel(cluster, port=9090) - assert grpc_query(grpc_channel_on_new_port, "SELECT 1") == "1\n" + with get_grpc_channel(cluster, port=9100) as grpc_channel: + assert grpc_query(grpc_channel, "SELECT 1") == "1\n" + with sync_loaded_config(client.query): + zk.set("/clickhouse/ports/grpc", b"9090") + with pytest.raises( + grpc._channel._InactiveRpcError, match="StatusCode.UNAVAILABLE" + ): + grpc_query(grpc_channel, "SELECT 1") + + with get_grpc_channel(cluster, port=9090) as grpc_channel_on_new_port: + assert grpc_query(grpc_channel_on_new_port, "SELECT 1") == "1\n" def test_remove_tcp_port(cluster, zk): @@ -292,14 +297,14 @@ def test_remove_postgresql_port(cluster, zk): def test_remove_grpc_port(cluster, zk): with default_client(cluster, zk) as client: - grpc_channel = get_grpc_channel(cluster, port=9100) - assert grpc_query(grpc_channel, "SELECT 1") == "1\n" - with sync_loaded_config(client.query): - zk.delete("/clickhouse/ports/grpc") - with pytest.raises( - grpc._channel._InactiveRpcError, match="StatusCode.UNAVAILABLE" - ): - grpc_query(grpc_channel, "SELECT 1") + with get_grpc_channel(cluster, port=9100) as grpc_channel: + assert grpc_query(grpc_channel, "SELECT 1") == "1\n" + with sync_loaded_config(client.query): + zk.delete("/clickhouse/ports/grpc") + with pytest.raises( + grpc._channel._InactiveRpcError, match="StatusCode.UNAVAILABLE" + ): + grpc_query(grpc_channel, "SELECT 1") 
def test_change_listen_host(cluster, zk): diff --git a/tests/integration/test_storage_s3/s3_mocks/echo.py b/tests/integration/test_storage_s3/s3_mocks/echo.py index 5103d7ebc15..7d04bb2f166 100644 --- a/tests/integration/test_storage_s3/s3_mocks/echo.py +++ b/tests/integration/test_storage_s3/s3_mocks/echo.py @@ -3,17 +3,20 @@ import sys class RequestHandler(http.server.BaseHTTPRequestHandler): - def do_HEAD(self): + def get_response(self): if self.path.startswith("/get-my-path/"): - self.send_response(200) - self.send_header("Content-Type", "text/plain") - self.end_headers() - + return b"/" + self.path.split("/", maxsplit=2)[2].encode() elif self.path == "/": + return b"OK" + + return None + + def do_HEAD(self): + if self.path.startswith("/get-my-path/") or self.path == "/": self.send_response(200) self.send_header("Content-Type", "text/plain") + self.send_header("Content-Length", len(self.get_response())) self.end_headers() - else: self.send_response(404) self.send_header("Content-Type", "text/plain") @@ -21,11 +24,7 @@ class RequestHandler(http.server.BaseHTTPRequestHandler): def do_GET(self): self.do_HEAD() - if self.path.startswith("/get-my-path/"): - self.wfile.write(b"/" + self.path.split("/", maxsplit=2)[2].encode()) - - elif self.path == "/": - self.wfile.write(b"OK") + self.wfile.write(self.get_response()) httpd = http.server.HTTPServer(("0.0.0.0", int(sys.argv[1])), RequestHandler) diff --git a/tests/integration/test_storage_s3/s3_mocks/mock_s3.py b/tests/integration/test_storage_s3/s3_mocks/mock_s3.py index 870353ebaa8..b31827e32bc 100644 --- a/tests/integration/test_storage_s3/s3_mocks/mock_s3.py +++ b/tests/integration/test_storage_s3/s3_mocks/mock_s3.py @@ -1,6 +1,6 @@ import sys -from bottle import abort, route, run, request, response +from bottle import route, run, request, response @route("/redirected/<_path:path>") @@ -14,14 +14,22 @@ def infinite_redirect(_path): def server(_bucket, _path): for name in request.headers: if name == "Authorization" and request.headers[name] == "Bearer TOKEN": - return "1, 2, 3" + result = "1, 2, 3" + response.content_type = "text/plain" + response.set_header("Content-Length", len(result)) + return result + + result = 'ForbiddenErrorForbidden Errortxfbd566d03042474888193-00608d7537' response.status = 403 response.content_type = "text/xml" - return 'ForbiddenErrorForbidden Errortxfbd566d03042474888193-00608d7537' + response.set_header("Content-Length", len(result)) + return result @route("/") def ping(): + response.content_type = "text/plain" + response.set_header("Content-Length", 2) return "OK" diff --git a/tests/integration/test_storage_s3/test.py b/tests/integration/test_storage_s3/test.py index 6e1a6e8a66b..937f14bb878 100644 --- a/tests/integration/test_storage_s3/test.py +++ b/tests/integration/test_storage_s3/test.py @@ -806,7 +806,7 @@ def test_custom_auth_headers_exclusion(started_cluster): print(result) assert ei.value.returncode == 243 - assert "Forbidden Error" in ei.value.stderr + assert "HTTP response code: 403" in ei.value.stderr def test_infinite_redirect(started_cluster): @@ -1714,7 +1714,7 @@ def test_ast_auth_headers(started_cluster): f"select count() from s3('http://resolver:8080/{bucket}/{filename}', 'CSV')" ) - assert "Forbidden Error" in result + assert "HTTP response code: 403" in result assert "S3_ERROR" in result result = instance.query( diff --git a/tests/integration/test_ttl_replicated/test.py b/tests/integration/test_ttl_replicated/test.py index aa4a09f1269..87f653acc76 100644 --- 
a/tests/integration/test_ttl_replicated/test.py +++ b/tests/integration/test_ttl_replicated/test.py @@ -449,8 +449,8 @@ def test_ttl_empty_parts(started_cluster): assert node1.query("SELECT count() FROM test_ttl_empty_parts") == "3000\n" # Wait for cleanup thread - wait_for_delete_inactive_parts(node1, "test_ttl_empty_parts") wait_for_delete_empty_parts(node1, "test_ttl_empty_parts") + wait_for_delete_inactive_parts(node1, "test_ttl_empty_parts") assert ( node1.query( diff --git a/tests/performance/direct_dictionary.xml b/tests/performance/direct_dictionary.xml index aa7b99e39de..a9472412364 100644 --- a/tests/performance/direct_dictionary.xml +++ b/tests/performance/direct_dictionary.xml @@ -129,10 +129,9 @@ FORMAT Null; - DROP TABLE IF EXISTS simple_key_direct_dictionary_source_table; - DROP TABLE IF EXISTS complex_key_direct_dictionary_source_table; - DROP DICTIONARY IF EXISTS simple_key_direct_dictionary; DROP DICTIONARY IF EXISTS complex_key_direct_dictionary; + DROP TABLE IF EXISTS simple_key_direct_dictionary_source_table; + DROP TABLE IF EXISTS complex_key_direct_dictionary_source_table; diff --git a/tests/performance/flat_dictionary.xml b/tests/performance/flat_dictionary.xml index 8957925ef1a..8e625179608 100644 --- a/tests/performance/flat_dictionary.xml +++ b/tests/performance/flat_dictionary.xml @@ -73,8 +73,7 @@ FORMAT Null; - DROP TABLE IF EXISTS simple_key_flat_dictionary_source_table - DROP DICTIONARY IF EXISTS simple_key_flat_dictionary + DROP TABLE IF EXISTS simple_key_flat_dictionary_source_table diff --git a/tests/performance/hashed_array_dictionary.xml b/tests/performance/hashed_array_dictionary.xml index 5d09d29a9e7..8a805f56371 100644 --- a/tests/performance/hashed_array_dictionary.xml +++ b/tests/performance/hashed_array_dictionary.xml @@ -129,10 +129,9 @@ FORMAT Null; - DROP TABLE IF EXISTS simple_key_hashed_array_dictionary_source_table; - DROP TABLE IF EXISTS complex_key_hashed_array_dictionary_source_table; - DROP DICTIONARY IF EXISTS simple_key_hashed_array_dictionary; DROP DICTIONARY IF EXISTS complex_key_hashed_array_dictionary; + DROP TABLE IF EXISTS simple_key_hashed_array_dictionary_source_table; + DROP TABLE IF EXISTS complex_key_hashed_array_dictionary_source_table; diff --git a/tests/performance/hashed_dictionary.xml b/tests/performance/hashed_dictionary.xml index cf1cdac6df1..e9038e694c6 100644 --- a/tests/performance/hashed_dictionary.xml +++ b/tests/performance/hashed_dictionary.xml @@ -129,10 +129,9 @@ FORMAT Null; - DROP TABLE IF EXISTS simple_key_hashed_dictionary_source_table; - DROP TABLE IF EXISTS complex_key_hashed_dictionary_source_table; - DROP DICTIONARY IF EXISTS simple_key_hashed_dictionary; DROP DICTIONARY IF EXISTS complex_key_hashed_dictionary; + DROP TABLE IF EXISTS simple_key_hashed_dictionary_source_table; + DROP TABLE IF EXISTS complex_key_hashed_dictionary_source_table; diff --git a/tests/performance/hierarchical_dictionaries.xml b/tests/performance/hierarchical_dictionaries.xml index 7d807bf2c8d..b1c385eedb5 100644 --- a/tests/performance/hierarchical_dictionaries.xml +++ b/tests/performance/hierarchical_dictionaries.xml @@ -68,8 +68,7 @@ SELECT {func}('hierarchical_{dictionary_layout}_dictionary', number + 1) FROM numbers(1000000) FORMAT Null; - DROP TABLE IF EXISTS hierarchical_dictionary_source_table; DROP DICTIONARY IF EXISTS hierarchical_{dictionary_layout}_dictionary; DROP DICTIONARY IF EXISTS hierarchical_flat_dictionary; - + DROP TABLE IF EXISTS hierarchical_dictionary_source_table; diff --git 
a/tests/performance/range_hashed_dictionary.xml b/tests/performance/range_hashed_dictionary.xml index bdf949cd1ff..2ee559cbdaa 100644 --- a/tests/performance/range_hashed_dictionary.xml +++ b/tests/performance/range_hashed_dictionary.xml @@ -117,10 +117,9 @@ FORMAT Null; - DROP TABLE IF EXISTS simple_key_range_hashed_dictionary_source_table; - DROP TABLE IF EXISTS complex_key_range_hashed_dictionary_source_table; - DROP DICTIONARY IF EXISTS simple_key_range_hashed_dictionary; DROP DICTIONARY IF EXISTS complex_key_range_hashed_dictionary; + DROP TABLE IF EXISTS simple_key_range_hashed_dictionary_source_table; + DROP TABLE IF EXISTS complex_key_range_hashed_dictionary_source_table; diff --git a/tests/performance/schema_inference_text_formats.xml b/tests/performance/schema_inference_text_formats.xml new file mode 100644 index 00000000000..4a57e2a5eda --- /dev/null +++ b/tests/performance/schema_inference_text_formats.xml @@ -0,0 +1,23 @@ + + + + + format + + TabSeparated + CSV + Values + JSONEachRow + JSONCompactEachRow + + + + + +INSERT INTO function file(data.{format}) SELECT WatchID, Title, EventTime, RefererCategories, RefererRegions FROM test.hits LIMIT 25000 SETTINGS engine_file_truncate_on_insert=1 + +DESC file(data.{format}) SETTINGS schema_inference_use_cache_for_file=0 + +INSERT INTO FUNCTION file(data.{format}) SELECT * FROM numbers(0) SETTINGS engine_file_truncate_on_insert=1 + + diff --git a/tests/queries/0_stateless/01034_move_partition_from_table_zookeeper.sh b/tests/queries/0_stateless/01034_move_partition_from_table_zookeeper.sh index 74b7494f041..5e9e69d999d 100755 --- a/tests/queries/0_stateless/01034_move_partition_from_table_zookeeper.sh +++ b/tests/queries/0_stateless/01034_move_partition_from_table_zookeeper.sh @@ -65,7 +65,7 @@ $CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (1, '0', 1);" $CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (1, '1', 1);" $CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (2, '0', 1);" -query_with_retry "ALTER TABLE src MOVE PARTITION 1 TO TABLE dst;" &>- +query_with_retry "ALTER TABLE src MOVE PARTITION 1 TO TABLE dst;" &>/dev/null $CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA dst;" $CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM src;" @@ -85,7 +85,7 @@ $CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (1, '0', 1);" $CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (1, '1', 1);" $CLICKHOUSE_CLIENT --query="INSERT INTO src VALUES (2, '0', 1);" -query_with_retry "ALTER TABLE src MOVE PARTITION 1 TO TABLE dst;" &>- +query_with_retry "ALTER TABLE src MOVE PARTITION 1 TO TABLE dst;" &>/dev/null $CLICKHOUSE_CLIENT --query="SYSTEM SYNC REPLICA dst;" $CLICKHOUSE_CLIENT --query="SELECT count(), sum(d) FROM src;" diff --git a/tests/queries/0_stateless/01072_window_view_multiple_columns_groupby.sh b/tests/queries/0_stateless/01072_window_view_multiple_columns_groupby.sh index 3e2eda96f93..ccc4ed3e08d 100755 --- a/tests/queries/0_stateless/01072_window_view_multiple_columns_groupby.sh +++ b/tests/queries/0_stateless/01072_window_view_multiple_columns_groupby.sh @@ -12,7 +12,7 @@ DROP TABLE IF EXISTS wv; CREATE TABLE dst(time DateTime, colA String, colB String) Engine=MergeTree ORDER BY tuple(); CREATE TABLE mt(colA String, colB String) ENGINE=MergeTree ORDER BY tuple(); -CREATE WINDOW VIEW wv TO dst AS SELECT tumbleStart(w_id) AS time, colA, colB FROM mt GROUP BY tumble(now(), INTERVAL '1' SECOND, 'US/Samoa') AS w_id, colA, colB; +CREATE WINDOW VIEW wv TO dst AS SELECT tumbleStart(w_id) AS time, colA, colB FROM mt GROUP BY 
tumble(now(), INTERVAL '10' SECOND, 'US/Samoa') AS w_id, colA, colB; INSERT INTO mt VALUES ('test1', 'test2'); EOF diff --git a/tests/queries/0_stateless/01155_rename_move_materialized_view.sql b/tests/queries/0_stateless/01155_rename_move_materialized_view.sql index c3cc0bbb9eb..1eff1c0779a 100644 --- a/tests/queries/0_stateless/01155_rename_move_materialized_view.sql +++ b/tests/queries/0_stateless/01155_rename_move_materialized_view.sql @@ -39,7 +39,7 @@ RENAME TABLE test_01155_ordinary.mv1 TO test_01155_atomic.mv1; RENAME TABLE test_01155_ordinary.mv2 TO test_01155_atomic.mv2; RENAME TABLE test_01155_ordinary.dst TO test_01155_atomic.dst; RENAME TABLE test_01155_ordinary.src TO test_01155_atomic.src; -SET check_table_dependencies=0; -- Otherwise we'll get error "test_01155_atomic.dict depends on test_01155_ordinary.dist" in the next line. +SET check_table_dependencies=0; -- Otherwise we'll get error "test_01155_ordinary.dict depends on test_01155_ordinary.dist" in the next line. RENAME TABLE test_01155_ordinary.dist TO test_01155_atomic.dist; SET check_table_dependencies=1; RENAME DICTIONARY test_01155_ordinary.dict TO test_01155_atomic.dict; @@ -65,7 +65,7 @@ SELECT dictGet('test_01155_ordinary.dict', 'x', 'after renaming database'); SELECT database, substr(name, 1, 10) FROM system.tables WHERE database like 'test_01155_%'; -- Move tables back -SET check_table_dependencies=0; -- Otherwise we'll get error "test_01155_atomic.dict depends on test_01155_ordinary.dist" in the next line. +SET check_table_dependencies=0; -- Otherwise we'll get error "test_01155_ordinary.dict depends on test_01155_ordinary.dist" in the next line. RENAME DATABASE test_01155_ordinary TO test_01155_atomic; SET check_table_dependencies=1; diff --git a/tests/queries/0_stateless/01169_alter_partition_isolation_stress.sh b/tests/queries/0_stateless/01169_alter_partition_isolation_stress.sh index 3fb3730f758..a385fc81fe4 100755 --- a/tests/queries/0_stateless/01169_alter_partition_isolation_stress.sh +++ b/tests/queries/0_stateless/01169_alter_partition_isolation_stress.sh @@ -102,7 +102,7 @@ function thread_partition_src_to_dst() if echo "$output" | is_tx_aborted_with "FUNCTION_THROW_IF_VALUE_IS_NON_ZERO" then echo "thread_partition_src_to_dst tx_id: $tx_id session_id: $session_id" >&2 - echо "select throwIf has failed with FUNCTION_THROW_IF_VALUE_IS_NON_ZERO" >&2 + echo "select throwIf has failed with FUNCTION_THROW_IF_VALUE_IS_NON_ZERO" >&2 echo -e "trace_output:\n $trace_output" >&2 echo -e "output:\n $output" >&2 return 1 diff --git a/tests/queries/0_stateless/01172_transaction_counters.reference b/tests/queries/0_stateless/01172_transaction_counters.reference index 3099fae4a42..d088d747ee8 100644 --- a/tests/queries/0_stateless/01172_transaction_counters.reference +++ b/tests/queries/0_stateless/01172_transaction_counters.reference @@ -23,6 +23,7 @@ 3 1 Begin 1 1 1 1 3 1 AddPart 1 1 1 1 all_3_3_0 3 1 Commit 1 1 1 0 +1 1 LockPart 1 1 1 1 all_2_2_0 4 1 Begin 1 1 1 1 4 1 AddPart 1 1 1 1 all_4_4_0 4 1 Commit 1 1 1 0 diff --git a/tests/queries/0_stateless/01172_transaction_counters.sql b/tests/queries/0_stateless/01172_transaction_counters.sql index ffe2d08e2d8..a809e4196e9 100644 --- a/tests/queries/0_stateless/01172_transaction_counters.sql +++ b/tests/queries/0_stateless/01172_transaction_counters.sql @@ -2,7 +2,7 @@ drop table if exists txn_counters; -create table txn_counters (n Int64, creation_tid DEFAULT transactionID()) engine=MergeTree order by n; +create table txn_counters (n Int64, creation_tid DEFAULT 
transactionID()) engine=MergeTree order by n SETTINGS old_parts_lifetime=3600; insert into txn_counters(n) values (1); select transactionID(); @@ -31,7 +31,7 @@ attach table txn_counters; begin transaction; insert into txn_counters(n) values (4); select 6, system.parts.name, txn_counters.creation_tid = system.parts.creation_tid from txn_counters join system.parts on txn_counters._part = system.parts.name where database=currentDatabase() and table='txn_counters' order by system.parts.name; -select 7, name, removal_tid, removal_csn from system.parts where database=currentDatabase() and table='txn_counters' order by system.parts.name; +select 7, name, removal_tid, removal_csn from system.parts where database=currentDatabase() and table='txn_counters' and active order by system.parts.name; select 8, transactionID().3 == serverUUID(); commit; diff --git a/tests/queries/0_stateless/01195_formats_diagnostic_info.reference b/tests/queries/0_stateless/01195_formats_diagnostic_info.reference index 139926db671..0d4705dc9bf 100644 --- a/tests/queries/0_stateless/01195_formats_diagnostic_info.reference +++ b/tests/queries/0_stateless/01195_formats_diagnostic_info.reference @@ -27,7 +27,6 @@ Column 2, name: d, type: Decimal(18, 10), parsed text: "123456789"ERROR ERROR: There is no delimiter between fields: expected "", got "7Hello123" ERROR: There is no delimiter after last field: expected "", got "1" ERROR: There is no delimiter after last field: expected "", got "Hello" -Column 0, name: t, type: DateTime, ERROR: text "" is not like DateTime JSONCompactEachRow Column 2, name: d, type: Decimal(18, 10), parsed text: "123456789"ERROR Column 0, name: t, type: DateTime, parsed text: "2020-04-21 12:34:56"ERROR: DateTime must be in YYYY-MM-DD hh:mm:ss or NNNNNNNNNN (unix timestamp, exactly 10 digits) format. 
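The 01195_formats_diagnostic_info edits (reference above, script below) drop the stale expectation for a trailing empty line while keeping the DateTime diagnostics. For reference, the remaining diagnostics can be reproduced by piping a malformed row into clickhouse-local, roughly as the shell script does; this Python sketch is illustrative only and assumes a clickhouse-local binary is on PATH:

import subprocess

def tsv_parse_error(row: bytes) -> str:
    # same invocation shape as the PARSER array in the test script
    cmd = [
        "clickhouse-local",
        "--query", "SELECT t, s, d FROM table",
        "--structure", "t DateTime, s String, d Decimal64(10)",
        "--input-format", "TSV",
    ]
    proc = subprocess.run(cmd, input=row, capture_output=True)
    return proc.stderr.decode()

# '12:34:567' is not a valid DateTime, so this prints the
# "DateTime must be in YYYY-MM-DD hh:mm:ss ..." diagnostic from the reference file
print(tsv_parse_error(b"2020-04-21 12:34:567\tHello\t123456789\n"))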
diff --git a/tests/queries/0_stateless/01195_formats_diagnostic_info.sh b/tests/queries/0_stateless/01195_formats_diagnostic_info.sh index dde410d95c4..a6b3ebf4087 100755 --- a/tests/queries/0_stateless/01195_formats_diagnostic_info.sh +++ b/tests/queries/0_stateless/01195_formats_diagnostic_info.sh @@ -37,7 +37,6 @@ echo -e '2020-04-21 12:34:56\tHello\t123456789' | "${PARSER[@]}" 2>&1| grep "ERR echo -e '2020-04-21 12:34:567\tHello\t123456789' | "${PARSER[@]}" 2>&1| grep "ERROR" echo -e '2020-04-21 12:34:56\tHello\t12345678\t1' | "${PARSER[@]}" 2>&1| grep "ERROR" echo -e '2020-04-21 12:34:56\t\t123Hello' | "${PARSER[@]}" 2>&1| grep "ERROR" -echo -e '2020-04-21 12:34:56\tHello\t12345678\n' | "${PARSER[@]}" 2>&1| grep "ERROR" PARSER=(${CLICKHOUSE_LOCAL} --query 'SELECT t, s, d FROM table' --structure 't DateTime, s String, d Decimal64(10)' --input-format JSONCompactEachRow) echo '["2020-04-21 12:34:56", "Hello", 12345678]' | "${PARSER[@]}" 2>&1| grep "ERROR" || echo "JSONCompactEachRow" diff --git a/tests/queries/0_stateless/01293_show_clusters.reference b/tests/queries/0_stateless/01293_show_clusters.reference index ad790cc8b4f..c62f8cdfa2d 100644 --- a/tests/queries/0_stateless/01293_show_clusters.reference +++ b/tests/queries/0_stateless/01293_show_clusters.reference @@ -1,2 +1,2 @@ test_shard_localhost -test_shard_localhost 1 1 1 localhost ::1 9000 1 default 0 0 0 +test_shard_localhost 1 1 1 localhost ::1 9000 1 default diff --git a/tests/queries/0_stateless/01293_show_clusters.sh b/tests/queries/0_stateless/01293_show_clusters.sh new file mode 100755 index 00000000000..2fdf17ec25e --- /dev/null +++ b/tests/queries/0_stateless/01293_show_clusters.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "show clusters like 'test_shard%' limit 1" +# cluster,shard_num,shard_weight,replica_num,host_name,host_address,port,is_local,user,default_database[,errors_count,slowdowns_count,estimated_recovery_time] +$CLICKHOUSE_CLIENT -q "show cluster 'test_shard_localhost'" | cut -f-10 diff --git a/tests/queries/0_stateless/01293_show_clusters.sql b/tests/queries/0_stateless/01293_show_clusters.sql deleted file mode 100644 index e1ef8621a16..00000000000 --- a/tests/queries/0_stateless/01293_show_clusters.sql +++ /dev/null @@ -1,4 +0,0 @@ --- don't show all clusters to reduce dependency on the configuration of server -set send_logs_level = 'fatal'; -show clusters like 'test_shard%' limit 1; -show cluster 'test_shard_localhost'; diff --git a/tests/queries/0_stateless/01324_if_transform_strings_to_enum.reference b/tests/queries/0_stateless/01324_if_transform_strings_to_enum.reference index 7cf545176e3..4bb0a9b8323 100644 --- a/tests/queries/0_stateless/01324_if_transform_strings_to_enum.reference +++ b/tests/queries/0_stateless/01324_if_transform_strings_to_enum.reference @@ -21,7 +21,7 @@ censor.net censor.net censor.net censor.net -SELECT if(number > 5, \'censor.net\', \'google\') +SELECT if(number > 5, _CAST(\'censor.net\', \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\'), _CAST(\'google\', \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\')) FROM system.numbers LIMIT 10 other diff --git a/tests/queries/0_stateless/01408_range_overflow.reference b/tests/queries/0_stateless/01408_range_overflow.reference index e69de29bb2d..4f31f0710ee 100644 --- a/tests/queries/0_stateless/01408_range_overflow.reference +++ b/tests/queries/0_stateless/01408_range_overflow.reference @@ -0,0 +1,5 @@ +[1025,9223372036854776832] +[1025,9223372036854776832] +[1025,9223372036854776832] +[1025,9223372036854776832] +[1025,9223372036854776832] diff --git a/tests/queries/0_stateless/01408_range_overflow.sql b/tests/queries/0_stateless/01408_range_overflow.sql index 1640798999c..2107e8c3f36 100644 --- a/tests/queries/0_stateless/01408_range_overflow.sql +++ b/tests/queries/0_stateless/01408_range_overflow.sql @@ -1,12 +1,13 @@ -- executeGeneric() -SELECT range(1025, 1048576 + 9223372036854775807, 9223372036854775807); -- { serverError 69; } -SELECT range(1025, 1048576 + (9223372036854775807 AS i), i); -- { serverError 69; } +SELECT range(1025, 1048576 + 9223372036854775807, 9223372036854775807); +SELECT range(1025, 1048576 + (9223372036854775807 AS i), i); +SELECT range(1025, 18446744073709551615, 1); -- { serverError 69; } -- executeConstStep() -SELECT range(number, 1048576 + 9223372036854775807, 9223372036854775807) FROM system.numbers LIMIT 1 OFFSET 1025; -- { serverError 69; } +SELECT range(number, 1048576 + 9223372036854775807, 9223372036854775807) FROM system.numbers LIMIT 1 OFFSET 1025; -- executeConstStartStep() -SELECT range(1025, number + 9223372036854775807, 9223372036854775807) FROM system.numbers LIMIT 1 OFFSET 1048576; -- { serverError 69; } +SELECT range(1025, number + 9223372036854775807, 9223372036854775807) FROM system.numbers LIMIT 1 OFFSET 1048576; -- executeConstStart() -SELECT range(1025, 1048576 + 9223372036854775807, number + 9223372036854775807) FROM system.numbers LIMIT 1; -- { serverError 69; } +SELECT range(1025, 1048576 + 9223372036854775807, number + 9223372036854775807) FROM system.numbers LIMIT 1; diff --git a/tests/queries/0_stateless/01560_ttl_remove_empty_parts.sh b/tests/queries/0_stateless/01560_ttl_remove_empty_parts.sh new file 
mode 100755 index 00000000000..b65e6019a2a --- /dev/null +++ b/tests/queries/0_stateless/01560_ttl_remove_empty_parts.sh @@ -0,0 +1,30 @@ +#!/usr/bin/env bash +set -euo pipefail + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh +# shellcheck source=./parts.lib +. "$CURDIR"/parts.lib + +${CLICKHOUSE_CLIENT} -q 'DROP TABLE IF EXISTS ttl_empty_parts' + +${CLICKHOUSE_CLIENT} -q ' + CREATE TABLE ttl_empty_parts (id UInt32, d Date) ENGINE = MergeTree ORDER BY tuple() PARTITION BY id SETTINGS old_parts_lifetime=5 +' + +${CLICKHOUSE_CLIENT} -q "INSERT INTO ttl_empty_parts SELECT 0, toDate('2005-01-01') + number from numbers(500);" +${CLICKHOUSE_CLIENT} -q "INSERT INTO ttl_empty_parts SELECT 1, toDate('2050-01-01') + number from numbers(500);" + +${CLICKHOUSE_CLIENT} -q "SELECT count() FROM ttl_empty_parts;" +${CLICKHOUSE_CLIENT} -q "SELECT count() FROM system.parts WHERE table = 'ttl_empty_parts' AND database = currentDatabase() AND active;" + +${CLICKHOUSE_CLIENT} -q "ALTER TABLE ttl_empty_parts MODIFY TTL d SETTINGS mutations_sync = 1;" + +# To be sure, that task, which clears outdated parts executed. +timeout 60 bash -c 'wait_for_delete_empty_parts ttl_empty_parts' + +${CLICKHOUSE_CLIENT} -q "SELECT count() FROM ttl_empty_parts;" +${CLICKHOUSE_CLIENT} -q "SELECT count() FROM system.parts WHERE table = 'ttl_empty_parts' AND database = currentDatabase() AND active;" + +${CLICKHOUSE_CLIENT} -q "DROP TABLE ttl_empty_parts;" diff --git a/tests/queries/0_stateless/01560_ttl_remove_empty_parts.sql b/tests/queries/0_stateless/01560_ttl_remove_empty_parts.sql deleted file mode 100644 index f40ed70caef..00000000000 --- a/tests/queries/0_stateless/01560_ttl_remove_empty_parts.sql +++ /dev/null @@ -1,20 +0,0 @@ -DROP TABLE IF EXISTS ttl_empty_parts; - -CREATE TABLE ttl_empty_parts (id UInt32, d Date) ENGINE = MergeTree ORDER BY tuple() PARTITION BY id; - -INSERT INTO ttl_empty_parts SELECT 0, toDate('2005-01-01') + number from numbers(500); -INSERT INTO ttl_empty_parts SELECT 1, toDate('2050-01-01') + number from numbers(500); - -SELECT count() FROM ttl_empty_parts; -SELECT count() FROM system.parts WHERE table = 'ttl_empty_parts' AND database = currentDatabase() AND active; - -ALTER TABLE ttl_empty_parts MODIFY TTL d; - --- To be sure, that task, which clears outdated parts executed. -DETACH TABLE ttl_empty_parts; -ATTACH TABLE ttl_empty_parts; - -SELECT count() FROM ttl_empty_parts; -SELECT count() FROM system.parts WHERE table = 'ttl_empty_parts' AND database = currentDatabase() AND active; - -DROP TABLE ttl_empty_parts; diff --git a/tests/queries/0_stateless/01606_git_import.sh b/tests/queries/0_stateless/01606_git_import.sh index 8a2091a99a8..585b39e21ab 100755 --- a/tests/queries/0_stateless/01606_git_import.sh +++ b/tests/queries/0_stateless/01606_git_import.sh @@ -1,4 +1,5 @@ #!/usr/bin/env bash +# Tags: no-debug CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01910_client_replxx_container_overflow_long.expect b/tests/queries/0_stateless/01910_client_replxx_container_overflow_long.expect index 1be56675b33..0e06c2f99df 100755 --- a/tests/queries/0_stateless/01910_client_replxx_container_overflow_long.expect +++ b/tests/queries/0_stateless/01910_client_replxx_container_overflow_long.expect @@ -24,12 +24,13 @@ send -- "SELECT 1\r" expect "1" expect ":) " -# Do reverse-search. -send -- "" +# Do regular reverse-search (Ctrl-T). 
+send -- "" expect "(reverse-i-search)" send -- "1" expect "(reverse-i-search)" # This will trigger the container-overflow under ASAN before the fix. +# NOTE: that the next search in the list is still Ctrl-R send -- "" expect "(reverse-i-search)" diff --git a/tests/queries/0_stateless/02022_storage_filelog_one_file.sh b/tests/queries/0_stateless/02022_storage_filelog_one_file.sh index 2f47001eda9..2f43423e13e 100755 --- a/tests/queries/0_stateless/02022_storage_filelog_one_file.sh +++ b/tests/queries/0_stateless/02022_storage_filelog_one_file.sh @@ -20,23 +20,25 @@ done ${CLICKHOUSE_CLIENT} --query "drop table if exists file_log;" ${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=FileLog('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}.txt', 'CSV');" -${CLICKHOUSE_CLIENT} --query "select * from file_log order by k;" +${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings stream_like_engine_allow_direct_select=1;" for i in {100..120} do echo $i, $i >> ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}.txt done -${CLICKHOUSE_CLIENT} --query "select * from file_log order by k;" +${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings stream_like_engine_allow_direct_select=1;" # touch does not change file content, no event touch ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}.txt -${CLICKHOUSE_CLIENT} --query "select * from file_log order by k;" +${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings stream_like_engine_allow_direct_select=1;" ${CLICKHOUSE_CLIENT} --query "detach table file_log;" ${CLICKHOUSE_CLIENT} --query "attach table file_log;" # should no records return -${CLICKHOUSE_CLIENT} --query "select * from file_log order by k;" +${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings stream_like_engine_allow_direct_select=1;" + +${CLICKHOUSE_CLIENT} --query "drop table file_log;" rm -rf ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}.txt diff --git a/tests/queries/0_stateless/02023_storage_filelog.sh b/tests/queries/0_stateless/02023_storage_filelog.sh index 7480e378d8b..71ed5ba5471 100755 --- a/tests/queries/0_stateless/02023_storage_filelog.sh +++ b/tests/queries/0_stateless/02023_storage_filelog.sh @@ -23,11 +23,11 @@ done ${CLICKHOUSE_CLIENT} --query "drop table if exists file_log;" ${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=FileLog('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/', 'CSV');" -${CLICKHOUSE_CLIENT} --query "select * from file_log order by k;" +${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings stream_like_engine_allow_direct_select=1;" cp ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/b.txt -${CLICKHOUSE_CLIENT} --query "select * from file_log order by k;" +${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings stream_like_engine_allow_direct_select=1;" for i in {100..120} do @@ -44,7 +44,7 @@ mv ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/b.txt ${user_files_path}/${ rm ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/d.txt -${CLICKHOUSE_CLIENT} --query "select * from file_log order by k;" +${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings stream_like_engine_allow_direct_select=1;" ${CLICKHOUSE_CLIENT} --query "detach table file_log;" cp ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/e.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/f.txt @@ -60,18 +60,18 @@ do done ${CLICKHOUSE_CLIENT} 
--query "attach table file_log;" -${CLICKHOUSE_CLIENT} --query "select * from file_log order by k;" +${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings stream_like_engine_allow_direct_select=1;" ${CLICKHOUSE_CLIENT} --query "detach table file_log;" ${CLICKHOUSE_CLIENT} --query "attach table file_log;" # should no records return -${CLICKHOUSE_CLIENT} --query "select * from file_log order by k;" +${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings stream_like_engine_allow_direct_select=1;" truncate ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt --size 0 # exception happend -${CLICKHOUSE_CLIENT} --query "select * from file_log order by k;" 2>&1 | grep -q "Code: 33" && echo 'OK' || echo 'FAIL' +${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings stream_like_engine_allow_direct_select=1;" 2>&1 | grep -q "Code: 33" && echo 'OK' || echo 'FAIL' ${CLICKHOUSE_CLIENT} --query "drop table file_log;" diff --git a/tests/queries/0_stateless/02025_dictionary_view_different_db.sql b/tests/queries/0_stateless/02025_dictionary_view_different_db.sql index b06285b683f..f26a63bced4 100644 --- a/tests/queries/0_stateless/02025_dictionary_view_different_db.sql +++ b/tests/queries/0_stateless/02025_dictionary_view_different_db.sql @@ -34,8 +34,8 @@ DROP VIEW IF EXISTS test_view_different_db; CREATE VIEW test_view_different_db AS SELECT id, value, dictGet('2025_test_db.test_dictionary', 'value', id) FROM 2025_test_db.view_table; SELECT * FROM test_view_different_db; -DROP TABLE 2025_test_db.test_table; DROP DICTIONARY 2025_test_db.test_dictionary; +DROP TABLE 2025_test_db.test_table; DROP TABLE 2025_test_db.view_table; DROP VIEW test_view_different_db; diff --git a/tests/queries/0_stateless/02025_storage_filelog_virtual_col.sh b/tests/queries/0_stateless/02025_storage_filelog_virtual_col.sh index f0faafe55d5..e4041b2d755 100755 --- a/tests/queries/0_stateless/02025_storage_filelog_virtual_col.sh +++ b/tests/queries/0_stateless/02025_storage_filelog_virtual_col.sh @@ -24,11 +24,11 @@ done ${CLICKHOUSE_CLIENT} --query "drop table if exists file_log;" ${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt8, v UInt8) engine=FileLog('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/', 'CSV');" -${CLICKHOUSE_CLIENT} --query "select *, _filename, _offset from file_log order by _filename, _offset;" +${CLICKHOUSE_CLIENT} --query "select *, _filename, _offset from file_log order by _filename, _offset settings stream_like_engine_allow_direct_select=1;" cp ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/b.txt -${CLICKHOUSE_CLIENT} --query "select *, _filename, _offset from file_log order by _filename, _offset;" +${CLICKHOUSE_CLIENT} --query "select *, _filename, _offset from file_log order by _filename, _offset settings stream_like_engine_allow_direct_select=1;" for i in {100..120} do @@ -44,18 +44,18 @@ cp ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt ${user_files_path}/${ rm ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/d.txt -${CLICKHOUSE_CLIENT} --query "select *, _filename, _offset from file_log order by _filename, _offset;" +${CLICKHOUSE_CLIENT} --query "select *, _filename, _offset from file_log order by _filename, _offset settings stream_like_engine_allow_direct_select=1;" ${CLICKHOUSE_CLIENT} --query "detach table file_log;" ${CLICKHOUSE_CLIENT} --query "attach table file_log;" # should no records return -${CLICKHOUSE_CLIENT} --query "select *, _filename, _offset from 
file_log order by _filename, _offset;" +${CLICKHOUSE_CLIENT} --query "select *, _filename, _offset from file_log order by _filename, _offset settings stream_like_engine_allow_direct_select=1;" truncate ${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/a.txt --size 0 # exception happend -${CLICKHOUSE_CLIENT} --query "select * from file_log order by k;" 2>&1 | grep -q "Code: 33" && echo 'OK' || echo 'FAIL' +${CLICKHOUSE_CLIENT} --query "select * from file_log order by k settings stream_like_engine_allow_direct_select=1;" 2>&1 | grep -q "Code: 33" && echo 'OK' || echo 'FAIL' ${CLICKHOUSE_CLIENT} --query "drop table file_log;" diff --git a/tests/queries/0_stateless/02026_storage_filelog_largefile.sh b/tests/queries/0_stateless/02026_storage_filelog_largefile.sh index c28d20c9e8a..41a9d82949c 100755 --- a/tests/queries/0_stateless/02026_storage_filelog_largefile.sh +++ b/tests/queries/0_stateless/02026_storage_filelog_largefile.sh @@ -26,14 +26,14 @@ done ${CLICKHOUSE_CLIENT} --query "drop table if exists file_log;" ${CLICKHOUSE_CLIENT} --query "create table file_log(k UInt32, v UInt32) engine=FileLog('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/', 'CSV');" -${CLICKHOUSE_CLIENT} --query "select count() from file_log " +${CLICKHOUSE_CLIENT} --query "select count() from file_log settings stream_like_engine_allow_direct_select=1;" for i in {11..20} do ${CLICKHOUSE_CLIENT} --query "insert into function file('${user_files_path}/${CLICKHOUSE_TEST_UNIQUE_NAME}/test$i.csv', 'CSV', 'k UInt32, v UInt32') select number, number from numbers(10000);" done -${CLICKHOUSE_CLIENT} --query "select count() from file_log " +${CLICKHOUSE_CLIENT} --query "select count() from file_log settings stream_like_engine_allow_direct_select=1;" ${CLICKHOUSE_CLIENT} --query "drop table file_log;" diff --git a/tests/queries/0_stateless/02117_show_create_table_system.reference b/tests/queries/0_stateless/02117_show_create_table_system.reference index 81536c8544f..c866f3e7b52 100644 --- a/tests/queries/0_stateless/02117_show_create_table_system.reference +++ b/tests/queries/0_stateless/02117_show_create_table_system.reference @@ -128,6 +128,7 @@ CREATE TABLE system.databases `data_path` String, `metadata_path` String, `uuid` UUID, + `engine_full` String, `comment` String, `database` String ) @@ -503,6 +504,8 @@ CREATE TABLE system.parts `creation_csn` UInt64, `removal_csn` UInt64, `has_lightweight_delete` UInt8, + `last_removal_attemp_time` DateTime, + `removal_state` String, `bytes` UInt64, `marks_size` UInt64 ) diff --git a/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.reference b/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.reference index b0ec4bef499..b881fce1539 100644 --- a/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.reference +++ b/tests/queries/0_stateless/02149_schema_inference_formats_with_schema.reference @@ -24,12 +24,12 @@ fixed_string Nullable(String) Str: 0 100 Str: 1 200 array Array(Nullable(UInt64)) -tuple Tuple(Nullable(UInt64), Nullable(String)) +tuple Tuple(`1` Nullable(UInt64), `2` Nullable(String)) map Map(String, Nullable(UInt64)) [0,1] (0,'0') {'0':0} [1,2] (1,'1') {'1':1} -nested1 Array(Tuple(Array(Nullable(UInt64)), Map(String, Nullable(UInt64)))) -nested2 Tuple(Tuple(Array(Array(Nullable(UInt64))), Map(UInt64, Array(Tuple(Nullable(UInt64), Nullable(String))))), Nullable(UInt8)) +nested1 Array(Tuple(`1` Array(Nullable(UInt64)), `2` Map(String, Nullable(UInt64)))) +nested2 Tuple(`1` Tuple(`1` Array(Array(Nullable(UInt64))), `2` 
Map(UInt64, Array(Tuple(`1` Nullable(UInt64), `2` Nullable(String))))), `2` Nullable(UInt8)) [([0,1],{'42':0}),([],{}),([42],{'42':42})] (([[0],[1],[]],{0:[(0,'42'),(1,'42')]}),42) [([1,2],{'42':1}),([],{}),([42],{'42':42})] (([[1],[2],[]],{1:[(1,'42'),(2,'42')]}),42) ArrowStream @@ -58,12 +58,12 @@ fixed_string Nullable(String) Str: 0 100 Str: 1 200 array Array(Nullable(UInt64)) -tuple Tuple(Nullable(UInt64), Nullable(String)) +tuple Tuple(`1` Nullable(UInt64), `2` Nullable(String)) map Map(String, Nullable(UInt64)) [0,1] (0,'0') {'0':0} [1,2] (1,'1') {'1':1} -nested1 Array(Tuple(Array(Nullable(UInt64)), Map(String, Nullable(UInt64)))) -nested2 Tuple(Tuple(Array(Array(Nullable(UInt64))), Map(UInt64, Array(Tuple(Nullable(UInt64), Nullable(String))))), Nullable(UInt8)) +nested1 Array(Tuple(`1` Array(Nullable(UInt64)), `2` Map(String, Nullable(UInt64)))) +nested2 Tuple(`1` Tuple(`1` Array(Array(Nullable(UInt64))), `2` Map(UInt64, Array(Tuple(`1` Nullable(UInt64), `2` Nullable(String))))), `2` Nullable(UInt8)) [([0,1],{'42':0}),([],{}),([42],{'42':42})] (([[0],[1],[]],{0:[(0,'42'),(1,'42')]}),42) [([1,2],{'42':1}),([],{}),([42],{'42':42})] (([[1],[2],[]],{1:[(1,'42'),(2,'42')]}),42) Parquet @@ -92,12 +92,12 @@ fixed_string Nullable(String) Str: 0 100 Str: 1 200 array Array(Nullable(UInt64)) -tuple Tuple(Nullable(UInt64), Nullable(String)) +tuple Tuple(`1` Nullable(UInt64), `2` Nullable(String)) map Map(String, Nullable(UInt64)) [0,1] (0,'0') {'0':0} [1,2] (1,'1') {'1':1} -nested1 Array(Tuple(Array(Nullable(UInt64)), Map(String, Nullable(UInt64)))) -nested2 Tuple(Tuple(Array(Array(Nullable(UInt64))), Map(UInt64, Array(Tuple(Nullable(UInt64), Nullable(String))))), Nullable(UInt8)) +nested1 Array(Tuple(`1` Array(Nullable(UInt64)), `2` Map(String, Nullable(UInt64)))) +nested2 Tuple(`1` Tuple(`1` Array(Array(Nullable(UInt64))), `2` Map(UInt64, Array(Tuple(`1` Nullable(UInt64), `2` Nullable(String))))), `2` Nullable(UInt8)) [([0,1],{'42':0}),([],{}),([42],{'42':42})] (([[0],[1],[]],{0:[(0,'42'),(1,'42')]}),42) [([1,2],{'42':1}),([],{}),([42],{'42':42})] (([[1],[2],[]],{1:[(1,'42'),(2,'42')]}),42) ORC @@ -126,12 +126,12 @@ fixed_string Nullable(String) Str: 0 100 Str: 1 200 array Array(Nullable(Int64)) -tuple Tuple(Nullable(Int64), Nullable(String)) +tuple Tuple(`1` Nullable(Int64), `2` Nullable(String)) map Map(String, Nullable(Int64)) [0,1] (0,'0') {'0':0} [1,2] (1,'1') {'1':1} -nested1 Array(Tuple(Array(Nullable(Int64)), Map(String, Nullable(Int64)))) -nested2 Tuple(Tuple(Array(Array(Nullable(Int64))), Map(Int64, Array(Tuple(Nullable(Int64), Nullable(String))))), Nullable(Int8)) +nested1 Array(Tuple(`1` Array(Nullable(Int64)), `2` Map(String, Nullable(Int64)))) +nested2 Tuple(`1` Tuple(`1` Array(Array(Nullable(Int64))), `2` Map(Int64, Array(Tuple(`1` Nullable(Int64), `2` Nullable(String))))), `2` Nullable(Int8)) [([0,1],{'42':0}),([],{}),([42],{'42':42})] (([[0],[1],[]],{0:[(0,'42'),(1,'42')]}),42) [([1,2],{'42':1}),([],{}),([42],{'42':42})] (([[1],[2],[]],{1:[(1,'42'),(2,'42')]}),42) Native diff --git a/tests/queries/0_stateless/02179_dict_reload_on_cluster.sql b/tests/queries/0_stateless/02179_dict_reload_on_cluster.sql index 686025acbf8..f21fa16ba23 100644 --- a/tests/queries/0_stateless/02179_dict_reload_on_cluster.sql +++ b/tests/queries/0_stateless/02179_dict_reload_on_cluster.sql @@ -23,6 +23,7 @@ SELECT query_count FROM system.dictionaries WHERE database = 'dict_db_02179' AND SELECT 'SYSTEM RELOAD DICTIONARIES ON CLUSTER test_shard_localhost'; SET distributed_ddl_output_mode='throw'; 
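The 02179_dict_reload_on_cluster addition just below checks that ON CLUSTER without a cluster name is rejected by the parser, while the named form still works. A hypothetical integration-test sketch of the same check (node and helpers as in the tests earlier in this patch; the error substring and the availability of a cluster named test_shard_localhost in the server config are assumptions):

from helpers.cluster import ClickHouseCluster

cluster = ClickHouseCluster(__file__)
node = cluster.add_instance("node", with_zookeeper=True)

def check_reload_dictionaries_on_cluster(node):
    # bare ON CLUSTER: the statement fails to parse
    error = node.query_and_get_error("SYSTEM RELOAD DICTIONARIES ON CLUSTER")
    assert "Syntax error" in error
    # explicit cluster name (must be defined in remote_servers): accepted
    node.query("SYSTEM RELOAD DICTIONARIES ON CLUSTER test_shard_localhost")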
+SYSTEM RELOAD DICTIONARIES ON CLUSTER; -- { clientError SYNTAX_ERROR } SYSTEM RELOAD DICTIONARIES ON CLUSTER test_shard_localhost; SET distributed_ddl_output_mode='none'; SELECT query_count FROM system.dictionaries WHERE database = 'dict_db_02179' AND name = 'dict'; diff --git a/tests/queries/0_stateless/02187_msg_pack_uuid.reference b/tests/queries/0_stateless/02187_msg_pack_uuid.reference index c567cc14ad2..68104e8f901 100644 --- a/tests/queries/0_stateless/02187_msg_pack_uuid.reference +++ b/tests/queries/0_stateless/02187_msg_pack_uuid.reference @@ -1,4 +1,4 @@ 5e7084e0-019f-461f-9e70-84e0019f561f 5e7084e0-019f-461f-9e70-84e0019f561f 5e7084e0-019f-461f-9e70-84e0019f561f -5e7084e0-019f-461f-9e70-84e0019f561f UUID +5e7084e0-019f-461f-9e70-84e0019f561f Nullable(UUID) diff --git a/tests/queries/0_stateless/02223_insert_select_schema_inference.sql b/tests/queries/0_stateless/02223_insert_select_schema_inference.sql index ff39ca83b9b..031ced1b299 100644 --- a/tests/queries/0_stateless/02223_insert_select_schema_inference.sql +++ b/tests/queries/0_stateless/02223_insert_select_schema_inference.sql @@ -1,5 +1,5 @@ drop table if exists test; create table test (x UInt32, y String, d Date) engine=Memory() as select number as x, toString(number) as y, toDate(number) as d from numbers(10); -insert into table function file('data.native.zst') select * from test; +insert into table function file('data.native.zst') select * from test settings engine_file_truncate_on_insert=1; desc file('data.native.zst'); select * from file('data.native.zst'); diff --git a/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.reference b/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.reference index debc5c58936..2ecce985eb4 100644 --- a/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.reference +++ b/tests/queries/0_stateless/02242_arrow_orc_parquet_nullable_schema_inference.reference @@ -2,7 +2,7 @@ Arrow x Nullable(UInt64) arr1 Array(Nullable(UInt64)) arr2 Array(Array(Nullable(String))) -arr3 Array(Tuple(Nullable(String), Nullable(UInt64))) +arr3 Array(Tuple(`1` Nullable(String), `2` Nullable(UInt64))) 0 [0,1] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,0)] \N [NULL,2] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,1)] 2 [2,3] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,2)] @@ -12,7 +12,7 @@ ArrowStream x Nullable(UInt64) arr1 Array(Nullable(UInt64)) arr2 Array(Array(Nullable(String))) -arr3 Array(Tuple(Nullable(String), Nullable(UInt64))) +arr3 Array(Tuple(`1` Nullable(String), `2` Nullable(UInt64))) 0 [0,1] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,0)] \N [NULL,2] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,1)] 2 [2,3] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,2)] @@ -22,7 +22,7 @@ Parquet x Nullable(UInt64) arr1 Array(Nullable(UInt64)) arr2 Array(Array(Nullable(String))) -arr3 Array(Tuple(Nullable(String), Nullable(UInt64))) +arr3 Array(Tuple(`1` Nullable(String), `2` Nullable(UInt64))) 0 [0,1] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,0)] \N [NULL,2] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,1)] 2 [2,3] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,2)] @@ -32,7 +32,7 @@ ORC x Nullable(Int64) arr1 Array(Nullable(Int64)) arr2 Array(Array(Nullable(String))) -arr3 Array(Tuple(Nullable(String), Nullable(Int64))) +arr3 Array(Tuple(`1` Nullable(String), `2` 
Nullable(Int64))) 0 [0,1] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,0)] \N [NULL,2] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,1)] 2 [2,3] [[NULL,'String'],[NULL],[]] [(NULL,NULL),('String',NULL),(NULL,2)] diff --git a/tests/queries/0_stateless/02245_s3_support_read_nested_column.reference b/tests/queries/0_stateless/02245_s3_support_read_nested_column.reference index e9754463ba1..fb198bd8401 100644 --- a/tests/queries/0_stateless/02245_s3_support_read_nested_column.reference +++ b/tests/queries/0_stateless/02245_s3_support_read_nested_column.reference @@ -4,7 +4,8 @@ drop table if exists test_02245_s3_nested_parquet2; set input_format_parquet_import_nested = 1; create table test_02245_s3_nested_parquet1(a Int64, b Tuple(a Int64, b String)) engine=S3(s3_conn, filename='test_02245_s3_nested_parquet1_{_partition_id}', format='Parquet') partition by a; insert into test_02245_s3_nested_parquet1 values (1, (2, 'a')); -select a, b.a, b.b from s3(s3_conn, filename='test_02245_s3_nested_parquet1_*', format='Parquet'); -- { serverError 47 } +select a, b.a, b.b from s3(s3_conn, filename='test_02245_s3_nested_parquet1_*', format='Parquet'); +1 2 a create table test_02245_s3_nested_parquet2(a Int64, b Tuple(a Int64, b Tuple(c Int64, d String))) engine=S3(s3_conn, filename='test_02245_s3_nested_parquet2_{_partition_id}', format='Parquet') partition by a; insert into test_02245_s3_nested_parquet2 values (1, (2, (3, 'a'))); select a, b.a, b.b.c, b.b.d from s3(s3_conn, filename='test_02245_s3_nested_parquet2_*', format='Parquet', structure='a Int64, b Tuple(a Int64, b Tuple(c Int64, d String))'); @@ -14,7 +15,8 @@ drop table if exists test_02245_s3_nested_arrow2; set input_format_arrow_import_nested=1; create table test_02245_s3_nested_arrow1(a Int64, b Tuple(a Int64, b String)) engine=S3(s3_conn, filename='test_02245_s3_nested_arrow1_{_partition_id}', format='Arrow') partition by a; insert into test_02245_s3_nested_arrow1 values (1, (2, 'a')); -select a, b.a, b.b from s3(s3_conn, filename='test_02245_s3_nested_arrow1_*', format='Arrow'); -- { serverError 47 } +select a, b.a, b.b from s3(s3_conn, filename='test_02245_s3_nested_arrow1_*', format='Arrow'); +1 2 a create table test_02245_s3_nested_arrow2(a Int64, b Tuple(a Int64, b Tuple(c Int64, d String))) engine=S3(s3_conn, filename='test_02245_s3_nested_arrow2_{_partition_id}', format='Arrow') partition by a; insert into test_02245_s3_nested_arrow2 values (1, (2, (3, 'a'))); select a, b.a, b.b.c, b.b.d from s3(s3_conn, filename='test_02245_s3_nested_arrow2_*', format='Arrow', structure='a Int64, b Tuple(a Int64, b Tuple(c Int64, d String))'); @@ -24,7 +26,8 @@ drop table if exists test_02245_s3_nested_orc2; set input_format_orc_import_nested=1; create table test_02245_s3_nested_orc1(a Int64, b Tuple(a Int64, b String)) engine=S3(s3_conn, filename='test_02245_s3_nested_orc1_{_partition_id}', format='ORC') partition by a; insert into test_02245_s3_nested_orc1 values (1, (2, 'a')); -select a, b.a, b.b from s3(s3_conn, filename='test_02245_s3_nested_orc1_*', format='ORC'); -- { serverError 47 } +select a, b.a, b.b from s3(s3_conn, filename='test_02245_s3_nested_orc1_*', format='ORC'); +1 2 a create table test_02245_s3_nested_orc2(a Int64, b Tuple(a Int64, b Tuple(c Int64, d String))) engine=S3(s3_conn, filename='test_02245_s3_nested_orc2_{_partition_id}', format='ORC') partition by a; insert into test_02245_s3_nested_orc2 values (1, (2, (3, 'a'))); select a, b.a, b.b.c, b.b.d from s3(s3_conn, 
filename='test_02245_s3_nested_orc2_*', format='ORC', structure='a Int64, b Tuple(a Int64, b Tuple(c Int64, d String))'); diff --git a/tests/queries/0_stateless/02245_s3_support_read_nested_column.sql b/tests/queries/0_stateless/02245_s3_support_read_nested_column.sql index 14fc7cee7dc..08788306de7 100644 --- a/tests/queries/0_stateless/02245_s3_support_read_nested_column.sql +++ b/tests/queries/0_stateless/02245_s3_support_read_nested_column.sql @@ -8,7 +8,7 @@ set input_format_parquet_import_nested = 1; create table test_02245_s3_nested_parquet1(a Int64, b Tuple(a Int64, b String)) engine=S3(s3_conn, filename='test_02245_s3_nested_parquet1_{_partition_id}', format='Parquet') partition by a; insert into test_02245_s3_nested_parquet1 values (1, (2, 'a')); -select a, b.a, b.b from s3(s3_conn, filename='test_02245_s3_nested_parquet1_*', format='Parquet'); -- { serverError 47 } +select a, b.a, b.b from s3(s3_conn, filename='test_02245_s3_nested_parquet1_*', format='Parquet'); create table test_02245_s3_nested_parquet2(a Int64, b Tuple(a Int64, b Tuple(c Int64, d String))) engine=S3(s3_conn, filename='test_02245_s3_nested_parquet2_{_partition_id}', format='Parquet') partition by a; insert into test_02245_s3_nested_parquet2 values (1, (2, (3, 'a'))); @@ -22,7 +22,7 @@ set input_format_arrow_import_nested=1; create table test_02245_s3_nested_arrow1(a Int64, b Tuple(a Int64, b String)) engine=S3(s3_conn, filename='test_02245_s3_nested_arrow1_{_partition_id}', format='Arrow') partition by a; insert into test_02245_s3_nested_arrow1 values (1, (2, 'a')); -select a, b.a, b.b from s3(s3_conn, filename='test_02245_s3_nested_arrow1_*', format='Arrow'); -- { serverError 47 } +select a, b.a, b.b from s3(s3_conn, filename='test_02245_s3_nested_arrow1_*', format='Arrow'); create table test_02245_s3_nested_arrow2(a Int64, b Tuple(a Int64, b Tuple(c Int64, d String))) engine=S3(s3_conn, filename='test_02245_s3_nested_arrow2_{_partition_id}', format='Arrow') partition by a; insert into test_02245_s3_nested_arrow2 values (1, (2, (3, 'a'))); @@ -36,7 +36,7 @@ set input_format_orc_import_nested=1; create table test_02245_s3_nested_orc1(a Int64, b Tuple(a Int64, b String)) engine=S3(s3_conn, filename='test_02245_s3_nested_orc1_{_partition_id}', format='ORC') partition by a; insert into test_02245_s3_nested_orc1 values (1, (2, 'a')); -select a, b.a, b.b from s3(s3_conn, filename='test_02245_s3_nested_orc1_*', format='ORC'); -- { serverError 47 } +select a, b.a, b.b from s3(s3_conn, filename='test_02245_s3_nested_orc1_*', format='ORC'); create table test_02245_s3_nested_orc2(a Int64, b Tuple(a Int64, b Tuple(c Int64, d String))) engine=S3(s3_conn, filename='test_02245_s3_nested_orc2_{_partition_id}', format='ORC') partition by a; insert into test_02245_s3_nested_orc2 values (1, (2, (3, 'a'))); diff --git a/tests/queries/0_stateless/02267_file_globs_schema_inference.sh b/tests/queries/0_stateless/02267_file_globs_schema_inference.sh index 701e18a0259..d4b3b89b3ba 100755 --- a/tests/queries/0_stateless/02267_file_globs_schema_inference.sh +++ b/tests/queries/0_stateless/02267_file_globs_schema_inference.sh @@ -14,5 +14,5 @@ $CLICKHOUSE_CLIENT -q "select * from file('${CLICKHOUSE_TEST_UNIQUE_NAME}_data*. 
$CLICKHOUSE_CLIENT -q "insert into function file('${CLICKHOUSE_TEST_UNIQUE_NAME}_data4.jsonl', 'TSV') select 1 as x"; $CLICKHOUSE_CLIENT -q "insert into function file('${CLICKHOUSE_TEST_UNIQUE_NAME}_data1.jsonl', 'TSV') select [1,2,3] as x SETTINGS engine_file_truncate_on_insert = 1"; -$CLICKHOUSE_CLIENT -q "select * from file('${CLICKHOUSE_TEST_UNIQUE_NAME}_data*.jsonl') settings schema_inference_use_cache_for_file=0" 2>&1 | grep -F -q "INCORRECT_DATA" && echo "OK" || echo "FAIL"; +$CLICKHOUSE_CLIENT -q "select * from file('${CLICKHOUSE_TEST_UNIQUE_NAME}_data*.jsonl') settings schema_inference_use_cache_for_file=0" 2>&1 | grep -F -q "CANNOT_PARSE_INPUT_ASSERTION_FAILED" && echo "OK" || echo "FAIL"; diff --git a/tests/queries/0_stateless/02268_json_wrong_root_type_in_schema_inference.sql b/tests/queries/0_stateless/02268_json_wrong_root_type_in_schema_inference.sql index 7427426602a..5462d38f1a3 100644 --- a/tests/queries/0_stateless/02268_json_wrong_root_type_in_schema_inference.sql +++ b/tests/queries/0_stateless/02268_json_wrong_root_type_in_schema_inference.sql @@ -1,7 +1,7 @@ -- Tags: no-fasttest insert into function file('02268_data.jsonl', 'TSV') select 1; -select * from file('02268_data.jsonl'); --{serverError 117} +select * from file('02268_data.jsonl'); --{serverError CANNOT_PARSE_INPUT_ASSERTION_FAILED} insert into function file('02268_data.jsonCompactEachRow', 'TSV') select 1; -select * from file('02268_data.jsonCompactEachRow'); --{serverError 117} +select * from file('02268_data.jsonCompactEachRow'); --{serverError CANNOT_PARSE_INPUT_ASSERTION_FAILED} diff --git a/tests/queries/0_stateless/02269_insert_select_with_format_without_schema_inference.sql b/tests/queries/0_stateless/02269_insert_select_with_format_without_schema_inference.sql index 8ea9dba2696..82b433e5948 100644 --- a/tests/queries/0_stateless/02269_insert_select_with_format_without_schema_inference.sql +++ b/tests/queries/0_stateless/02269_insert_select_with_format_without_schema_inference.sql @@ -1,2 +1,2 @@ -insert into function file('02269_data', 'RowBinary') select 1; +insert into function file('02269_data', 'RowBinary') select 1 settings engine_file_truncate_on_insert=1; select * from file('02269_data', 'RowBinary', 'x UInt8'); diff --git a/tests/queries/0_stateless/02270_errors_in_files_s3.reference b/tests/queries/0_stateless/02270_errors_in_files_s3.reference index bf106b8562b..49515a81c8b 100644 --- a/tests/queries/0_stateless/02270_errors_in_files_s3.reference +++ b/tests/queries/0_stateless/02270_errors_in_files_s3.reference @@ -1,2 +1,2 @@ http://localhost:11111/test/tsv_with_header.tsv -test/tsv_with_header.tsv +tsv_with_header.tsv diff --git a/tests/queries/0_stateless/02270_errors_in_files_s3.sh b/tests/queries/0_stateless/02270_errors_in_files_s3.sh index 8d8c6ba56ef..e98a0cc6ae0 100755 --- a/tests/queries/0_stateless/02270_errors_in_files_s3.sh +++ b/tests/queries/0_stateless/02270_errors_in_files_s3.sh @@ -6,4 +6,4 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . 
"$CUR_DIR"/../shell_config.sh ${CLICKHOUSE_CLIENT} --query "SELECT * FROM url('http://localhost:11111/test/{a,tsv_with_header}.tsv', 'TSV', 'c1 UInt64, c2 UInt64, c3 UInt64')" 2>&1 | grep -o -m1 "http://localhost:11111/test/tsv_with_header.tsv" -${CLICKHOUSE_CLIENT} --query "SELECT * FROM s3('http://localhost:11111/test/{a,tsv_with_header}.tsv', 'TSV', 'c1 UInt64, c2 UInt64, c3 UInt64')" 2>&1 | grep -o -m1 "test/tsv_with_header.tsv" +${CLICKHOUSE_CLIENT} --query "SELECT * FROM s3('http://localhost:11111/test/{a,tsv_with_header}.tsv', 'TSV', 'c1 UInt64, c2 UInt64, c3 UInt64')" 2>&1 | grep -o -m1 "tsv_with_header.tsv" diff --git a/tests/queries/0_stateless/02344_describe_cache.sql b/tests/queries/0_stateless/02344_describe_cache.sql index 8b3831bcaa8..fef004cb27f 100644 --- a/tests/queries/0_stateless/02344_describe_cache.sql +++ b/tests/queries/0_stateless/02344_describe_cache.sql @@ -1,4 +1,7 @@ --- Tags: no-fasttest +-- Tags: no-fasttest, no-parallel + +SYSTEM DROP FILESYSTEM CACHE 's3_cache/'; +SYSTEM DROP FILESYSTEM CACHE 's3_cache_2/'; DESCRIBE FILESYSTEM CACHE 's3_cache'; DESCRIBE FILESYSTEM CACHE 's3_cache_2'; diff --git a/tests/queries/0_stateless/02354_numeric_literals_with_underscores.reference b/tests/queries/0_stateless/02354_numeric_literals_with_underscores.reference new file mode 100644 index 00000000000..f737a774e09 --- /dev/null +++ b/tests/queries/0_stateless/02354_numeric_literals_with_underscores.reference @@ -0,0 +1,6 @@ +1000 +1.000001 +100.00001 +305419896 +610839792 +583 diff --git a/tests/queries/0_stateless/02354_numeric_literals_with_underscores.sql b/tests/queries/0_stateless/02354_numeric_literals_with_underscores.sql new file mode 100644 index 00000000000..b58bbfc5850 --- /dev/null +++ b/tests/queries/0_stateless/02354_numeric_literals_with_underscores.sql @@ -0,0 +1,6 @@ +select 1_000; +select 1.00_00_01; +select 1.000_0001e2; +select 0x12_34_56_78; +select 0x12_34_56_78p1; +select 0b0010_0100_0111; diff --git a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference index 978f19d8381..34180020680 100644 --- a/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference +++ b/tests/queries/0_stateless/02415_all_new_functions_must_be_documented.reference @@ -82,7 +82,6 @@ addYears addressToLine addressToLineWithInlines addressToSymbol -age alphaTokens and appendTrailingCharIfAbsent diff --git a/tests/queries/0_stateless/02421_type_json_empty_parts.sh b/tests/queries/0_stateless/02421_type_json_empty_parts.sh new file mode 100755 index 00000000000..b6cf5995bfa --- /dev/null +++ b/tests/queries/0_stateless/02421_type_json_empty_parts.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +set -euo pipefail + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh +# shellcheck source=./parts.lib +. 
"$CURDIR"/parts.lib + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_json_empty_parts;" +${CLICKHOUSE_CLIENT} -q "SELECT 'Collapsing';" +${CLICKHOUSE_CLIENT} -q "CREATE TABLE t_json_empty_parts (id UInt64, s Int8, data JSON) ENGINE = CollapsingMergeTree(s) ORDER BY id SETTINGS old_parts_lifetime=5;" --allow_experimental_object_type 1 +${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_empty_parts VALUES (1, 1, '{\"k1\": \"aaa\"}') (1, -1, '{\"k2\": \"bbb\"}');" +${CLICKHOUSE_CLIENT} -q "SELECT count() FROM t_json_empty_parts;" +${CLICKHOUSE_CLIENT} -q "SELECT count() FROM system.parts WHERE table = 't_json_empty_parts' AND database = currentDatabase() AND active;" +${CLICKHOUSE_CLIENT} -q "DESC TABLE t_json_empty_parts SETTINGS describe_extend_object_types = 1;" + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_json_empty_parts;" +${CLICKHOUSE_CLIENT} -q "SELECT 'DELETE all';" +${CLICKHOUSE_CLIENT} -q "CREATE TABLE t_json_empty_parts (id UInt64, data JSON) ENGINE = MergeTree ORDER BY id SETTINGS old_parts_lifetime=5;" --allow_experimental_object_type 1 +${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_empty_parts VALUES (1, '{\"k1\": \"aaa\"}') (1, '{\"k2\": \"bbb\"}');" +${CLICKHOUSE_CLIENT} -q "SELECT count() FROM t_json_empty_parts;" +${CLICKHOUSE_CLIENT} -q "SELECT count() FROM system.parts WHERE table = 't_json_empty_parts' AND database = currentDatabase() AND active;" +${CLICKHOUSE_CLIENT} -q "DESC TABLE t_json_empty_parts SETTINGS describe_extend_object_types = 1;" +${CLICKHOUSE_CLIENT} -q "ALTER TABLE t_json_empty_parts DELETE WHERE 1 SETTINGS mutations_sync = 1;" +timeout 60 bash -c 'wait_for_delete_empty_parts t_json_empty_parts' +${CLICKHOUSE_CLIENT} -q "SELECT count() FROM t_json_empty_parts;" +${CLICKHOUSE_CLIENT} -q "SELECT count() FROM system.parts WHERE table = 't_json_empty_parts' AND database = currentDatabase() AND active;" +${CLICKHOUSE_CLIENT} -q "DESC TABLE t_json_empty_parts SETTINGS describe_extend_object_types = 1;" + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_json_empty_parts;" +${CLICKHOUSE_CLIENT} -q "SELECT 'TTL';" +${CLICKHOUSE_CLIENT} -q "CREATE TABLE t_json_empty_parts (id UInt64, d Date, data JSON) ENGINE = MergeTree ORDER BY id TTL d WHERE id % 2 = 1 SETTINGS old_parts_lifetime=5;" --allow_experimental_object_type 1 +${CLICKHOUSE_CLIENT} -q "INSERT INTO t_json_empty_parts VALUES (1, '2000-01-01', '{\"k1\": \"aaa\"}') (2, '2000-01-01', '{\"k2\": \"bbb\"}');" +${CLICKHOUSE_CLIENT} -q "OPTIMIZE TABLE t_json_empty_parts FINAL;" +${CLICKHOUSE_CLIENT} -q "SELECT count() FROM t_json_empty_parts;" +${CLICKHOUSE_CLIENT} -q "SELECT count() FROM system.parts WHERE table = 't_json_empty_parts' AND database = currentDatabase() AND active;" +${CLICKHOUSE_CLIENT} -q "DESC TABLE t_json_empty_parts SETTINGS describe_extend_object_types = 1;" +${CLICKHOUSE_CLIENT} -q "ALTER TABLE t_json_empty_parts MODIFY TTL d;" +${CLICKHOUSE_CLIENT} -q "OPTIMIZE TABLE t_json_empty_parts FINAL;" +timeout 60 bash -c 'wait_for_delete_empty_parts t_json_empty_parts' +${CLICKHOUSE_CLIENT} -q "SELECT count() FROM t_json_empty_parts;" +${CLICKHOUSE_CLIENT} -q "SELECT count() FROM system.parts WHERE table = 't_json_empty_parts' AND database = currentDatabase() AND active;" +${CLICKHOUSE_CLIENT} -q "DESC TABLE t_json_empty_parts SETTINGS describe_extend_object_types = 1;" + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS t_json_empty_parts;" diff --git a/tests/queries/0_stateless/02421_type_json_empty_parts.sql b/tests/queries/0_stateless/02421_type_json_empty_parts.sql deleted file mode 100644 index 
409a2b18a49..00000000000 --- a/tests/queries/0_stateless/02421_type_json_empty_parts.sql +++ /dev/null @@ -1,61 +0,0 @@ --- Tags: no-fasttest - -SET allow_experimental_object_type = 1; - -DROP TABLE IF EXISTS t_json_empty_parts; - -SELECT 'Collapsing'; -CREATE TABLE t_json_empty_parts (id UInt64, s Int8, data JSON) ENGINE = CollapsingMergeTree(s) ORDER BY id; - -INSERT INTO t_json_empty_parts VALUES (1, 1, '{"k1": "aaa"}') (1, -1, '{"k2": "bbb"}'); - -SELECT count() FROM t_json_empty_parts; -SELECT count() FROM system.parts WHERE table = 't_json_empty_parts' AND database = currentDatabase() AND active; -DESC TABLE t_json_empty_parts SETTINGS describe_extend_object_types = 1; - -DROP TABLE t_json_empty_parts; - -DROP TABLE IF EXISTS t_json_empty_parts; - -SELECT 'DELETE all'; -CREATE TABLE t_json_empty_parts (id UInt64, data JSON) ENGINE = MergeTree ORDER BY id; - -INSERT INTO t_json_empty_parts VALUES (1, '{"k1": "aaa"}') (2, '{"k2": "bbb"}'); - -SELECT count() FROM t_json_empty_parts; -SELECT count() FROM system.parts WHERE table = 't_json_empty_parts' AND database = currentDatabase() AND active; -DESC TABLE t_json_empty_parts SETTINGS describe_extend_object_types = 1; - -SET mutations_sync = 2; -ALTER TABLE t_json_empty_parts DELETE WHERE 1; - -DETACH TABLE t_json_empty_parts; -ATTACH TABLE t_json_empty_parts; - -SELECT count() FROM t_json_empty_parts; -SELECT count() FROM system.parts WHERE table = 't_json_empty_parts' AND database = currentDatabase() AND active; -DESC TABLE t_json_empty_parts SETTINGS describe_extend_object_types = 1; - -DROP TABLE IF EXISTS t_json_empty_parts; - -SELECT 'TTL'; -CREATE TABLE t_json_empty_parts (id UInt64, d Date, data JSON) ENGINE = MergeTree ORDER BY id TTL d WHERE id % 2 = 1; - -INSERT INTO t_json_empty_parts VALUES (1, '2000-01-01', '{"k1": "aaa"}') (2, '2000-01-01', '{"k2": "bbb"}'); -OPTIMIZE TABLE t_json_empty_parts FINAL; - -SELECT count() FROM t_json_empty_parts; -SELECT count() FROM system.parts WHERE table = 't_json_empty_parts' AND database = currentDatabase() AND active; -DESC TABLE t_json_empty_parts SETTINGS describe_extend_object_types = 1; - -ALTER TABLE t_json_empty_parts MODIFY TTL d; -OPTIMIZE TABLE t_json_empty_parts FINAL; - -DETACH TABLE t_json_empty_parts; -ATTACH TABLE t_json_empty_parts; - -SELECT count() FROM t_json_empty_parts; -SELECT count() FROM system.parts WHERE table = 't_json_empty_parts' AND database = currentDatabase() AND active; -DESC TABLE t_json_empty_parts SETTINGS describe_extend_object_types = 1; - -DROP TABLE IF EXISTS t_json_empty_parts; diff --git a/tests/queries/0_stateless/02423_drop_memory_parts.sql b/tests/queries/0_stateless/02423_drop_memory_parts.sql index 0d42847f6e5..fad81d46e68 100644 --- a/tests/queries/0_stateless/02423_drop_memory_parts.sql +++ b/tests/queries/0_stateless/02423_drop_memory_parts.sql @@ -35,4 +35,4 @@ ATTACH TABLE table_in_memory; SELECT count() FROM table_in_memory; SELECT name, part_type, rows, active from system.parts -WHERE table='table_in_memory' AND database=currentDatabase(); +WHERE table='table_in_memory' AND database=currentDatabase() and active; diff --git a/tests/queries/0_stateless/02475_bson_each_row_format.reference b/tests/queries/0_stateless/02475_bson_each_row_format.reference index b4a841ed3eb..5922167dc97 100644 --- a/tests/queries/0_stateless/02475_bson_each_row_format.reference +++ b/tests/queries/0_stateless/02475_bson_each_row_format.reference @@ -233,11 +233,11 @@ Schema inference x Nullable(Int32) x Nullable(Int64) x Nullable(Int64) -FAIL +OK x 
Array(Nullable(Int32)) x Array(Nullable(Int64)) x Array(Nullable(Int64)) -FAIL +OK OK OK OK diff --git a/tests/queries/0_stateless/02475_bson_each_row_format.sh b/tests/queries/0_stateless/02475_bson_each_row_format.sh index 6de33b38183..b4efea7e326 100755 --- a/tests/queries/0_stateless/02475_bson_each_row_format.sh +++ b/tests/queries/0_stateless/02475_bson_each_row_format.sh @@ -164,7 +164,7 @@ $CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select $CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" $CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select number::Int64 as x from numbers(2)" $CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" -$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select number::UInt64 as x from numbers(2)" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select toString(number) as x from numbers(2)" $CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" 2>&1 | grep -q -F "TYPE_MISMATCH" && echo "OK" || echo "FAIL" $CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select [number::Bool] as x from numbers(2) settings engine_file_truncate_on_insert=1" @@ -174,7 +174,7 @@ $CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select $CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" $CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select [number::Int64] as x from numbers(2)" $CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" -$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select [number::UInt64] as x from numbers(2)" +$CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select [toString(number)] as x from numbers(2)" $CLICKHOUSE_CLIENT -q "desc file(02475_data.bsonEachRow)" 2>&1 | grep -q -F "TYPE_MISMATCH" && echo "OK" || echo "FAIL" $CLICKHOUSE_CLIENT -q "insert into function file(02475_data.bsonEachRow) select [] as x from numbers(2) settings engine_file_truncate_on_insert=1" diff --git a/tests/queries/0_stateless/02477_age.reference b/tests/queries/0_stateless/02477_age.reference deleted file mode 100644 index 249c413d923..00000000000 --- a/tests/queries/0_stateless/02477_age.reference +++ /dev/null @@ -1,76 +0,0 @@ -Various intervals --1 -0 -0 --7 --3 -0 --23 --11 -0 --103 --52 -0 --730 --364 -1 --17520 --8736 -24 --1051200 --524160 -1440 --63072000 --31449600 -86400 -DateTime arguments -0 -23 -1439 -86399 -Date and DateTime arguments --63072000 --31449600 -86400 -Constant and non-constant arguments --1051200 --524160 -1440 -Case insensitive --10 -Dependance of timezones -0 -0 -1 -25 -1500 -90000 -0 -0 -1 -24 -1440 -86400 -0 -0 -1 -25 -1500 -90000 -0 -0 -1 -24 -1440 -86400 -Additional test -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 diff --git a/tests/queries/0_stateless/02477_age.sql b/tests/queries/0_stateless/02477_age.sql deleted file mode 100644 index 9b612276b01..00000000000 --- a/tests/queries/0_stateless/02477_age.sql +++ /dev/null @@ -1,82 +0,0 @@ -SELECT 'Various intervals'; - -SELECT age('year', toDate('2017-12-31'), toDate('2016-01-01')); -SELECT age('year', toDate('2017-12-31'), toDate('2017-01-01')); -SELECT age('year', toDate('2017-12-31'), toDate('2018-01-01')); -SELECT age('quarter', toDate('2017-12-31'), toDate('2016-01-01')); -SELECT age('quarter', toDate('2017-12-31'), toDate('2017-01-01')); -SELECT age('quarter', toDate('2017-12-31'), toDate('2018-01-01')); -SELECT age('month', toDate('2017-12-31'), 
toDate('2016-01-01')); -SELECT age('month', toDate('2017-12-31'), toDate('2017-01-01')); -SELECT age('month', toDate('2017-12-31'), toDate('2018-01-01')); -SELECT age('week', toDate('2017-12-31'), toDate('2016-01-01')); -SELECT age('week', toDate('2017-12-31'), toDate('2017-01-01')); -SELECT age('week', toDate('2017-12-31'), toDate('2018-01-01')); -SELECT age('day', toDate('2017-12-31'), toDate('2016-01-01')); -SELECT age('day', toDate('2017-12-31'), toDate('2017-01-01')); -SELECT age('day', toDate('2017-12-31'), toDate('2018-01-01')); -SELECT age('hour', toDate('2017-12-31'), toDate('2016-01-01'), 'UTC'); -SELECT age('hour', toDate('2017-12-31'), toDate('2017-01-01'), 'UTC'); -SELECT age('hour', toDate('2017-12-31'), toDate('2018-01-01'), 'UTC'); -SELECT age('minute', toDate('2017-12-31'), toDate('2016-01-01'), 'UTC'); -SELECT age('minute', toDate('2017-12-31'), toDate('2017-01-01'), 'UTC'); -SELECT age('minute', toDate('2017-12-31'), toDate('2018-01-01'), 'UTC'); -SELECT age('second', toDate('2017-12-31'), toDate('2016-01-01'), 'UTC'); -SELECT age('second', toDate('2017-12-31'), toDate('2017-01-01'), 'UTC'); -SELECT age('second', toDate('2017-12-31'), toDate('2018-01-01'), 'UTC'); - -SELECT 'DateTime arguments'; -SELECT age('day', toDateTime('2016-01-01 00:00:01', 'UTC'), toDateTime('2016-01-02 00:00:00', 'UTC'), 'UTC'); -SELECT age('hour', toDateTime('2016-01-01 00:00:01', 'UTC'), toDateTime('2016-01-02 00:00:00', 'UTC'), 'UTC'); -SELECT age('minute', toDateTime('2016-01-01 00:00:01', 'UTC'), toDateTime('2016-01-02 00:00:00', 'UTC'), 'UTC'); -SELECT age('second', toDateTime('2016-01-01 00:00:01', 'UTC'), toDateTime('2016-01-02 00:00:00', 'UTC'), 'UTC'); - -SELECT 'Date and DateTime arguments'; - -SELECT age('second', toDate('2017-12-31'), toDateTime('2016-01-01 00:00:00', 'UTC'), 'UTC'); -SELECT age('second', toDateTime('2017-12-31 00:00:00', 'UTC'), toDate('2017-01-01'), 'UTC'); -SELECT age('second', toDateTime('2017-12-31 00:00:00', 'UTC'), toDateTime('2018-01-01 00:00:00', 'UTC')); - -SELECT 'Constant and non-constant arguments'; - -SELECT age('minute', materialize(toDate('2017-12-31')), toDate('2016-01-01'), 'UTC'); -SELECT age('minute', toDate('2017-12-31'), materialize(toDate('2017-01-01')), 'UTC'); -SELECT age('minute', materialize(toDate('2017-12-31')), materialize(toDate('2018-01-01')), 'UTC'); - -SELECT 'Case insensitive'; - -SELECT age('year', today(), today() - INTERVAL 10 YEAR); - -SELECT 'Dependance of timezones'; - -SELECT age('month', toDate('2014-10-26'), toDate('2014-10-27'), 'Asia/Istanbul'); -SELECT age('week', toDate('2014-10-26'), toDate('2014-10-27'), 'Asia/Istanbul'); -SELECT age('day', toDate('2014-10-26'), toDate('2014-10-27'), 'Asia/Istanbul'); -SELECT age('hour', toDate('2014-10-26'), toDate('2014-10-27'), 'Asia/Istanbul'); -SELECT age('minute', toDate('2014-10-26'), toDate('2014-10-27'), 'Asia/Istanbul'); -SELECT age('second', toDate('2014-10-26'), toDate('2014-10-27'), 'Asia/Istanbul'); - -SELECT age('month', toDate('2014-10-26'), toDate('2014-10-27'), 'UTC'); -SELECT age('week', toDate('2014-10-26'), toDate('2014-10-27'), 'UTC'); -SELECT age('day', toDate('2014-10-26'), toDate('2014-10-27'), 'UTC'); -SELECT age('hour', toDate('2014-10-26'), toDate('2014-10-27'), 'UTC'); -SELECT age('minute', toDate('2014-10-26'), toDate('2014-10-27'), 'UTC'); -SELECT age('second', toDate('2014-10-26'), toDate('2014-10-27'), 'UTC'); - -SELECT age('month', toDateTime('2014-10-26 00:00:00', 'Asia/Istanbul'), toDateTime('2014-10-27 00:00:00', 'Asia/Istanbul')); -SELECT 
age('week', toDateTime('2014-10-26 00:00:00', 'Asia/Istanbul'), toDateTime('2014-10-27 00:00:00', 'Asia/Istanbul')); -SELECT age('day', toDateTime('2014-10-26 00:00:00', 'Asia/Istanbul'), toDateTime('2014-10-27 00:00:00', 'Asia/Istanbul')); -SELECT age('hour', toDateTime('2014-10-26 00:00:00', 'Asia/Istanbul'), toDateTime('2014-10-27 00:00:00', 'Asia/Istanbul')); -SELECT age('minute', toDateTime('2014-10-26 00:00:00', 'Asia/Istanbul'), toDateTime('2014-10-27 00:00:00', 'Asia/Istanbul')); -SELECT age('second', toDateTime('2014-10-26 00:00:00', 'Asia/Istanbul'), toDateTime('2014-10-27 00:00:00', 'Asia/Istanbul')); - -SELECT age('month', toDateTime('2014-10-26 00:00:00', 'UTC'), toDateTime('2014-10-27 00:00:00', 'UTC')); -SELECT age('week', toDateTime('2014-10-26 00:00:00', 'UTC'), toDateTime('2014-10-27 00:00:00', 'UTC')); -SELECT age('day', toDateTime('2014-10-26 00:00:00', 'UTC'), toDateTime('2014-10-27 00:00:00', 'UTC')); -SELECT age('hour', toDateTime('2014-10-26 00:00:00', 'UTC'), toDateTime('2014-10-27 00:00:00', 'UTC')); -SELECT age('minute', toDateTime('2014-10-26 00:00:00', 'UTC'), toDateTime('2014-10-27 00:00:00', 'UTC')); -SELECT age('second', toDateTime('2014-10-26 00:00:00', 'UTC'), toDateTime('2014-10-27 00:00:00', 'UTC')); - -SELECT 'Additional test'; - -SELECT number = age('month', now() - INTERVAL number MONTH, now()) FROM system.numbers LIMIT 10; diff --git a/tests/queries/0_stateless/02477_age_date32.reference b/tests/queries/0_stateless/02477_age_date32.reference deleted file mode 100644 index 69f27a10acc..00000000000 --- a/tests/queries/0_stateless/02477_age_date32.reference +++ /dev/null @@ -1,169 +0,0 @@ --- { echo } - --- Date32 vs Date32 -SELECT age('second', toDate32('1927-01-01', 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); -86400 -SELECT age('minute', toDate32('1927-01-01', 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); -1440 -SELECT age('hour', toDate32('1927-01-01', 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); -24 -SELECT age('day', toDate32('1927-01-01', 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); -1 -SELECT age('week', toDate32('1927-01-01', 'UTC'), toDate32('1927-01-08', 'UTC'), 'UTC'); -1 -SELECT age('month', toDate32('1927-01-01', 'UTC'), toDate32('1927-02-01', 'UTC'), 'UTC'); -1 -SELECT age('quarter', toDate32('1927-01-01', 'UTC'), toDate32('1927-04-01', 'UTC'), 'UTC'); -1 -SELECT age('year', toDate32('1927-01-01', 'UTC'), toDate32('1928-01-01', 'UTC'), 'UTC'); -1 --- With DateTime64 --- Date32 vs DateTime64 -SELECT age('second', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-01-02 00:00:00', 3, 'UTC'), 'UTC'); -86400 -SELECT age('minute', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-01-02 00:00:00', 3, 'UTC'), 'UTC'); -1440 -SELECT age('hour', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-01-02 00:00:00', 3, 'UTC'), 'UTC'); -24 -SELECT age('day', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-01-02 00:00:00', 3, 'UTC'), 'UTC'); -1 -SELECT age('week', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-01-08 00:00:00', 3, 'UTC'), 'UTC'); -1 -SELECT age('month', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-02-01 00:00:00', 3, 'UTC'), 'UTC'); -1 -SELECT age('quarter', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-04-01 00:00:00', 3, 'UTC'), 'UTC'); -1 -SELECT age('year', toDate32('1927-01-01', 'UTC'), toDateTime64('1928-01-01 00:00:00', 3, 'UTC'), 'UTC'); -1 --- DateTime64 vs Date32 -SELECT age('second', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); -86400 -SELECT age('minute', 
toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); -1440 -SELECT age('hour', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); -24 -SELECT age('day', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); -1 -SELECT age('week', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-01-08', 'UTC'), 'UTC'); -1 -SELECT age('month', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-02-01', 'UTC'), 'UTC'); -1 -SELECT age('quarter', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-04-01', 'UTC'), 'UTC'); -1 -SELECT age('year', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1928-01-01', 'UTC'), 'UTC'); -1 --- With DateTime --- Date32 vs DateTime -SELECT age('second', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC'), 'UTC'); -86400 -SELECT age('minute', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC'), 'UTC'); -1440 -SELECT age('hour', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC'), 'UTC'); -24 -SELECT age('day', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC'), 'UTC'); -1 -SELECT age('week', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-25 00:00:00', 'UTC'), 'UTC'); -1 -SELECT age('month', toDate32('2015-08-18', 'UTC'), toDateTime('2015-09-18 00:00:00', 'UTC'), 'UTC'); -1 -SELECT age('quarter', toDate32('2015-08-18', 'UTC'), toDateTime('2015-11-18 00:00:00', 'UTC'), 'UTC'); -1 -SELECT age('year', toDate32('2015-08-18', 'UTC'), toDateTime('2016-08-18 00:00:00', 'UTC'), 'UTC'); -1 --- DateTime vs Date32 -SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); -86400 -SELECT age('minute', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); -1440 -SELECT age('hour', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); -24 -SELECT age('day', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); -1 -SELECT age('week', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-25', 'UTC'), 'UTC'); -1 -SELECT age('month', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-09-18', 'UTC'), 'UTC'); -1 -SELECT age('quarter', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-11-18', 'UTC'), 'UTC'); -1 -SELECT age('year', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2016-08-18', 'UTC'), 'UTC'); -1 --- With Date --- Date32 vs Date -SELECT age('second', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC'), 'UTC'); -86400 -SELECT age('minute', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC'), 'UTC'); -1440 -SELECT age('hour', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC'), 'UTC'); -24 -SELECT age('day', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC'), 'UTC'); -1 -SELECT age('week', toDate32('2015-08-18', 'UTC'), toDate('2015-08-25', 'UTC'), 'UTC'); -1 -SELECT age('month', toDate32('2015-08-18', 'UTC'), toDate('2015-09-18', 'UTC'), 'UTC'); -1 -SELECT age('quarter', toDate32('2015-08-18', 'UTC'), toDate('2015-11-18', 'UTC'), 'UTC'); -1 -SELECT age('year', toDate32('2015-08-18', 'UTC'), toDate('2016-08-18', 'UTC'), 'UTC'); -1 --- Date vs Date32 -SELECT age('second', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); -86400 -SELECT age('minute', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); -1440 -SELECT age('hour', toDate('2015-08-18', 'UTC'), 
toDate32('2015-08-19', 'UTC'), 'UTC'); -24 -SELECT age('day', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); -1 -SELECT age('week', toDate('2015-08-18', 'UTC'), toDate32('2015-08-25', 'UTC'), 'UTC'); -1 -SELECT age('month', toDate('2015-08-18', 'UTC'), toDate32('2015-09-18', 'UTC'), 'UTC'); -1 -SELECT age('quarter', toDate('2015-08-18', 'UTC'), toDate32('2015-11-18', 'UTC'), 'UTC'); -1 -SELECT age('year', toDate('2015-08-18', 'UTC'), toDate32('2016-08-18', 'UTC'), 'UTC'); -1 --- Const vs non-const columns -SELECT age('day', toDate32('1927-01-01', 'UTC'), materialize(toDate32('1927-01-02', 'UTC')), 'UTC'); -1 -SELECT age('day', toDate32('1927-01-01', 'UTC'), materialize(toDateTime64('1927-01-02 00:00:00', 3, 'UTC')), 'UTC'); -1 -SELECT age('day', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), materialize(toDate32('1927-01-02', 'UTC')), 'UTC'); -1 -SELECT age('day', toDate32('2015-08-18', 'UTC'), materialize(toDateTime('2015-08-19 00:00:00', 'UTC')), 'UTC'); -1 -SELECT age('day', toDateTime('2015-08-18 00:00:00', 'UTC'), materialize(toDate32('2015-08-19', 'UTC')), 'UTC'); -1 -SELECT age('day', toDate32('2015-08-18', 'UTC'), materialize(toDate('2015-08-19', 'UTC')), 'UTC'); -1 -SELECT age('day', toDate('2015-08-18', 'UTC'), materialize(toDate32('2015-08-19', 'UTC')), 'UTC'); -1 --- Non-const vs const columns -SELECT age('day', materialize(toDate32('1927-01-01', 'UTC')), toDate32('1927-01-02', 'UTC'), 'UTC'); -1 -SELECT age('day', materialize(toDate32('1927-01-01', 'UTC')), toDateTime64('1927-01-02 00:00:00', 3, 'UTC'), 'UTC'); -1 -SELECT age('day', materialize(toDateTime64('1927-01-01 00:00:00', 3, 'UTC')), toDate32('1927-01-02', 'UTC'), 'UTC'); -1 -SELECT age('day', materialize(toDate32('2015-08-18', 'UTC')), toDateTime('2015-08-19 00:00:00', 'UTC'), 'UTC'); -1 -SELECT age('day', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), toDate32('2015-08-19', 'UTC'), 'UTC'); -1 -SELECT age('day', materialize(toDate32('2015-08-18', 'UTC')), toDate('2015-08-19', 'UTC'), 'UTC'); -1 -SELECT age('day', materialize(toDate('2015-08-18', 'UTC')), toDate32('2015-08-19', 'UTC'), 'UTC'); -1 --- Non-const vs non-const columns -SELECT age('day', materialize(toDate32('1927-01-01', 'UTC')), materialize(toDate32('1927-01-02', 'UTC')), 'UTC'); -1 -SELECT age('day', materialize(toDate32('1927-01-01', 'UTC')), materialize(toDateTime64('1927-01-02 00:00:00', 3, 'UTC')), 'UTC'); -1 -SELECT age('day', materialize(toDateTime64('1927-01-01 00:00:00', 3, 'UTC')), materialize(toDate32('1927-01-02', 'UTC')), 'UTC'); -1 -SELECT age('day', materialize(toDate32('2015-08-18', 'UTC')), materialize(toDateTime('2015-08-19 00:00:00', 'UTC')), 'UTC'); -1 -SELECT age('day', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), materialize(toDate32('2015-08-19', 'UTC')), 'UTC'); -1 -SELECT age('day', materialize(toDate32('2015-08-18', 'UTC')), materialize(toDate('2015-08-19', 'UTC')), 'UTC'); -1 -SELECT age('day', materialize(toDate('2015-08-18', 'UTC')), materialize(toDate32('2015-08-19', 'UTC')), 'UTC'); -1 diff --git a/tests/queries/0_stateless/02477_age_date32.sql b/tests/queries/0_stateless/02477_age_date32.sql deleted file mode 100644 index 43ff458c2d1..00000000000 --- a/tests/queries/0_stateless/02477_age_date32.sql +++ /dev/null @@ -1,101 +0,0 @@ --- { echo } - --- Date32 vs Date32 -SELECT age('second', toDate32('1927-01-01', 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); -SELECT age('minute', toDate32('1927-01-01', 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); -SELECT age('hour', 
toDate32('1927-01-01', 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); -SELECT age('day', toDate32('1927-01-01', 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); -SELECT age('week', toDate32('1927-01-01', 'UTC'), toDate32('1927-01-08', 'UTC'), 'UTC'); -SELECT age('month', toDate32('1927-01-01', 'UTC'), toDate32('1927-02-01', 'UTC'), 'UTC'); -SELECT age('quarter', toDate32('1927-01-01', 'UTC'), toDate32('1927-04-01', 'UTC'), 'UTC'); -SELECT age('year', toDate32('1927-01-01', 'UTC'), toDate32('1928-01-01', 'UTC'), 'UTC'); - --- With DateTime64 --- Date32 vs DateTime64 -SELECT age('second', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-01-02 00:00:00', 3, 'UTC'), 'UTC'); -SELECT age('minute', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-01-02 00:00:00', 3, 'UTC'), 'UTC'); -SELECT age('hour', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-01-02 00:00:00', 3, 'UTC'), 'UTC'); -SELECT age('day', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-01-02 00:00:00', 3, 'UTC'), 'UTC'); -SELECT age('week', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-01-08 00:00:00', 3, 'UTC'), 'UTC'); -SELECT age('month', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-02-01 00:00:00', 3, 'UTC'), 'UTC'); -SELECT age('quarter', toDate32('1927-01-01', 'UTC'), toDateTime64('1927-04-01 00:00:00', 3, 'UTC'), 'UTC'); -SELECT age('year', toDate32('1927-01-01', 'UTC'), toDateTime64('1928-01-01 00:00:00', 3, 'UTC'), 'UTC'); - --- DateTime64 vs Date32 -SELECT age('second', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); -SELECT age('minute', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); -SELECT age('hour', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); -SELECT age('day', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-01-02', 'UTC'), 'UTC'); -SELECT age('week', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-01-08', 'UTC'), 'UTC'); -SELECT age('month', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-02-01', 'UTC'), 'UTC'); -SELECT age('quarter', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1927-04-01', 'UTC'), 'UTC'); -SELECT age('year', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), toDate32('1928-01-01', 'UTC'), 'UTC'); - --- With DateTime --- Date32 vs DateTime -SELECT age('second', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC'), 'UTC'); -SELECT age('minute', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC'), 'UTC'); -SELECT age('hour', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC'), 'UTC'); -SELECT age('day', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-19 00:00:00', 'UTC'), 'UTC'); -SELECT age('week', toDate32('2015-08-18', 'UTC'), toDateTime('2015-08-25 00:00:00', 'UTC'), 'UTC'); -SELECT age('month', toDate32('2015-08-18', 'UTC'), toDateTime('2015-09-18 00:00:00', 'UTC'), 'UTC'); -SELECT age('quarter', toDate32('2015-08-18', 'UTC'), toDateTime('2015-11-18 00:00:00', 'UTC'), 'UTC'); -SELECT age('year', toDate32('2015-08-18', 'UTC'), toDateTime('2016-08-18 00:00:00', 'UTC'), 'UTC'); - --- DateTime vs Date32 -SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); -SELECT age('minute', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); -SELECT age('hour', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); -SELECT age('day', toDateTime('2015-08-18 00:00:00', 
'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); -SELECT age('week', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-08-25', 'UTC'), 'UTC'); -SELECT age('month', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-09-18', 'UTC'), 'UTC'); -SELECT age('quarter', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2015-11-18', 'UTC'), 'UTC'); -SELECT age('year', toDateTime('2015-08-18 00:00:00', 'UTC'), toDate32('2016-08-18', 'UTC'), 'UTC'); - --- With Date --- Date32 vs Date -SELECT age('second', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC'), 'UTC'); -SELECT age('minute', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC'), 'UTC'); -SELECT age('hour', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC'), 'UTC'); -SELECT age('day', toDate32('2015-08-18', 'UTC'), toDate('2015-08-19', 'UTC'), 'UTC'); -SELECT age('week', toDate32('2015-08-18', 'UTC'), toDate('2015-08-25', 'UTC'), 'UTC'); -SELECT age('month', toDate32('2015-08-18', 'UTC'), toDate('2015-09-18', 'UTC'), 'UTC'); -SELECT age('quarter', toDate32('2015-08-18', 'UTC'), toDate('2015-11-18', 'UTC'), 'UTC'); -SELECT age('year', toDate32('2015-08-18', 'UTC'), toDate('2016-08-18', 'UTC'), 'UTC'); - --- Date vs Date32 -SELECT age('second', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); -SELECT age('minute', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); -SELECT age('hour', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); -SELECT age('day', toDate('2015-08-18', 'UTC'), toDate32('2015-08-19', 'UTC'), 'UTC'); -SELECT age('week', toDate('2015-08-18', 'UTC'), toDate32('2015-08-25', 'UTC'), 'UTC'); -SELECT age('month', toDate('2015-08-18', 'UTC'), toDate32('2015-09-18', 'UTC'), 'UTC'); -SELECT age('quarter', toDate('2015-08-18', 'UTC'), toDate32('2015-11-18', 'UTC'), 'UTC'); -SELECT age('year', toDate('2015-08-18', 'UTC'), toDate32('2016-08-18', 'UTC'), 'UTC'); - --- Const vs non-const columns -SELECT age('day', toDate32('1927-01-01', 'UTC'), materialize(toDate32('1927-01-02', 'UTC')), 'UTC'); -SELECT age('day', toDate32('1927-01-01', 'UTC'), materialize(toDateTime64('1927-01-02 00:00:00', 3, 'UTC')), 'UTC'); -SELECT age('day', toDateTime64('1927-01-01 00:00:00', 3, 'UTC'), materialize(toDate32('1927-01-02', 'UTC')), 'UTC'); -SELECT age('day', toDate32('2015-08-18', 'UTC'), materialize(toDateTime('2015-08-19 00:00:00', 'UTC')), 'UTC'); -SELECT age('day', toDateTime('2015-08-18 00:00:00', 'UTC'), materialize(toDate32('2015-08-19', 'UTC')), 'UTC'); -SELECT age('day', toDate32('2015-08-18', 'UTC'), materialize(toDate('2015-08-19', 'UTC')), 'UTC'); -SELECT age('day', toDate('2015-08-18', 'UTC'), materialize(toDate32('2015-08-19', 'UTC')), 'UTC'); - --- Non-const vs const columns -SELECT age('day', materialize(toDate32('1927-01-01', 'UTC')), toDate32('1927-01-02', 'UTC'), 'UTC'); -SELECT age('day', materialize(toDate32('1927-01-01', 'UTC')), toDateTime64('1927-01-02 00:00:00', 3, 'UTC'), 'UTC'); -SELECT age('day', materialize(toDateTime64('1927-01-01 00:00:00', 3, 'UTC')), toDate32('1927-01-02', 'UTC'), 'UTC'); -SELECT age('day', materialize(toDate32('2015-08-18', 'UTC')), toDateTime('2015-08-19 00:00:00', 'UTC'), 'UTC'); -SELECT age('day', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), toDate32('2015-08-19', 'UTC'), 'UTC'); -SELECT age('day', materialize(toDate32('2015-08-18', 'UTC')), toDate('2015-08-19', 'UTC'), 'UTC'); -SELECT age('day', materialize(toDate('2015-08-18', 'UTC')), toDate32('2015-08-19', 'UTC'), 'UTC'); - --- Non-const vs 
non-const columns -SELECT age('day', materialize(toDate32('1927-01-01', 'UTC')), materialize(toDate32('1927-01-02', 'UTC')), 'UTC'); -SELECT age('day', materialize(toDate32('1927-01-01', 'UTC')), materialize(toDateTime64('1927-01-02 00:00:00', 3, 'UTC')), 'UTC'); -SELECT age('day', materialize(toDateTime64('1927-01-01 00:00:00', 3, 'UTC')), materialize(toDate32('1927-01-02', 'UTC')), 'UTC'); -SELECT age('day', materialize(toDate32('2015-08-18', 'UTC')), materialize(toDateTime('2015-08-19 00:00:00', 'UTC')), 'UTC'); -SELECT age('day', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), materialize(toDate32('2015-08-19', 'UTC')), 'UTC'); -SELECT age('day', materialize(toDate32('2015-08-18', 'UTC')), materialize(toDate('2015-08-19', 'UTC')), 'UTC'); -SELECT age('day', materialize(toDate('2015-08-18', 'UTC')), materialize(toDate32('2015-08-19', 'UTC')), 'UTC'); diff --git a/tests/queries/0_stateless/02477_age_datetime64.reference b/tests/queries/0_stateless/02477_age_datetime64.reference deleted file mode 100644 index 3b4459dd26d..00000000000 --- a/tests/queries/0_stateless/02477_age_datetime64.reference +++ /dev/null @@ -1,113 +0,0 @@ --- { echo } - --- DateTime64 vs DateTime64 same scale -SELECT age('second', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 00:00:10', 0, 'UTC')); -10 -SELECT age('second', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 00:10:00', 0, 'UTC')); -600 -SELECT age('second', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 01:00:00', 0, 'UTC')); -3600 -SELECT age('second', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 01:10:10', 0, 'UTC')); -4210 -SELECT age('minute', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 00:10:00', 0, 'UTC')); -10 -SELECT age('minute', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 10:00:00', 0, 'UTC')); -600 -SELECT age('hour', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 10:00:00', 0, 'UTC')); -10 -SELECT age('day', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-02 00:00:00', 0, 'UTC')); -1 -SELECT age('month', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-02-01 00:00:00', 0, 'UTC')); -1 -SELECT age('year', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1928-01-01 00:00:00', 0, 'UTC')); -1 --- DateTime64 vs DateTime64 different scale -SELECT age('second', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 00:00:10', 3, 'UTC')); -10 -SELECT age('second', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 00:10:00', 3, 'UTC')); -600 -SELECT age('second', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 01:00:00', 3, 'UTC')); -3600 -SELECT age('second', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 01:10:10', 3, 'UTC')); -4210 -SELECT age('minute', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 00:10:00', 3, 'UTC')); -10 -SELECT age('minute', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 10:00:00', 3, 'UTC')); -600 -SELECT age('hour', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 10:00:00', 3, 'UTC')); -10 -SELECT age('day', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-02 00:00:00', 3, 'UTC')); -1 -SELECT age('month', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-02-01 00:00:00', 3, 'UTC')); -1 -SELECT 
age('year', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1928-01-01 00:00:00', 3, 'UTC')); -1 --- With DateTime --- DateTime64 vs DateTime -SELECT age('second', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), toDateTime('2015-08-18 00:00:00', 'UTC')); -0 -SELECT age('second', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), toDateTime('2015-08-18 00:00:10', 'UTC')); -10 -SELECT age('second', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), toDateTime('2015-08-18 00:10:00', 'UTC')); -600 -SELECT age('second', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), toDateTime('2015-08-18 01:00:00', 'UTC')); -3600 -SELECT age('second', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), toDateTime('2015-08-18 01:10:10', 'UTC')); -4210 --- DateTime vs DateTime64 -SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDateTime64('2015-08-18 00:00:00', 3, 'UTC')); -0 -SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDateTime64('2015-08-18 00:00:10', 3, 'UTC')); -10 -SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDateTime64('2015-08-18 00:10:00', 3, 'UTC')); -600 -SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDateTime64('2015-08-18 01:00:00', 3, 'UTC')); -3600 -SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDateTime64('2015-08-18 01:10:10', 3, 'UTC')); -4210 --- With Date --- DateTime64 vs Date -SELECT age('day', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), toDate('2015-08-19', 'UTC')); -1 --- Date vs DateTime64 -SELECT age('day', toDate('2015-08-18', 'UTC'), toDateTime64('2015-08-19 00:00:00', 3, 'UTC')); -1 --- Same thing but const vs non-const columns -SELECT age('second', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), materialize(toDateTime64('1927-01-01 00:00:10', 0, 'UTC'))); -10 -SELECT age('second', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), materialize(toDateTime64('1927-01-01 00:00:10', 3, 'UTC'))); -10 -SELECT age('second', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), materialize(toDateTime('2015-08-18 00:00:10', 'UTC'))); -10 -SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), materialize(toDateTime64('2015-08-18 00:00:10', 3, 'UTC'))); -10 -SELECT age('day', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), materialize(toDate('2015-08-19', 'UTC'))); -1 -SELECT age('day', toDate('2015-08-18', 'UTC'), materialize(toDateTime64('2015-08-19 00:00:00', 3, 'UTC'))); -1 --- Same thing but non-const vs const columns -SELECT age('second', materialize(toDateTime64('1927-01-01 00:00:00', 0, 'UTC')), toDateTime64('1927-01-01 00:00:10', 0, 'UTC')); -10 -SELECT age('second', materialize(toDateTime64('1927-01-01 00:00:00', 6, 'UTC')), toDateTime64('1927-01-01 00:00:10', 3, 'UTC')); -10 -SELECT age('second', materialize(toDateTime64('2015-08-18 00:00:00', 0, 'UTC')), toDateTime('2015-08-18 00:00:10', 'UTC')); -10 -SELECT age('second', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), toDateTime64('2015-08-18 00:00:10', 3, 'UTC')); -10 -SELECT age('day', materialize(toDateTime64('2015-08-18 00:00:00', 0, 'UTC')), toDate('2015-08-19', 'UTC')); -1 -SELECT age('day', materialize(toDate('2015-08-18', 'UTC')), toDateTime64('2015-08-19 00:00:00', 3, 'UTC')); -1 --- Same thing but non-const vs non-const columns -SELECT age('second', materialize(toDateTime64('1927-01-01 00:00:00', 0, 'UTC')), materialize(toDateTime64('1927-01-01 00:00:10', 0, 'UTC'))); -10 -SELECT age('second', materialize(toDateTime64('1927-01-01 00:00:00', 6, 'UTC')), materialize(toDateTime64('1927-01-01 00:00:10', 3, 'UTC'))); -10 -SELECT 
age('second', materialize(toDateTime64('2015-08-18 00:00:00', 0, 'UTC')), materialize(toDateTime('2015-08-18 00:00:10', 'UTC'))); -10 -SELECT age('second', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), materialize(toDateTime64('2015-08-18 00:00:10', 3, 'UTC'))); -10 -SELECT age('day', materialize(toDateTime64('2015-08-18 00:00:00', 0, 'UTC')), materialize(toDate('2015-08-19', 'UTC'))); -1 -SELECT age('day', materialize(toDate('2015-08-18', 'UTC')), materialize(toDateTime64('2015-08-19 00:00:00', 3, 'UTC'))); -1 diff --git a/tests/queries/0_stateless/02477_age_datetime64.sql b/tests/queries/0_stateless/02477_age_datetime64.sql deleted file mode 100644 index 1bed93991ca..00000000000 --- a/tests/queries/0_stateless/02477_age_datetime64.sql +++ /dev/null @@ -1,77 +0,0 @@ --- { echo } - --- DateTime64 vs DateTime64 same scale -SELECT age('second', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 00:00:10', 0, 'UTC')); -SELECT age('second', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 00:10:00', 0, 'UTC')); -SELECT age('second', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 01:00:00', 0, 'UTC')); -SELECT age('second', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 01:10:10', 0, 'UTC')); - -SELECT age('minute', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 00:10:00', 0, 'UTC')); -SELECT age('minute', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 10:00:00', 0, 'UTC')); - -SELECT age('hour', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-01 10:00:00', 0, 'UTC')); - -SELECT age('day', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-01-02 00:00:00', 0, 'UTC')); -SELECT age('month', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1927-02-01 00:00:00', 0, 'UTC')); -SELECT age('year', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), toDateTime64('1928-01-01 00:00:00', 0, 'UTC')); - --- DateTime64 vs DateTime64 different scale -SELECT age('second', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 00:00:10', 3, 'UTC')); -SELECT age('second', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 00:10:00', 3, 'UTC')); -SELECT age('second', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 01:00:00', 3, 'UTC')); -SELECT age('second', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 01:10:10', 3, 'UTC')); - -SELECT age('minute', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 00:10:00', 3, 'UTC')); -SELECT age('minute', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 10:00:00', 3, 'UTC')); - -SELECT age('hour', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-01 10:00:00', 3, 'UTC')); - -SELECT age('day', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-01-02 00:00:00', 3, 'UTC')); -SELECT age('month', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1927-02-01 00:00:00', 3, 'UTC')); -SELECT age('year', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), toDateTime64('1928-01-01 00:00:00', 3, 'UTC')); - --- With DateTime --- DateTime64 vs DateTime -SELECT age('second', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), toDateTime('2015-08-18 00:00:00', 'UTC')); -SELECT age('second', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), toDateTime('2015-08-18 00:00:10', 'UTC')); -SELECT age('second', toDateTime64('2015-08-18 00:00:00', 
0, 'UTC'), toDateTime('2015-08-18 00:10:00', 'UTC')); -SELECT age('second', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), toDateTime('2015-08-18 01:00:00', 'UTC')); -SELECT age('second', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), toDateTime('2015-08-18 01:10:10', 'UTC')); - --- DateTime vs DateTime64 -SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDateTime64('2015-08-18 00:00:00', 3, 'UTC')); -SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDateTime64('2015-08-18 00:00:10', 3, 'UTC')); -SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDateTime64('2015-08-18 00:10:00', 3, 'UTC')); -SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDateTime64('2015-08-18 01:00:00', 3, 'UTC')); -SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), toDateTime64('2015-08-18 01:10:10', 3, 'UTC')); - --- With Date --- DateTime64 vs Date -SELECT age('day', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), toDate('2015-08-19', 'UTC')); - --- Date vs DateTime64 -SELECT age('day', toDate('2015-08-18', 'UTC'), toDateTime64('2015-08-19 00:00:00', 3, 'UTC')); - --- Same thing but const vs non-const columns -SELECT age('second', toDateTime64('1927-01-01 00:00:00', 0, 'UTC'), materialize(toDateTime64('1927-01-01 00:00:10', 0, 'UTC'))); -SELECT age('second', toDateTime64('1927-01-01 00:00:00', 6, 'UTC'), materialize(toDateTime64('1927-01-01 00:00:10', 3, 'UTC'))); -SELECT age('second', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), materialize(toDateTime('2015-08-18 00:00:10', 'UTC'))); -SELECT age('second', toDateTime('2015-08-18 00:00:00', 'UTC'), materialize(toDateTime64('2015-08-18 00:00:10', 3, 'UTC'))); -SELECT age('day', toDateTime64('2015-08-18 00:00:00', 0, 'UTC'), materialize(toDate('2015-08-19', 'UTC'))); -SELECT age('day', toDate('2015-08-18', 'UTC'), materialize(toDateTime64('2015-08-19 00:00:00', 3, 'UTC'))); - --- Same thing but non-const vs const columns -SELECT age('second', materialize(toDateTime64('1927-01-01 00:00:00', 0, 'UTC')), toDateTime64('1927-01-01 00:00:10', 0, 'UTC')); -SELECT age('second', materialize(toDateTime64('1927-01-01 00:00:00', 6, 'UTC')), toDateTime64('1927-01-01 00:00:10', 3, 'UTC')); -SELECT age('second', materialize(toDateTime64('2015-08-18 00:00:00', 0, 'UTC')), toDateTime('2015-08-18 00:00:10', 'UTC')); -SELECT age('second', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), toDateTime64('2015-08-18 00:00:10', 3, 'UTC')); -SELECT age('day', materialize(toDateTime64('2015-08-18 00:00:00', 0, 'UTC')), toDate('2015-08-19', 'UTC')); -SELECT age('day', materialize(toDate('2015-08-18', 'UTC')), toDateTime64('2015-08-19 00:00:00', 3, 'UTC')); - --- Same thing but non-const vs non-const columns -SELECT age('second', materialize(toDateTime64('1927-01-01 00:00:00', 0, 'UTC')), materialize(toDateTime64('1927-01-01 00:00:10', 0, 'UTC'))); -SELECT age('second', materialize(toDateTime64('1927-01-01 00:00:00', 6, 'UTC')), materialize(toDateTime64('1927-01-01 00:00:10', 3, 'UTC'))); -SELECT age('second', materialize(toDateTime64('2015-08-18 00:00:00', 0, 'UTC')), materialize(toDateTime('2015-08-18 00:00:10', 'UTC'))); -SELECT age('second', materialize(toDateTime('2015-08-18 00:00:00', 'UTC')), materialize(toDateTime64('2015-08-18 00:00:10', 3, 'UTC'))); -SELECT age('day', materialize(toDateTime64('2015-08-18 00:00:00', 0, 'UTC')), materialize(toDate('2015-08-19', 'UTC'))); -SELECT age('day', materialize(toDate('2015-08-18', 'UTC')), materialize(toDateTime64('2015-08-19 00:00:00', 3, 'UTC'))); diff --git 
a/tests/queries/0_stateless/02479_race_condition_between_insert_and_droppin_mv.reference b/tests/queries/0_stateless/02479_race_condition_between_insert_and_droppin_mv.reference new file mode 100644 index 00000000000..53cdf1e9393 --- /dev/null +++ b/tests/queries/0_stateless/02479_race_condition_between_insert_and_droppin_mv.reference @@ -0,0 +1 @@ +PASSED diff --git a/tests/queries/0_stateless/02479_race_condition_between_insert_and_droppin_mv.sh b/tests/queries/0_stateless/02479_race_condition_between_insert_and_droppin_mv.sh new file mode 100755 index 00000000000..5d9844d5030 --- /dev/null +++ b/tests/queries/0_stateless/02479_race_condition_between_insert_and_droppin_mv.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash +# Tags: no-random-settings, no-fasttest, long + +set -e + +CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL="error" + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +function insert { + i=0 + offset=500 + while true; + do + ${CLICKHOUSE_CLIENT} -q "INSERT INTO test_race_condition_landing SELECT number, toString(number), toString(number) from system.numbers limit $i, $offset" + i=$(( $i + $RANDOM % 100 + 400 )) + done +} + +function drop_mv { + index=$1 + while true; + do + ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS test_race_condition_mv_$index" + ${CLICKHOUSE_CLIENT} -q "CREATE MATERIALIZED VIEW IF NOT EXISTS test_race_condition_mv1_$index TO test_race_condition_target AS select count() as number FROM (SELECT a.number, a.n, a.n2, b.number, b.n, b.n2, c.number, c.n, c.n2 FROM test_race_condition_landing a CROSS JOIN test_race_condition_landing b CROSS JOIN test_race_condition_landing c)" + ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS test_race_condition_mv1_$index" + ${CLICKHOUSE_CLIENT} -q "CREATE MATERIALIZED VIEW IF NOT EXISTS test_race_condition_mv_$index TO test_race_condition_target AS select count() as number FROM (SELECT a.number, a.n, a.n2, b.number, b.n, b.n2, c.number, c.n, c.n2 FROM test_race_condition_landing a CROSS JOIN test_race_condition_landing b CROSS JOIN test_race_condition_landing c)" + done +} + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS test_race_condition_target" +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS test_race_condition_landing" +${CLICKHOUSE_CLIENT} -q "CREATE TABLE test_race_condition_target (number Int64) Engine=MergeTree ORDER BY number" +${CLICKHOUSE_CLIENT} -q "CREATE TABLE test_race_condition_landing (number Int64, n String, n2 String) Engine=MergeTree ORDER BY number" + +export -f drop_mv; +export -f insert; + +TIMEOUT=55 + +for i in {1..4} +do + timeout $TIMEOUT bash -c drop_mv $i & +done + +for i in {1..4} +do + timeout $TIMEOUT bash -c insert 20 & +done + +wait + + +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS test_race_condition_target" +${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS test_race_condition_landing" +for i in {1..4} +do + ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS test_race_condition_mv_$i" + ${CLICKHOUSE_CLIENT} -q "DROP TABLE IF EXISTS test_race_condition_mv1_$i" +done + + +echo "PASSED" diff --git a/tests/queries/0_stateless/02481_async_insert_dedup.python b/tests/queries/0_stateless/02481_async_insert_dedup.python index fac031434b4..16808aeb7a2 100644 --- a/tests/queries/0_stateless/02481_async_insert_dedup.python +++ b/tests/queries/0_stateless/02481_async_insert_dedup.python @@ -68,7 +68,7 @@ def generate_data(q, total_number): def fetch_and_insert_data(q, client): while True: insert = q.get() - client.query(insert, settings = 
{"async_insert": 1, "wait_for_async_insert": 0, "async_insert_busy_timeout_ms": 1500, "insert_keeper_fault_injection_probability": 0}) + client.query(insert, settings = {"async_insert": 1, "async_insert_deduplicate": 1, "wait_for_async_insert": 0, "async_insert_busy_timeout_ms": 1500, "insert_keeper_fault_injection_probability": 0}) q.task_done() sleep_time = random.randint(50, 500) time.sleep(sleep_time/1000.0) @@ -104,13 +104,17 @@ while (True): result = result.split() err = False errMsg = "" - for i in range(total_number): - expect = str(i+1) - real = result[i] - if expect != real: - err = True - errMsg = "error, {} is not equal to {} for {}-th elements, total rows is {}".format(real, expect, i, len(result)) - break + if len(result) != total_number: + err = True + errMsg = f"the size of result is {len(result)}. we expect {total_number}." + else: + for i in range(total_number): + expect = str(i+1) + real = result[i] + if expect != real: + err = True + errMsg = f"error, real value {real} is not equal to expect value {expect} for {i}-th elements" + break # retry several times to get stable results. if err and retry >= 5: print (errMsg, flush=True) diff --git a/tests/queries/0_stateless/02481_custom_separated_and_template_with_csv_field.reference b/tests/queries/0_stateless/02481_custom_separated_and_template_with_csv_field.reference new file mode 100644 index 00000000000..5bad96d521c --- /dev/null +++ b/tests/queries/0_stateless/02481_custom_separated_and_template_with_csv_field.reference @@ -0,0 +1,7 @@ +1 2 +3 4 +1 2 +3 4 +ab|c de&f +ab|c de*f gh&k +|av *ad &ad diff --git a/tests/queries/0_stateless/02481_custom_separated_and_template_with_csv_field.sh b/tests/queries/0_stateless/02481_custom_separated_and_template_with_csv_field.sh new file mode 100755 index 00000000000..ec986a4d16e --- /dev/null +++ b/tests/queries/0_stateless/02481_custom_separated_and_template_with_csv_field.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +# Tags: no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +echo "1||2&&3||4&&" | $CLICKHOUSE_LOCAL --input-format=CustomSeparated --format_custom_field_delimiter='||' --format_custom_row_after_delimiter='&&' --format_custom_escaping_rule='CSV' -q "select * from table" + +echo "1||2|||3||4|||" | $CLICKHOUSE_LOCAL --input-format=CustomSeparated --format_custom_field_delimiter='||' --format_custom_row_after_delimiter='|||' --format_custom_escaping_rule='CSV' -q "select * from table" + +echo "ab|c||de&f&&" | $CLICKHOUSE_LOCAL --input-format=CustomSeparated --format_custom_field_delimiter='||' --format_custom_row_after_delimiter='&&' --format_custom_escaping_rule='CSV' -q "select * from table" + +echo -e "\${column_1:CSV}||\${column_2:CSV}**\${column_3:CSV}&&" > row_format_02481 + +echo -e "ab|c||de*f**gh&k&&\n|av||*ad**&ad&&" | $CLICKHOUSE_LOCAL -q "select * from table" --input-format=Template --format_template_row='row_format_02481' --format_template_rows_between_delimiter "" + +rm row_format_02481 + diff --git a/tests/queries/0_stateless/02482_execute_functions_before_sorting_bug.reference b/tests/queries/0_stateless/02482_execute_functions_before_sorting_bug.reference new file mode 100644 index 00000000000..a7eb5000556 --- /dev/null +++ b/tests/queries/0_stateless/02482_execute_functions_before_sorting_bug.reference @@ -0,0 +1,6 @@ +10000000001 +10000000002 +100000000010000000000 +100000000010000000000 +14 +15 diff --git a/tests/queries/0_stateless/02482_execute_functions_before_sorting_bug.sql b/tests/queries/0_stateless/02482_execute_functions_before_sorting_bug.sql new file mode 100644 index 00000000000..f1a17df5fe5 --- /dev/null +++ b/tests/queries/0_stateless/02482_execute_functions_before_sorting_bug.sql @@ -0,0 +1,9 @@ +set allow_suspicious_low_cardinality_types=1; +drop table if exists test; +create table test (x LowCardinality(Int32)) engine=Memory; +insert into test select 1; +insert into test select 2; +select x + 1e10 from test order by 1e10, x; +select x + (1e10 + 1e20) from test order by (1e10 + 1e20), x; +select x + (pow(2, 2) + pow(3, 2)) from test order by (pow(2,2) + pow(3, 2)), x; +drop table test; diff --git a/tests/queries/0_stateless/02483_add_engine_full_column_to_system_databases.reference b/tests/queries/0_stateless/02483_add_engine_full_column_to_system_databases.reference new file mode 100644 index 00000000000..34dc0b28146 --- /dev/null +++ b/tests/queries/0_stateless/02483_add_engine_full_column_to_system_databases.reference @@ -0,0 +1 @@ +Replicated(\'some/path/default/replicated_database_test\', \'shard_1\', \'replica_1\') SETTINGS max_broken_tables_ratio = 1 diff --git a/tests/queries/0_stateless/02483_add_engine_full_column_to_system_databases.sql b/tests/queries/0_stateless/02483_add_engine_full_column_to_system_databases.sql new file mode 100644 index 00000000000..1f35a0f6273 --- /dev/null +++ b/tests/queries/0_stateless/02483_add_engine_full_column_to_system_databases.sql @@ -0,0 +1,8 @@ +-- Tags: no-parallel + +DROP DATABASE IF EXISTS replicated_database_test; +SET allow_experimental_database_replicated=1; +CREATE DATABASE IF NOT EXISTS replicated_database_test ENGINE = Replicated('some/path/' || currentDatabase() || '/replicated_database_test', 'shard_1', 'replica_1') SETTINGS max_broken_tables_ratio=1; +SELECT engine_full FROM system.databases WHERE name = 'replicated_database_test'; +DROP DATABASE IF EXISTS replicated_database_test; + diff --git a/tests/queries/0_stateless/02483_check_virtuals_shile_using_structure_from_insertion_table.reference 
b/tests/queries/0_stateless/02483_check_virtuals_shile_using_structure_from_insertion_table.reference new file mode 100644 index 00000000000..88d21f535eb --- /dev/null +++ b/tests/queries/0_stateless/02483_check_virtuals_shile_using_structure_from_insertion_table.reference @@ -0,0 +1 @@ +Hello 02483_data.LineAsString diff --git a/tests/queries/0_stateless/02483_check_virtuals_shile_using_structure_from_insertion_table.sql b/tests/queries/0_stateless/02483_check_virtuals_shile_using_structure_from_insertion_table.sql new file mode 100644 index 00000000000..2a0e5e7495b --- /dev/null +++ b/tests/queries/0_stateless/02483_check_virtuals_shile_using_structure_from_insertion_table.sql @@ -0,0 +1,9 @@ +-- Tags: no-parallel + +drop table if exists test; +create table test (line String, _file String, _path String) engine=Memory; +insert into function file(02483_data.LineAsString) select 'Hello' settings engine_file_truncate_on_insert=1; +set use_structure_from_insertion_table_in_table_functions=2; +insert into test select *, _file, _path from file(02483_data.LineAsString); +select line, _file from test; +drop table test; diff --git a/tests/queries/0_stateless/02493_inconsistent_hex_and_binary_number.expect b/tests/queries/0_stateless/02493_inconsistent_hex_and_binary_number.expect new file mode 100755 index 00000000000..a391756ba22 --- /dev/null +++ b/tests/queries/0_stateless/02493_inconsistent_hex_and_binary_number.expect @@ -0,0 +1,40 @@ +#!/usr/bin/expect -f + +set basedir [file dirname $argv0] +set basename [file tail $argv0] +exp_internal -f $env(CLICKHOUSE_TMP)/$basename.debuglog 0 + +log_user 0 +set timeout 60 +match_max 100000 +set stty_init "rows 25 cols 120" + +expect_after { + eof { exp_continue } + timeout { exit 1 } +} + +spawn bash +send "source $basedir/../shell_config.sh\r" + +send "\$CLICKHOUSE_CLIENT --query 'select 0b'\r" +expect "DB::Exception: Missing columns: '0b' while processing query: 'SELECT `0b`', required columns: '0b'. (UNKNOWN_IDENTIFIER)" + +send "\$CLICKHOUSE_CLIENT --query 'select 0b;'\r" +expect "DB::Exception: Missing columns: '0b' while processing query: 'SELECT `0b`', required columns: '0b'. (UNKNOWN_IDENTIFIER)" + +send "\$CLICKHOUSE_CLIENT --query 'select 0b ;'\r" +expect "DB::Exception: Missing columns: '0b' while processing query: 'SELECT `0b`', required columns: '0b'. (UNKNOWN_IDENTIFIER)" + + +send "\$CLICKHOUSE_CLIENT --query 'select 0x'\r" +expect "DB::Exception: Missing columns: '0x' while processing query: 'SELECT `0x`', required columns: '0x'. (UNKNOWN_IDENTIFIER)" + +send "\$CLICKHOUSE_CLIENT --query 'select 0x;'\r" +expect "DB::Exception: Missing columns: '0x' while processing query: 'SELECT `0x`', required columns: '0x'. (UNKNOWN_IDENTIFIER)" + +send "\$CLICKHOUSE_CLIENT --query 'select 0x ;'\r" +expect "DB::Exception: Missing columns: '0x' while processing query: 'SELECT `0x`', required columns: '0x'. 
(UNKNOWN_IDENTIFIER)" + +send "exit\r" +expect eof diff --git a/tests/queries/0_stateless/02493_inconsistent_hex_and_binary_number.reference b/tests/queries/0_stateless/02493_inconsistent_hex_and_binary_number.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02493_numeric_literals_with_underscores.reference b/tests/queries/0_stateless/02493_numeric_literals_with_underscores.reference new file mode 100644 index 00000000000..482ad63ffeb --- /dev/null +++ b/tests/queries/0_stateless/02493_numeric_literals_with_underscores.reference @@ -0,0 +1,126 @@ +1234 +1234 +1234 +1234 +1234 +1234 +-1234 +-1234 +-1234 +12.34 +12.34 +12.34 +12.34 +-12.34 +-12.34 +-12.34 +-12.34 +3.4e22 +3.4e22 +3.4e22 +3.4e22 +3.4e22 +3.4e22 +3.4e22 +3.4e22 +3.4e-20 +3.4e-20 +3.4e-20 +3.4e-20 +-3.4e22 +-3.4e22 +-3.4e22 +-3.4e22 +-3.4e22 +-3.4e22 +-3.4e22 +-3.4e22 +-3.4e-20 +-3.4e-20 +-3.4e-20 +-3.4e-20 +1.34e21 +1.34e21 +1.34e21 +1.34e21 +1.34e21 +1.34e21 +1.34e21 +1.34e21 +1.34e-21 +1.34e-21 +1.34e-21 +1.34e-21 +-1.34e21 +-1.34e21 +-1.34e21 +-1.34e21 +-1.34e21 +-1.34e21 +-1.34e21 +-1.34e21 +-1.34e-21 +-1.34e-21 +-1.34e-21 +-1.34e-21 +-340000000000000000000 +-340000000000000000000 +-340000000000000000000 +-340000000000000000000 +-340000000000000000000 +-340000000000000000000 +-340000000000000000000 +-340000000000000000000 +-3.4e-22 +-3.4e-22 +-3.4e-22 +-3.4e-22 +nan +nan +inf +inf +-inf +inf +inf +-inf +inf +inf +-inf +15 +15 +15 +-15 +-15 +-15 +4660 +4660 +4660 +-4660 +-4660 +-4660 +238 +238 +1.1376953125 +1.1376953125 +-1.1376953125 +-1.1376953125 +0.9296875 +0.9296875 +2.275390625 +2.275390625 +2.275390625 +2.275390625 +2.275390625 +2.275390625 +0.56884765625 +0.56884765625 +0.56884765625 +-2.275390625 +-2.275390625 +-2.275390625 +-2.275390625 +-2.275390625 +-2.275390625 +-0.56884765625 +-0.56884765625 +-0.56884765625 diff --git a/tests/queries/0_stateless/02493_numeric_literals_with_underscores.sql b/tests/queries/0_stateless/02493_numeric_literals_with_underscores.sql new file mode 100644 index 00000000000..a0042fac4c4 --- /dev/null +++ b/tests/queries/0_stateless/02493_numeric_literals_with_underscores.sql @@ -0,0 +1,154 @@ +SELECT 1234; -- Positive integer (+ implied) +SELECT 1_234; +SELECT 1_2_3_4; +SELECT +1234; -- Positive integer (+ explicit) +SELECT +1_234; +SELECT +1_2_3_4; +SELECT -1234; -- Negative integer +SELECT -1_234; +SELECT -1_2_3_4; +SELECT 12.34; -- Positive floating point with . notation +SELECT 12.3_4; +SELECT 1_2.34; +SELECT 1_2.3_4; +SELECT -12.34; -- Negative floating point with . notation +SELECT -12.3_4; +SELECT -1_2.34; +SELECT -1_2.3_4; +SELECT 34e21; -- Positive floating point with positive scientific notation (+ implied) +SELECT 3_4e21; +SELECT 34e2_1; +SELECT 3_4e2_1; +SELECT 34e+21; -- Positive floating point with positive scientific notation (+ explicit) +SELECT 3_4e+21; +SELECT 34e+2_1; +SELECT 3_4e+2_1; +SELECT 34e-21; -- Positive floating point with negative scientific notation +SELECT 3_4e-21; +SELECT 34e-2_1; +SELECT 3_4e-2_1; +SELECT -34e21; -- Negative floating point with positive scientific notation (+ implied) +SELECT -3_4e21; +SELECT -34e2_1; +SELECT -3_4e2_1; +SELECT -34e+21; -- Negative floating point with positive scientific notation (+ explicit) +SELECT -3_4e+21; +SELECT -34e+2_1; +SELECT -3_4e+2_1; +SELECT -34e-21; -- Negative floating point with negative scientific notation +SELECT -3_4e-21; +SELECT -34e-2_1; +SELECT -3_4e-2_1; +SELECT 1.34e21; -- Positive floating point (with .) 
with positive scientific notation (+ implied) +SELECT 1.3_4e21; +SELECT 1.34e2_1; +SELECT 1.3_4e2_1; +SELECT 1.34e+21; -- Positive floating point (with .) with positive scientific notation (+ explicit) +SELECT 1.3_4e+21; +SELECT 1.34e+2_1; +SELECT 1.3_4e+2_1; +SELECT 1.34e-21; -- Positive floating point (with .) with negative scientific notation +SELECT 1.3_4e-21; +SELECT 1.34e-2_1; +SELECT 1.3_4e-2_1; +SELECT -1.34e21; -- Negative floating point (with .) with positive scientific notation (+ implied) +SELECT -1.3_4e21; +SELECT -1.34e2_1; +SELECT -1.3_4e2_1; +SELECT -1.34e+21; -- Negative floating point (with .) with positive scientific notation (+ explicit) +SELECT -1.3_4e+21; +SELECT -1.34e+2_1; +SELECT -1.3_4e+2_1; +SELECT -1.34e-21; -- Negative floating point (with .) with negative scientific notation +SELECT -1.3_4e-21; +SELECT -1.34e-2_1; +SELECT -1.3_4e-2_1; +SELECT -.34e21; -- Negative floating point (with .) with positive scientific notation (+ implied) +SELECT -.3_4e21; +SELECT -.34e2_1; +SELECT -.3_4e2_1; +SELECT -.34e+21; -- Negative floating point (with .) with positive scientific notation (+ explicit) +SELECT -.3_4e+21; +SELECT -.34e+2_1; +SELECT -.3_4e+2_1; +SELECT -.34e-21; -- Negative floating point (with .) with negative scientific notation +SELECT -.3_4e-21; +SELECT -.34e-2_1; +SELECT -.3_4e-2_1; +SELECT NaN; -- Specials +SELECT nan; +SELECT inf; +SELECT +inf; +SELECT -inf; +SELECT Inf; +SELECT +Inf; +SELECT -Inf; +SELECT INF; +SELECT +INF; +SELECT -INF; +SELECT 0b1111; -- Binary +SELECT 0b1_111; +SELECT 0b1_1_1_1; +SELECT -0b1111; +SELECT -0b1_111; +SELECT -0b1_1_1_1; +SELECT 0x1234; -- Hex +SELECT 0x1_234; +SELECT 0x1_2_3_4; +SELECT -0x1234; +SELECT -0x1_234; +SELECT -0x1_2_3_4; +SELECT 0xee; +SELECT 0xe_e; +SELECT 0x1.234; -- Hex fractions +SELECT 0x1.2_3_4; +SELECT -0x1.234; +SELECT -0x1.2_3_4; +SELECT 0x0.ee; +SELECT 0x0.e_e; +SELECT 0x1.234p01; -- Hex scientific notation +SELECT 0x1.2_34p01; +SELECT 0x1.234p0_1; +SELECT 0x1.234p+01; +SELECT 0x1.2_34p+01; +SELECT 0x1.2_34p+0_1; +SELECT 0x1.234p-01; +SELECT 0x1.2_34p-01; +SELECT 0x1.2_34p-0_1; +SELECT -0x1.234p01; +SELECT -0x1.2_34p01; +SELECT -0x1.2_34p0_1; +SELECT -0x1.234p+01; +SELECT -0x1.2_34p+01; +SELECT -0x1.2_34p+0_1; +SELECT -0x1.234p-01; +SELECT -0x1.2_34p-01; +SELECT -0x1.2_34p-0_1; + +-- Things that are not a number + +select _1000; -- { serverError UNKNOWN_IDENTIFIER } +select _1000 FROM (SELECT 1 AS _1000) FORMAT Null; +select -_1; -- { serverError UNKNOWN_IDENTIFIER } +select -_1 FROM (SELECT -1 AS _1) FORMAT Null; +select +_1; -- { clientError SYNTAX_ERROR } +select 1__0; -- { serverError UNKNOWN_IDENTIFIER } +select 1_; -- { serverError UNKNOWN_IDENTIFIER } +select 1_ ; -- { serverError UNKNOWN_IDENTIFIER } +select 10_; -- { serverError UNKNOWN_IDENTIFIER } +select 1_e5; -- { serverError UNKNOWN_IDENTIFIER } +select 1e_5; -- { serverError UNKNOWN_IDENTIFIER } +select 1e5_; -- { serverError UNKNOWN_IDENTIFIER } +select 1e_; -- { serverError UNKNOWN_IDENTIFIER } +select 1_.; -- { clientError SYNTAX_ERROR } +select 1e_1; -- { serverError UNKNOWN_IDENTIFIER } +select 0_x2; -- { serverError UNKNOWN_IDENTIFIER } +select 0x2_p2; -- { serverError UNKNOWN_IDENTIFIER } +select 0x2p_2; -- { serverError UNKNOWN_IDENTIFIER } +select 0x2p2_; -- { serverError UNKNOWN_IDENTIFIER } +select 0b; -- { serverError UNKNOWN_IDENTIFIER } +select 0b ; -- { serverError UNKNOWN_IDENTIFIER } +select 0x; -- { serverError UNKNOWN_IDENTIFIER } +select 0x ; -- { serverError UNKNOWN_IDENTIFIER } +select 0x_; -- { serverError 
UNKNOWN_IDENTIFIER } +select 0x_1; -- { serverError UNKNOWN_IDENTIFIER } diff --git a/tests/queries/0_stateless/02494_array_function_range.reference b/tests/queries/0_stateless/02494_array_function_range.reference new file mode 100644 index 00000000000..133d78d4a37 --- /dev/null +++ b/tests/queries/0_stateless/02494_array_function_range.reference @@ -0,0 +1,19 @@ +1 +1 +1 +[-1,0] +[-1] +[] +[5,4,3,2,1] +[5,4,3,2,1,0] +[1] +[-5,-4,-3,-2,-1,0,1,2,3,4] +[-4,-3,-2,-1,0,1,2,3,4,5] +[-3,-2,-1,0,1,2,3,4,5,6] +[-2,-1,0,1,2,3,4,5,6,7] +[-1,0,1,2,3,4,5,6,7,8] +[0,1,2,3,4,5,6,7,8,9] +[1,2,3,4,5,6,7,8,9,10] +[2,3,4,5,6,7,8,9,10,11] +[3,4,5,6,7,8,9,10,11,12] +[4,5,6,7,8,9,10,11,12,13] diff --git a/tests/queries/0_stateless/02494_array_function_range.sql b/tests/queries/0_stateless/02494_array_function_range.sql new file mode 100644 index 00000000000..bd945d55254 --- /dev/null +++ b/tests/queries/0_stateless/02494_array_function_range.sql @@ -0,0 +1,10 @@ +SELECT range(100) == range(0, 100) and range(0, 100) == range(0, 100, 1); +SELECT range(100) == range(cast('100', 'Int8')) and range(100) == range(cast('100', 'Int16')) and range(100) == range(cast('100', 'Int32')) and range(100) == range(cast('100', 'Int64')); +SELECT range(cast('100', 'Int8')) == range(0, cast('100', 'Int8')) and range(0, cast('100', 'Int8')) == range(0, cast('100', 'Int8'), 1) and range(0, cast('100', 'Int8')) == range(0, cast('100', 'Int8'), cast('1', 'Int8')); +SELECT range(-1, 1); +SELECT range(-1, 1, 2); +SELECT range(1,1); +SELECT range(5, 0, -1); +SELECT range(5, -1, -1); +SELECT range(1, 257, 65535); +SELECT range(cast(number - 5, 'Int8'), cast(number + 5, 'Int8')) from system.numbers limit 10; \ No newline at end of file diff --git a/tests/queries/0_stateless/02494_zero_copy_projection_cancel_fetch.reference b/tests/queries/0_stateless/02494_zero_copy_projection_cancel_fetch.reference new file mode 100644 index 00000000000..5878ba47225 --- /dev/null +++ b/tests/queries/0_stateless/02494_zero_copy_projection_cancel_fetch.reference @@ -0,0 +1,2 @@ +1000 +1000 diff --git a/tests/queries/0_stateless/02494_zero_copy_projection_cancel_fetch.sh b/tests/queries/0_stateless/02494_zero_copy_projection_cancel_fetch.sh new file mode 100755 index 00000000000..b72c3eb56c7 --- /dev/null +++ b/tests/queries/0_stateless/02494_zero_copy_projection_cancel_fetch.sh @@ -0,0 +1,66 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -n --query " +DROP TABLE IF EXISTS wikistat1 SYNC; +DROP TABLE IF EXISTS wikistat2 SYNC; +" + +for i in {1..2}; do + $CLICKHOUSE_CLIENT --query " + CREATE TABLE wikistat$i + ( + time DateTime, + project LowCardinality(String), + subproject LowCardinality(String), + path String, + hits UInt64, + PROJECTION total + ( + SELECT + project, + subproject, + path, + sum(hits), + count() + GROUP BY + project, + subproject, + path + ) + ) + ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/02494_zero_copy_projection_cancel_fetch', '$i') + ORDER BY (path, time) + SETTINGS min_bytes_for_wide_part = 0, storage_policy = 's3_cache', + allow_remote_fs_zero_copy_replication = 1, + max_replicated_fetches_network_bandwidth = 100 + " +done + +$CLICKHOUSE_CLIENT --query "SYSTEM STOP FETCHES wikistat2" +$CLICKHOUSE_CLIENT --query "INSERT INTO wikistat1 SELECT toDateTime('2020-10-01 00:00:00'), 'hello', 'world', '/data/path', 10 from numbers(1000)" + +$CLICKHOUSE_CLIENT --query "SYSTEM START FETCHES wikistat2" +$CLICKHOUSE_CLIENT --query "SYSTEM SYNC REPLICA wikistat2" & + +# With previous versions LOGICAL_ERROR will be thrown +# and server will be crashed in debug mode. +sleep 1.5 +$CLICKHOUSE_CLIENT --query "SYSTEM STOP FETCHES wikistat2" +sleep 1.5 + +$CLICKHOUSE_CLIENT --query "ALTER TABLE wikistat2 MODIFY SETTING max_replicated_fetches_network_bandwidth = 0" +$CLICKHOUSE_CLIENT --query "SYSTEM START FETCHES wikistat2" +wait + +$CLICKHOUSE_CLIENT --query "SELECT count() FROM wikistat1 WHERE NOT ignore(*)" +$CLICKHOUSE_CLIENT --query "SELECT count() FROM wikistat2 WHERE NOT ignore(*)" + +$CLICKHOUSE_CLIENT -n --query " +DROP TABLE IF EXISTS wikistat1 SYNC; +DROP TABLE IF EXISTS wikistat2 SYNC; +" diff --git a/tests/queries/0_stateless/02495_analyzer_storage_join.reference b/tests/queries/0_stateless/02495_analyzer_storage_join.reference new file mode 100644 index 00000000000..509a79bc618 --- /dev/null +++ b/tests/queries/0_stateless/02495_analyzer_storage_join.reference @@ -0,0 +1,215 @@ +--- no name clashes --- +id2 id1 val key2 key1 a b x y +0 0 0 6 -6 60 600 6000 60000 +5 -5 55 5 -5 50 500 5000 50000 +4 -4 44 4 -4 40 400 4000 40000 +3 -3 33 3 -3 30 300 3000 30000 +2 -2 22 2 -2 20 200 2000 20000 +id1 val key1 b x +0 0 -6 600 6000 +-5 55 -5 500 5000 +-4 44 -4 400 4000 +-3 33 -3 300 3000 +-2 22 -2 200 2000 +id1 val key1 b x +0 0 -6 600 6000 +-5 55 -5 500 5000 +-4 44 -4 400 4000 +-3 33 -3 300 3000 +-2 22 -2 200 2000 +val b x +0 600 6000 +55 500 5000 +44 400 4000 +33 300 3000 +22 200 2000 +val +0 +55 +44 +33 +22 +x +6000 +5000 +4000 +3000 +2000 +--- name clashes --- +-- using -- +key1 key2 t.b t.x val a tj.b tj.x y +-6 6 0 0 0 60 600 6000 60000 +-5 5 55 555 5555 50 500 5000 50000 +-4 4 44 444 4444 40 400 4000 40000 +-3 3 33 333 2222 30 300 3000 30000 +-2 2 22 222 2222 20 200 2000 20000 +key1 +-6 +-5 +-4 +-3 +-2 +t.key1 tj.key1 +0 -6 +-5 -5 +-4 -4 +-3 -3 +-2 -2 +t.key2 tj.key2 +0 6 +5 5 +4 4 +3 3 +2 2 +t.b tj.b +0 600 +55 500 +44 400 +33 300 +22 200 +t.x tj.b +0 600 +555 500 +444 400 +333 300 +222 200 +a +60 +50 +40 +30 +20 +tj.b +600 +500 +400 +300 +200 +tj.x +6000 +5000 +4000 +3000 +2000 +y +60000 +50000 +40000 +30000 +20000 +a +60 +50 +40 +30 +20 +y +60000 +50000 +40000 +30000 +20000 +val +0 +5555 +4444 +2222 +2222 +val +0 +5555 +4444 +2222 +2222 +-- on -- +t.key2 t.key1 t.b t.x val tj.key2 tj.key1 a tj.b tj.x y +5 -5 55 555 5555 5 -5 50 500 5000 50000 +4 -4 44 444 4444 4 -4 40 400 4000 40000 +3 -3 33 333 2222 3 -3 30 300 3000 30000 +2 -2 22 222 
2222 2 -2 20 200 2000 20000 +0 0 0 0 0 6 -6 60 600 6000 60000 +t.key1 tj.key1 +-5 -5 +-4 -4 +-3 -3 +-2 -2 +0 -6 +t.key2 tj.key2 +5 5 +4 4 +3 3 +2 2 +0 6 +t.b tj.b +55 500 +44 400 +33 300 +22 200 +0 600 +t.x tj.b +555 500 +444 400 +333 300 +222 200 +0 600 +a +50 +40 +30 +20 +60 +tj.b +500 +400 +300 +200 +600 +tj.x +5000 +4000 +3000 +2000 +6000 +y +50000 +40000 +30000 +20000 +60000 +a +50 +40 +30 +20 +60 +y +50000 +40000 +30000 +20000 +60000 +val +5555 +4444 +2222 +2222 +0 +val +5555 +4444 +2222 +2222 +0 +--- unsupported and illegal conditions --- +t.key2 t.key1 t.b t.x val tj.key2 tj.key1 a tj.b tj.x y +5 -5 55 555 5555 4 -4 40 400 4000 40000 +4 -4 44 444 4444 3 -3 30 300 3000 30000 +3 -3 33 333 2222 2 -2 20 200 2000 20000 +0 0 0 0 0 5 -5 50 500 5000 50000 +0 0 0 0 0 6 -6 60 600 6000 60000 +t.key2 t.key1 t.b t.x val tj.key2 tj.key1 a tj.b tj.x y +2 -2 22 222 2222 2 -2 20 200 2000 20000 +0 0 0 0 0 3 -3 30 300 3000 30000 +0 0 0 0 0 4 -4 40 400 4000 40000 +0 0 0 0 0 5 -5 50 500 5000 50000 +0 0 0 0 0 6 -6 60 600 6000 60000 diff --git a/tests/queries/0_stateless/02495_analyzer_storage_join.sql b/tests/queries/0_stateless/02495_analyzer_storage_join.sql new file mode 100644 index 00000000000..6a4c1e45d69 --- /dev/null +++ b/tests/queries/0_stateless/02495_analyzer_storage_join.sql @@ -0,0 +1,88 @@ +DROP TABLE IF EXISTS t; +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS tj; + +SET allow_experimental_analyzer = 1; + +CREATE TABLE tj (key2 UInt64, key1 Int64, a UInt64, b UInt64, x UInt64, y UInt64) ENGINE = Join(ALL, RIGHT, key1, key2); +INSERT INTO tj VALUES (2, -2, 20, 200, 2000, 20000), (3, -3, 30, 300, 3000, 30000), (4, -4, 40, 400, 4000, 40000), (5, -5, 50, 500, 5000, 50000), (6, -6, 60, 600, 6000, 60000); + +SELECT '--- no name clashes ---'; + +CREATE TABLE t1 (id2 UInt64, id1 Int64, val UInt64) ENGINE = Memory; +INSERT INTO t1 VALUES (1, -1, 11), (2, -2, 22), (3, -3, 33), (4, -4, 44), (5, -5, 55); + +SELECT * FROM t1 ALL RIGHT JOIN tj ON t1.id1 == tj.key1 AND t1.id2 == tj.key2 ORDER BY key1 FORMAT TSVWithNames; +SELECT id1, val, key1, b, x FROM t1 ALL RIGHT JOIN tj ON t1.id1 == tj.key1 AND t1.id2 == tj.key2 ORDER BY key1 FORMAT TSVWithNames; +SELECT t1.id1, t1.val, tj.key1, tj.b, tj.x FROM t1 ALL RIGHT JOIN tj ON t1.id1 == tj.key1 AND t1.id2 == tj.key2 ORDER BY key1 FORMAT TSVWithNames; +SELECT val, b, x FROM t1 ALL RIGHT JOIN tj ON t1.id1 == tj.key1 AND t1.id2 == tj.key2 ORDER BY key1 FORMAT TSVWithNames; +SELECT val FROM t1 ALL RIGHT JOIN tj ON t1.id1 == tj.key1 AND t1.id2 == tj.key2 ORDER BY key1 FORMAT TSVWithNames; +SELECT x FROM t1 ALL RIGHT JOIN tj ON t1.id1 == tj.key1 AND t1.id2 == tj.key2 ORDER BY key1 FORMAT TSVWithNames; + +SELECT '--- name clashes ---'; + +CREATE TABLE t (key2 UInt64, key1 Int64, b UInt64, x UInt64, val UInt64) ENGINE = Memory; +INSERT INTO t VALUES (1, -1, 11, 111, 1111), (2, -2, 22, 222, 2222), (3, -3, 33, 333, 2222), (4, -4, 44, 444, 4444), (5, -5, 55, 555, 5555) + +SELECT '-- using --'; + +SELECT * FROM t ALL RIGHT JOIN tj USING (key1, key2) ORDER BY key1 FORMAT TSVWithNames; +SELECT key1 FROM t ALL RIGHT JOIN tj USING (key1, key2) ORDER BY key1 FORMAT TSVWithNames; +SELECT t.key1, tj.key1 FROM t ALL RIGHT JOIN tj USING (key1, key2) ORDER BY key1 FORMAT TSVWithNames; +SELECT t.key2, tj.key2 FROM t ALL RIGHT JOIN tj USING (key1, key2) ORDER BY key1 FORMAT TSVWithNames; +SELECT t.b, tj.b FROM t ALL RIGHT JOIN tj USING (key1, key2) ORDER BY key1 FORMAT TSVWithNames; +SELECT t.x, tj.b FROM t ALL RIGHT JOIN tj USING (key1, key2) ORDER BY key1 FORMAT TSVWithNames; 
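+-- Note on the single-column selects below: columns present in only one of the joined tables
+-- (a and y from tj, val from t) are expected to resolve without qualification, while the
+-- unqualified b and x, which exist on both sides, are expected to fail with AMBIGUOUS_IDENTIFIER.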
+SELECT tj.a FROM t ALL RIGHT JOIN tj USING (key1, key2) ORDER BY key1 FORMAT TSVWithNames; +SELECT tj.b FROM t ALL RIGHT JOIN tj USING (key1, key2) ORDER BY key1 FORMAT TSVWithNames; +SELECT tj.x FROM t ALL RIGHT JOIN tj USING (key1, key2) ORDER BY key1 FORMAT TSVWithNames; +SELECT tj.y FROM t ALL RIGHT JOIN tj USING (key1, key2) ORDER BY key1 FORMAT TSVWithNames; +SELECT a FROM t ALL RIGHT JOIN tj USING (key1, key2) ORDER BY key1 FORMAT TSVWithNames; +SELECT b FROM t ALL RIGHT JOIN tj USING (key1, key2) ORDER BY key1 FORMAT TSVWithNames; -- { serverError AMBIGUOUS_IDENTIFIER } +SELECT x FROM t ALL RIGHT JOIN tj USING (key1, key2) ORDER BY key1 FORMAT TSVWithNames; -- { serverError AMBIGUOUS_IDENTIFIER } +SELECT y FROM t ALL RIGHT JOIN tj USING (key1, key2) ORDER BY key1 FORMAT TSVWithNames; +SELECT t.val FROM t ALL RIGHT JOIN tj USING (key1, key2) ORDER BY key1 FORMAT TSVWithNames; +SELECT val FROM t ALL RIGHT JOIN tj USING (key1, key2) ORDER BY key1 FORMAT TSVWithNames; + +SELECT '-- on --'; + +SELECT * FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 ORDER BY t.key1 FORMAT TSVWithNames; +SELECT key1 FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 ORDER BY t.key1 FORMAT TSVWithNames; -- { serverError AMBIGUOUS_IDENTIFIER } +SELECT t.key1, tj.key1 FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 ORDER BY t.key1 FORMAT TSVWithNames; +SELECT t.key2, tj.key2 FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 ORDER BY t.key1 FORMAT TSVWithNames; +SELECT t.b, tj.b FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 ORDER BY t.key1 FORMAT TSVWithNames; +SELECT t.x, tj.b FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 ORDER BY t.key1 FORMAT TSVWithNames; +SELECT tj.a FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 ORDER BY t.key1 FORMAT TSVWithNames; +SELECT tj.b FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 ORDER BY t.key1 FORMAT TSVWithNames; +SELECT tj.x FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 ORDER BY t.key1 FORMAT TSVWithNames; +SELECT tj.y FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 ORDER BY t.key1 FORMAT TSVWithNames; +SELECT a FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 ORDER BY t.key1 FORMAT TSVWithNames; +SELECT b FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 ORDER BY t.key1 FORMAT TSVWithNames; -- { serverError AMBIGUOUS_IDENTIFIER } +SELECT x FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 ORDER BY t.key1 FORMAT TSVWithNames; -- { serverError AMBIGUOUS_IDENTIFIER } +SELECT y FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 ORDER BY t.key1 FORMAT TSVWithNames; +SELECT t.val FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 ORDER BY t.key1 FORMAT TSVWithNames; +SELECT val FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 ORDER BY t.key1 FORMAT TSVWithNames; + +SELECT '--- unsupported and illegal conditions ---'; + +SELECT * FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 + 1 FORMAT TSVWithNames; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } +SELECT * FROM t ALL RIGHT JOIN tj ON t.key1 + 1 == tj.key1 AND toUInt64(t.key2 - 1) == tj.key2 ORDER BY t.key1, tj.key2 FORMAT TSVWithNames; -- Ok: expression on the left table + +SELECT * FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 AND 1 == 1 FORMAT TSVWithNames; -- { serverError 
INCOMPATIBLE_TYPE_OF_JOIN } +SELECT * FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 AND 1 == 2 FORMAT TSVWithNames; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } + +SELECT * FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 AND tj.a == 20 FORMAT TSVWithNames; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } +SELECT * FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 AND t.b == 22 ORDER BY t.key1, tj.key2 FORMAT TSVWithNames; -- Ok: t.b from the left table + +SELECT * FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 AND 1 != 1 FORMAT TSVWithNames; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } +SELECT * FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 AND NULL FORMAT TSVWithNames; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } + +SELECT * FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 AND 'aaa' FORMAT TSVWithNames; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } +SELECT * FROM t ALL RIGHT JOIN tj ON 'aaa' FORMAT TSVWithNames; -- { serverError INVALID_JOIN_ON_EXPRESSION } + +SELECT * FROM t ALL RIGHT JOIN tj ON t.key1 == tj.key1 AND t.key2 == tj.key2 AND 1 FORMAT TSVWithNames; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } +SELECT * FROM t ALL RIGHT JOIN tj ON 0 FORMAT TSVWithNames; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } +SELECT * FROM t ALL RIGHT JOIN tj ON 1 FORMAT TSVWithNames; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } + +DROP TABLE IF EXISTS t; +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS tj; diff --git a/tests/queries/0_stateless/02495_concat_with_separator.reference b/tests/queries/0_stateless/02495_concat_with_separator.reference new file mode 100644 index 00000000000..8f0ea917f4b --- /dev/null +++ b/tests/queries/0_stateless/02495_concat_with_separator.reference @@ -0,0 +1,19 @@ +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +\N +\N +\N diff --git a/tests/queries/0_stateless/02495_concat_with_separator.sql b/tests/queries/0_stateless/02495_concat_with_separator.sql new file mode 100644 index 00000000000..916c4cda1b7 --- /dev/null +++ b/tests/queries/0_stateless/02495_concat_with_separator.sql @@ -0,0 +1,27 @@ +select concatWithSeparator('|', 'a', 'b') == 'a|b'; +select concatWithSeparator('|', 'a', materialize('b')) == 'a|b'; +select concatWithSeparator('|', materialize('a'), 'b') == 'a|b'; +select concatWithSeparator('|', materialize('a'), materialize('b')) == 'a|b'; + +select concatWithSeparator('|', 'a', toFixedString('b', 1)) == 'a|b'; +select concatWithSeparator('|', 'a', materialize(toFixedString('b', 1))) == 'a|b'; +select concatWithSeparator('|', materialize('a'), toFixedString('b', 1)) == 'a|b'; +select concatWithSeparator('|', materialize('a'), materialize(toFixedString('b', 1))) == 'a|b'; + +select concatWithSeparator('|', toFixedString('a', 1), 'b') == 'a|b'; +select concatWithSeparator('|', toFixedString('a', 1), materialize('b')) == 'a|b'; +select concatWithSeparator('|', materialize(toFixedString('a', 1)), 'b') == 'a|b'; +select concatWithSeparator('|', materialize(toFixedString('a', 1)), materialize('b')) == 'a|b'; + +select concatWithSeparator('|', toFixedString('a', 1), toFixedString('b', 1)) == 'a|b'; +select concatWithSeparator('|', toFixedString('a', 1), materialize(toFixedString('b', 1))) == 'a|b'; +select concatWithSeparator('|', materialize(toFixedString('a', 1)), toFixedString('b', 1)) == 'a|b'; +select concatWithSeparator('|', materialize(toFixedString('a', 1)), materialize(toFixedString('b', 1))) == 'a|b'; + +select concatWithSeparator(null, 
'a', 'b') == null; +select concatWithSeparator('1', null, 'b') == null; +select concatWithSeparator('1', 'a', null) == null; + +select concatWithSeparator(materialize('|'), 'a', 'b'); -- { serverError 44 } +select concatWithSeparator(); -- { serverError 42 } +select concatWithSeparator('|', 'a', 100); -- { serverError 43 } diff --git a/tests/queries/0_stateless/02495_s3_filter_by_file.reference b/tests/queries/0_stateless/02495_s3_filter_by_file.reference new file mode 100644 index 00000000000..7cac31e2d5c --- /dev/null +++ b/tests/queries/0_stateless/02495_s3_filter_by_file.reference @@ -0,0 +1,2 @@ +5 5 9 +1 test_02495_1 diff --git a/tests/queries/0_stateless/02495_s3_filter_by_file.sql b/tests/queries/0_stateless/02495_s3_filter_by_file.sql new file mode 100644 index 00000000000..8d6d8a8a5a4 --- /dev/null +++ b/tests/queries/0_stateless/02495_s3_filter_by_file.sql @@ -0,0 +1,22 @@ +-- Tags: no-parallel, no-fasttest + +DROP TABLE IF EXISTS t_s3_filter_02495; + +CREATE TABLE t_s3_filter_02495 (a UInt64) +ENGINE = S3(s3_conn, filename = 'test_02495_{_partition_id}', format = Parquet) +PARTITION BY a; + +INSERT INTO t_s3_filter_02495 SELECT number FROM numbers(10) SETTINGS s3_truncate_on_insert=1; + +SET max_rows_to_read = 5; + +WITH splitByChar('_', _file)[3]::UInt64 AS num +SELECT count(), min(num), max(num) +FROM s3(s3_conn, filename = 'test_02495_*', format = Parquet) +WHERE num >= 5; + +SELECT *, _file +FROM s3(s3_conn, filename = 'test_02495_1', format = Parquet) +WHERE _file = 'test_02495_1'; + +DROP TABLE t_s3_filter_02495; diff --git a/tests/queries/0_stateless/02496_storage_s3_profile_events.reference b/tests/queries/0_stateless/02496_storage_s3_profile_events.reference new file mode 100644 index 00000000000..df850b51d6e --- /dev/null +++ b/tests/queries/0_stateless/02496_storage_s3_profile_events.reference @@ -0,0 +1,2 @@ +10 +0 1 10 1 diff --git a/tests/queries/0_stateless/02496_storage_s3_profile_events.sql b/tests/queries/0_stateless/02496_storage_s3_profile_events.sql new file mode 100644 index 00000000000..6ca4c3370d5 --- /dev/null +++ b/tests/queries/0_stateless/02496_storage_s3_profile_events.sql @@ -0,0 +1,23 @@ +-- Tags: no-parallel, no-fasttest, no-random-settings + +DROP TABLE IF EXISTS t_s3_events_02496; + +CREATE TABLE t_s3_events_02496 (a UInt64) +ENGINE = S3(s3_conn, filename = 'test_02496_{_partition_id}', format = Parquet) +PARTITION BY a; + +INSERT INTO t_s3_events_02496 SELECT number FROM numbers(10) SETTINGS s3_truncate_on_insert=1; + +SET max_threads = 1; +SELECT count() FROM s3(s3_conn, filename = 'test_02496_*', format = Parquet, structure = 'a UInt64'); +SYSTEM FLUSH LOGS; + +SELECT + ProfileEvents['S3HeadObject'], + ProfileEvents['S3ListObjects'], + ProfileEvents['RemoteFSPrefetches'], + ProfileEvents['IOBufferAllocBytes'] < 100000 +FROM system.query_log WHERE current_database = currentDatabase() +AND type = 'QueryFinish' AND query ILIKE 'SELECT count() FROM s3%test_02496%'; + +DROP TABLE t_s3_events_02496; diff --git a/tests/queries/0_stateless/02497_if_transform_strings_to_enum.reference b/tests/queries/0_stateless/02497_if_transform_strings_to_enum.reference new file mode 100644 index 00000000000..06863f1858b --- /dev/null +++ b/tests/queries/0_stateless/02497_if_transform_strings_to_enum.reference @@ -0,0 +1,537 @@ +other +other +google +other +censor.net +other +yahoo +other +other +other +SELECT transform(number, [2, 4, 6], _CAST([\'google\', \'censor.net\', \'yahoo\'], \'Array(Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, 
\\\'yahoo\\\' = 4))\'), _CAST(\'other\', \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4)\')) +FROM system.numbers +LIMIT 10 +QUERY id: 0 + PROJECTION COLUMNS + transform(number, [2, 4, 6], [\'google\', \'censor.net\', \'yahoo\'], \'other\') String + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: transform, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 5, nodes: 4 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + CONSTANT id: 8, constant_value: Array_[UInt64_2, UInt64_4, UInt64_6], constant_value_type: Array(UInt8) + FUNCTION id: 9, function_name: _CAST, function_type: ordinary, result_type: Array(Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4)) + ARGUMENTS + LIST id: 10, nodes: 2 + CONSTANT id: 11, constant_value: Array_[\'google\', \'censor.net\', \'yahoo\'], constant_value_type: Array(String) + CONSTANT id: 12, constant_value: \'Array(Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4))\', constant_value_type: String + FUNCTION id: 13, function_name: _CAST, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4) + ARGUMENTS + LIST id: 14, nodes: 2 + CONSTANT id: 15, constant_value: \'other\', constant_value_type: String + CONSTANT id: 16, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4)\', constant_value_type: String + JOIN TREE + TABLE id: 7, table_name: system.numbers + LIMIT + CONSTANT id: 17, constant_value: UInt64_10, constant_value_type: UInt8 +google +google +google +google +google +google +censor.net +censor.net +censor.net +censor.net +SELECT if(number > 5, _CAST(\'censor.net\', \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\'), _CAST(\'google\', \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\')) +FROM system.numbers +LIMIT 10 +QUERY id: 0 + PROJECTION COLUMNS + if(greater(number, 5), \'censor.net\', \'google\') String + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: if, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 5, nodes: 3 + FUNCTION id: 6, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 + CONSTANT id: 10, constant_value: UInt64_5, constant_value_type: UInt8 + FUNCTION id: 11, function_name: _CAST, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2) + ARGUMENTS + LIST id: 12, nodes: 2 + CONSTANT id: 13, constant_value: \'censor.net\', constant_value_type: String + CONSTANT id: 14, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\', constant_value_type: String + FUNCTION id: 15, function_name: _CAST, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2) + ARGUMENTS + LIST id: 16, nodes: 2 + CONSTANT id: 17, constant_value: \'google\', constant_value_type: String + CONSTANT id: 18, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\', constant_value_type: String + JOIN TREE + TABLE id: 9, table_name: system.numbers + LIMIT + CONSTANT id: 19, constant_value: UInt64_10, constant_value_type: UInt8 
+other1 +other1 +google1 +other1 +censor.net1 +other1 +yahoo1 +other1 +other1 +other1 +SELECT concat(transform(number, [2, 4, 6], [\'google\', \'censor.net\', \'yahoo\'], \'other\'), \'1\') +FROM system.numbers +LIMIT 10 +QUERY id: 0 + PROJECTION COLUMNS + CONCAT(transform(number, [2, 4, 6], [\'google\', \'censor.net\', \'yahoo\'], \'other\'), \'1\') String + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: concat, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 5, nodes: 1 + FUNCTION id: 6, function_name: transform, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 7, nodes: 4 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 + CONSTANT id: 10, constant_value: Array_[UInt64_2, UInt64_4, UInt64_6], constant_value_type: Array(UInt8) + FUNCTION id: 11, function_name: _CAST, function_type: ordinary, result_type: Array(Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4)) + ARGUMENTS + LIST id: 12, nodes: 2 + CONSTANT id: 13, constant_value: Array_[\'google\', \'censor.net\', \'yahoo\'], constant_value_type: Array(String) + CONSTANT id: 14, constant_value: \'Array(Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4))\', constant_value_type: String + FUNCTION id: 15, function_name: _CAST, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4) + ARGUMENTS + LIST id: 16, nodes: 2 + CONSTANT id: 17, constant_value: \'other\', constant_value_type: String + CONSTANT id: 18, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4)\', constant_value_type: String + CONSTANT id: 19, constant_value: \'1\', constant_value_type: String + JOIN TREE + TABLE id: 9, table_name: system.numbers + LIMIT + CONSTANT id: 20, constant_value: UInt64_10, constant_value_type: UInt8 +google1 +google1 +google1 +google1 +google1 +google1 +censor.net1 +censor.net1 +censor.net1 +censor.net1 +SELECT concat(if(number > 5, \'censor.net\', \'google\'), \'1\') +FROM system.numbers +LIMIT 10 +QUERY id: 0 + PROJECTION COLUMNS + CONCAT(if(greater(number, 5), \'censor.net\', \'google\'), \'1\') String + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: concat, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 3, nodes: 2 + FUNCTION id: 4, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 5, nodes: 1 + FUNCTION id: 6, function_name: if, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 7, nodes: 3 + FUNCTION id: 8, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 9, nodes: 2 + COLUMN id: 10, column_name: number, result_type: UInt64, source_id: 11 + CONSTANT id: 12, constant_value: UInt64_5, constant_value_type: UInt8 + FUNCTION id: 13, function_name: _CAST, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2) + ARGUMENTS + LIST id: 14, nodes: 2 + CONSTANT id: 15, constant_value: \'censor.net\', constant_value_type: String + CONSTANT id: 16, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\', constant_value_type: String + FUNCTION id: 17, function_name: _CAST, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2) + ARGUMENTS + LIST id: 18, nodes: 2 + CONSTANT id: 19, 
constant_value: \'google\', constant_value_type: String + CONSTANT id: 20, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\', constant_value_type: String + CONSTANT id: 21, constant_value: \'1\', constant_value_type: String + JOIN TREE + TABLE id: 11, table_name: system.numbers + LIMIT + CONSTANT id: 22, constant_value: UInt64_10, constant_value_type: UInt8 +google +google +google +google +google +google +censor.net +censor.net +censor.net +censor.net +SELECT value +FROM +( + SELECT if(number > 5, _CAST(\'censor.net\', \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\'), _CAST(\'google\', \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\')) AS value + FROM system.numbers + LIMIT 10 +) AS t1 +QUERY id: 0 + PROJECTION COLUMNS + value String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value, result_type: String, source_id: 3 + JOIN TREE + QUERY id: 3, alias: t1, is_subquery: 1 + PROJECTION COLUMNS + value String + PROJECTION + LIST id: 4, nodes: 1 + FUNCTION id: 5, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 6, nodes: 1 + FUNCTION id: 7, function_name: if, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 8, nodes: 3 + FUNCTION id: 9, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 10, nodes: 2 + COLUMN id: 11, column_name: number, result_type: UInt64, source_id: 12 + CONSTANT id: 13, constant_value: UInt64_5, constant_value_type: UInt8 + FUNCTION id: 14, function_name: _CAST, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2) + ARGUMENTS + LIST id: 15, nodes: 2 + CONSTANT id: 16, constant_value: \'censor.net\', constant_value_type: String + CONSTANT id: 17, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\', constant_value_type: String + FUNCTION id: 18, function_name: _CAST, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2) + ARGUMENTS + LIST id: 19, nodes: 2 + CONSTANT id: 20, constant_value: \'google\', constant_value_type: String + CONSTANT id: 21, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\', constant_value_type: String + JOIN TREE + TABLE id: 12, table_name: system.numbers + LIMIT + CONSTANT id: 22, constant_value: UInt64_10, constant_value_type: UInt8 +other +other +google +other +censor.net +other +yahoo +other +other +other +SELECT value +FROM +( + SELECT transform(number, [2, 4, 6], _CAST([\'google\', \'censor.net\', \'yahoo\'], \'Array(Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4))\'), _CAST(\'other\', \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4)\')) AS value + FROM system.numbers + LIMIT 10 +) AS t1 +QUERY id: 0 + PROJECTION COLUMNS + value String + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: value, result_type: String, source_id: 3 + JOIN TREE + QUERY id: 3, alias: t1, is_subquery: 1 + PROJECTION COLUMNS + value String + PROJECTION + LIST id: 4, nodes: 1 + FUNCTION id: 5, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 6, nodes: 1 + FUNCTION id: 7, function_name: transform, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 8, nodes: 4 + COLUMN id: 9, column_name: number, result_type: UInt64, source_id: 10 + CONSTANT id: 11, constant_value: Array_[UInt64_2, UInt64_4, UInt64_6], constant_value_type: Array(UInt8) + FUNCTION id: 12, function_name: _CAST, 
function_type: ordinary, result_type: Array(Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4)) + ARGUMENTS + LIST id: 13, nodes: 2 + CONSTANT id: 14, constant_value: Array_[\'google\', \'censor.net\', \'yahoo\'], constant_value_type: Array(String) + CONSTANT id: 15, constant_value: \'Array(Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4))\', constant_value_type: String + FUNCTION id: 16, function_name: _CAST, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4) + ARGUMENTS + LIST id: 17, nodes: 2 + CONSTANT id: 18, constant_value: \'other\', constant_value_type: String + CONSTANT id: 19, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4)\', constant_value_type: String + JOIN TREE + TABLE id: 10, table_name: system.numbers + LIMIT + CONSTANT id: 20, constant_value: UInt64_10, constant_value_type: UInt8 +google google +google google +google google +google google +google google +google google +censor.net censor.net +censor.net censor.net +censor.net censor.net +censor.net censor.net +SELECT + if(number > 5, _CAST(\'censor.net\', \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\'), _CAST(\'google\', \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\')) AS value, + value +FROM system.numbers +LIMIT 10 +QUERY id: 0 + PROJECTION COLUMNS + value String + value String + PROJECTION + LIST id: 1, nodes: 2 + FUNCTION id: 2, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: if, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 5, nodes: 3 + FUNCTION id: 6, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 + CONSTANT id: 10, constant_value: UInt64_5, constant_value_type: UInt8 + FUNCTION id: 11, function_name: _CAST, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2) + ARGUMENTS + LIST id: 12, nodes: 2 + CONSTANT id: 13, constant_value: \'censor.net\', constant_value_type: String + CONSTANT id: 14, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\', constant_value_type: String + FUNCTION id: 15, function_name: _CAST, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2) + ARGUMENTS + LIST id: 16, nodes: 2 + CONSTANT id: 17, constant_value: \'google\', constant_value_type: String + CONSTANT id: 18, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\', constant_value_type: String + FUNCTION id: 2, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: if, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 5, nodes: 3 + FUNCTION id: 6, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 7, nodes: 2 + COLUMN id: 8, column_name: number, result_type: UInt64, source_id: 9 + CONSTANT id: 10, constant_value: UInt64_5, constant_value_type: UInt8 + FUNCTION id: 11, function_name: _CAST, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2) + ARGUMENTS + LIST id: 12, nodes: 2 + CONSTANT id: 13, constant_value: \'censor.net\', constant_value_type: String + CONSTANT id: 14, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\', constant_value_type: 
String + FUNCTION id: 15, function_name: _CAST, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2) + ARGUMENTS + LIST id: 16, nodes: 2 + CONSTANT id: 17, constant_value: \'google\', constant_value_type: String + CONSTANT id: 18, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2)\', constant_value_type: String + JOIN TREE + TABLE id: 9, table_name: system.numbers + LIMIT + CONSTANT id: 19, constant_value: UInt64_10, constant_value_type: UInt8 +other other +other other +google google +other other +censor.net censor.net +other other +yahoo yahoo +other other +other other +other other +SELECT + transform(number, [2, 4, 6], _CAST([\'google\', \'censor.net\', \'yahoo\'], \'Array(Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4))\'), _CAST(\'other\', \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4)\')) AS value, + value +FROM system.numbers +LIMIT 10 +QUERY id: 0 + PROJECTION COLUMNS + value String + value String + PROJECTION + LIST id: 1, nodes: 2 + FUNCTION id: 2, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: transform, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 5, nodes: 4 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + CONSTANT id: 8, constant_value: Array_[UInt64_2, UInt64_4, UInt64_6], constant_value_type: Array(UInt8) + FUNCTION id: 9, function_name: _CAST, function_type: ordinary, result_type: Array(Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4)) + ARGUMENTS + LIST id: 10, nodes: 2 + CONSTANT id: 11, constant_value: Array_[\'google\', \'censor.net\', \'yahoo\'], constant_value_type: Array(String) + CONSTANT id: 12, constant_value: \'Array(Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4))\', constant_value_type: String + FUNCTION id: 13, function_name: _CAST, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4) + ARGUMENTS + LIST id: 14, nodes: 2 + CONSTANT id: 15, constant_value: \'other\', constant_value_type: String + CONSTANT id: 16, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4)\', constant_value_type: String + FUNCTION id: 2, function_name: toString, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 3, nodes: 1 + FUNCTION id: 4, function_name: transform, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 5, nodes: 4 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + CONSTANT id: 8, constant_value: Array_[UInt64_2, UInt64_4, UInt64_6], constant_value_type: Array(UInt8) + FUNCTION id: 9, function_name: _CAST, function_type: ordinary, result_type: Array(Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4)) + ARGUMENTS + LIST id: 10, nodes: 2 + CONSTANT id: 11, constant_value: Array_[\'google\', \'censor.net\', \'yahoo\'], constant_value_type: Array(String) + CONSTANT id: 12, constant_value: \'Array(Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4))\', constant_value_type: String + FUNCTION id: 13, function_name: _CAST, function_type: ordinary, result_type: Enum8(\'censor.net\' = 1, \'google\' = 2, \'other\' = 3, \'yahoo\' = 4) + ARGUMENTS + LIST id: 14, nodes: 2 + CONSTANT id: 15, constant_value: \'other\', constant_value_type: String + 
CONSTANT id: 16, constant_value: \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4)\', constant_value_type: String + JOIN TREE + TABLE id: 7, table_name: system.numbers + LIMIT + CONSTANT id: 17, constant_value: UInt64_10, constant_value_type: UInt8 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +SELECT transform(number, [NULL], _CAST([\'google\', \'censor.net\', \'yahoo\'], \'Array(Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4))\'), _CAST(\'other\', \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4)\')) +FROM +( + SELECT NULL AS number + FROM system.numbers + LIMIT 10 +) +QUERY id: 0 + PROJECTION COLUMNS + transform(number, [NULL], [\'google\', \'censor.net\', \'yahoo\'], \'other\') Nullable(Nothing) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: transform, function_type: ordinary, result_type: Nullable(Nothing) + ARGUMENTS + LIST id: 3, nodes: 4 + COLUMN id: 4, column_name: number, result_type: Nullable(Nothing), source_id: 5 + CONSTANT id: 6, constant_value: Array_[NULL], constant_value_type: Array(Nullable(Nothing)) + CONSTANT id: 7, constant_value: Array_[\'google\', \'censor.net\', \'yahoo\'], constant_value_type: Array(String) + CONSTANT id: 8, constant_value: \'other\', constant_value_type: String + JOIN TREE + QUERY id: 5, is_subquery: 1 + PROJECTION COLUMNS + number Nullable(Nothing) + PROJECTION + LIST id: 9, nodes: 1 + CONSTANT id: 10, constant_value: NULL, constant_value_type: Nullable(Nothing) + JOIN TREE + TABLE id: 11, table_name: system.numbers + LIMIT + CONSTANT id: 12, constant_value: UInt64_10, constant_value_type: UInt8 +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +SELECT transform(number, NULL, _CAST([\'google\', \'censor.net\', \'yahoo\'], \'Array(Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4))\'), _CAST(\'other\', \'Enum8(\\\'censor.net\\\' = 1, \\\'google\\\' = 2, \\\'other\\\' = 3, \\\'yahoo\\\' = 4)\')) +FROM system.numbers +LIMIT 10 +QUERY id: 0 + PROJECTION COLUMNS + transform(number, NULL, [\'google\', \'censor.net\', \'yahoo\'], \'other\') Nullable(Nothing) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: transform, function_type: ordinary, result_type: Nullable(Nothing) + ARGUMENTS + LIST id: 3, nodes: 4 + COLUMN id: 4, column_name: number, result_type: UInt64, source_id: 5 + CONSTANT id: 6, constant_value: NULL, constant_value_type: Nullable(Nothing) + CONSTANT id: 7, constant_value: Array_[\'google\', \'censor.net\', \'yahoo\'], constant_value_type: Array(String) + CONSTANT id: 8, constant_value: \'other\', constant_value_type: String + JOIN TREE + TABLE id: 5, table_name: system.numbers + LIMIT + CONSTANT id: 9, constant_value: UInt64_10, constant_value_type: UInt8 +other +other +google +other +censor.net +other +yahoo +other +other +other +SELECT transform(number, [2, 4, 6], [\'google\', \'censor.net\', \'yahoo\'], \'other\') +FROM system.numbers +LIMIT 10 +QUERY id: 0 + PROJECTION COLUMNS + transform(number, [2, 4, 6], [\'google\', \'censor.net\', \'yahoo\'], \'other\') String + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: transform, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 3, nodes: 4 + COLUMN id: 4, column_name: number, result_type: UInt64, source_id: 5 + CONSTANT id: 6, constant_value: Array_[UInt64_2, UInt64_4, UInt64_6], constant_value_type: Array(UInt8) + CONSTANT id: 7, constant_value: Array_[\'google\', 
\'censor.net\', \'yahoo\'], constant_value_type: Array(String) + CONSTANT id: 8, constant_value: \'other\', constant_value_type: String + JOIN TREE + TABLE id: 5, table_name: system.numbers + LIMIT + CONSTANT id: 9, constant_value: UInt64_10, constant_value_type: UInt8 +google +google +google +google +google +google +censor.net +censor.net +censor.net +censor.net +SELECT if(number > 5, \'censor.net\', \'google\') +FROM system.numbers +LIMIT 10 +QUERY id: 0 + PROJECTION COLUMNS + if(greater(number, 5), \'censor.net\', \'google\') String + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: if, function_type: ordinary, result_type: String + ARGUMENTS + LIST id: 3, nodes: 3 + FUNCTION id: 4, function_name: greater, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 5, nodes: 2 + COLUMN id: 6, column_name: number, result_type: UInt64, source_id: 7 + CONSTANT id: 8, constant_value: UInt64_5, constant_value_type: UInt8 + CONSTANT id: 9, constant_value: \'censor.net\', constant_value_type: String + CONSTANT id: 10, constant_value: \'google\', constant_value_type: String + JOIN TREE + TABLE id: 7, table_name: system.numbers + LIMIT + CONSTANT id: 11, constant_value: UInt64_10, constant_value_type: UInt8 diff --git a/tests/queries/0_stateless/02497_if_transform_strings_to_enum.sql b/tests/queries/0_stateless/02497_if_transform_strings_to_enum.sql new file mode 100644 index 00000000000..c23046c7b20 --- /dev/null +++ b/tests/queries/0_stateless/02497_if_transform_strings_to_enum.sql @@ -0,0 +1,52 @@ +SET allow_experimental_analyzer = 1; +SET optimize_if_transform_strings_to_enum = 1; + +SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; +EXPLAIN SYNTAX SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; +EXPLAIN QUERY TREE run_passes = 1 SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; + +SELECT number > 5 ? 'censor.net' : 'google' FROM system.numbers LIMIT 10; +EXPLAIN SYNTAX SELECT number > 5 ? 'censor.net' : 'google' FROM system.numbers LIMIT 10; +EXPLAIN QUERY TREE run_passes = 1 SELECT number > 5 ? 'censor.net' : 'google' FROM system.numbers LIMIT 10; + +SELECT CONCAT(transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other'), '1') FROM system.numbers LIMIT 10; +EXPLAIN SYNTAX SELECT CONCAT(transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other'), '1') FROM system.numbers LIMIT 10; +EXPLAIN QUERY TREE run_passes = 1 SELECT CONCAT(transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other'), '1') FROM system.numbers LIMIT 10; + +SELECT CONCAT(number > 5 ? 'censor.net' : 'google', '1') FROM system.numbers LIMIT 10; +EXPLAIN SYNTAX SELECT CONCAT(number > 5 ? 'censor.net' : 'google', '1') FROM system.numbers LIMIT 10; +EXPLAIN QUERY TREE run_passes = 1 SELECT CONCAT(number > 5 ? 'censor.net' : 'google', '1') FROM system.numbers LIMIT 10; + +SELECT t1.value FROM (SELECT number > 5 ? 'censor.net' : 'google' as value FROM system.numbers LIMIT 10) as t1; +EXPLAIN SYNTAX SELECT t1.value FROM (SELECT number > 5 ? 'censor.net' : 'google' as value FROM system.numbers LIMIT 10) as t1; +EXPLAIN QUERY TREE run_passes = 1 SELECT t1.value FROM (SELECT number > 5 ? 
'censor.net' : 'google' as value FROM system.numbers LIMIT 10) as t1; + +SELECT t1.value FROM (SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') as value FROM system.numbers LIMIT 10) as t1; +EXPLAIN SYNTAX SELECT t1.value FROM (SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') as value FROM system.numbers LIMIT 10) as t1; +EXPLAIN QUERY TREE run_passes = 1 SELECT t1.value FROM (SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') as value FROM system.numbers LIMIT 10) as t1; + +SELECT number > 5 ? 'censor.net' : 'google' as value, value FROM system.numbers LIMIT 10; +EXPLAIN SYNTAX SELECT number > 5 ? 'censor.net' : 'google' as value, value FROM system.numbers LIMIT 10; +EXPLAIN QUERY TREE run_passes = 1 SELECT number > 5 ? 'censor.net' : 'google' as value, value FROM system.numbers LIMIT 10; + +SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') as value, value FROM system.numbers LIMIT 10; +EXPLAIN SYNTAX SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') as value, value FROM system.numbers LIMIT 10; +EXPLAIN QUERY TREE run_passes = 1 SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') as value, value FROM system.numbers LIMIT 10; + +SELECT transform(number, [NULL], ['google', 'censor.net', 'yahoo'], 'other') FROM (SELECT NULL as number FROM system.numbers LIMIT 10); +EXPLAIN SYNTAX SELECT transform(number, [NULL], ['google', 'censor.net', 'yahoo'], 'other') FROM (SELECT NULL as number FROM system.numbers LIMIT 10); +EXPLAIN QUERY TREE run_passes = 1 SELECT transform(number, [NULL], ['google', 'censor.net', 'yahoo'], 'other') FROM (SELECT NULL as number FROM system.numbers LIMIT 10); + +SELECT transform(number, NULL, ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; +EXPLAIN SYNTAX SELECT transform(number, NULL, ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; +EXPLAIN QUERY TREE run_passes = 1 SELECT transform(number, NULL, ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; + +SET optimize_if_transform_strings_to_enum = 0; + +SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; +EXPLAIN SYNTAX SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; +EXPLAIN QUERY TREE run_passes = 1 SELECT transform(number, [2, 4, 6], ['google', 'censor.net', 'yahoo'], 'other') FROM system.numbers LIMIT 10; + +SELECT number > 5 ? 'censor.net' : 'google' FROM system.numbers LIMIT 10; +EXPLAIN SYNTAX SELECT number > 5 ? 'censor.net' : 'google' FROM system.numbers LIMIT 10; +EXPLAIN QUERY TREE run_passes = 1 SELECT number > 5 ? 
'censor.net' : 'google' FROM system.numbers LIMIT 10; diff --git a/tests/queries/0_stateless/02497_schema_inference_nulls.reference b/tests/queries/0_stateless/02497_schema_inference_nulls.reference new file mode 100644 index 00000000000..42dea6294e2 --- /dev/null +++ b/tests/queries/0_stateless/02497_schema_inference_nulls.reference @@ -0,0 +1,48 @@ +JSONEachRow +x Array(Nullable(Int64)) +x Array(Nullable(Int64)) +x Array(Nullable(Int64)) +x Array(Nullable(Int64)) +x Tuple(Nullable(String), Nullable(Int64)) +x Tuple(Nullable(String), Nullable(Int64)) +x Map(String, Nullable(Int64)) +x Map(String, Nullable(Int64)) +x Array(Nullable(Int64)) +x Array(Array(Nullable(Int64))) +x Array(Map(String, Nullable(Int64))) +x Array(Array(Nullable(String))) +x Array(Int64) +x Array(Nullable(Int64)) +x Array(Int64) +x Array(Nullable(Int64)) +JSONCompactEachRow +c1 Array(Nullable(Int64)) +c1 Array(Nullable(Int64)) +c1 Array(Nullable(Int64)) +c1 Array(Nullable(Int64)) +c1 Tuple(Nullable(String), Nullable(Int64)) +c1 Tuple(Nullable(String), Nullable(Int64)) +c1 Map(String, Nullable(Int64)) +c1 Map(String, Nullable(Int64)) +c1 Array(Nullable(Int64)) +c1 Array(Array(Nullable(Int64))) +c1 Array(Map(String, Nullable(Int64))) +c1 Array(Array(Nullable(String))) +c1 Array(Int64) +c1 Array(Nullable(Int64)) +c1 Array(Int64) +c1 Array(Nullable(Int64)) +CSV +c1 Array(Nullable(Int64)) +c1 Array(Nullable(Int64)) +c1 Array(Nullable(Int64)) +c1 Array(Nullable(Int64)) +c1 Map(String, Nullable(Int64)) +c1 Map(String, Nullable(Int64)) +c1 Array(Array(Nullable(Int64))) +c1 Array(Map(String, Nullable(Int64))) +c1 Array(Array(Nullable(String))) +c1 Array(Int64) +c1 Array(Nullable(Int64)) +c1 Array(Int64) +c1 Array(Nullable(Int64)) diff --git a/tests/queries/0_stateless/02497_schema_inference_nulls.sql b/tests/queries/0_stateless/02497_schema_inference_nulls.sql new file mode 100644 index 00000000000..60cdaedcbd9 --- /dev/null +++ b/tests/queries/0_stateless/02497_schema_inference_nulls.sql @@ -0,0 +1,63 @@ +select 'JSONEachRow'; +set schema_inference_make_columns_nullable=1; +desc format(JSONEachRow, '{"x" : 1234}, {"x" : "String"}') settings input_format_json_try_infer_numbers_from_strings=1; -- { serverError TYPE_MISMATCH } +desc format(JSONEachRow, '{"x" : [null, 1]}'); +desc format(JSONEachRow, '{"x" : [null, 1]}, {"x" : []}'); +desc format(JSONEachRow, '{"x" : [null, 1]}, {"x" : [null]}'); +desc format(JSONEachRow, '{"x" : [null, 1]}, {"x" : [1, null]}'); +desc format(JSONEachRow, '{"x" : [null, 1]}, {"x" : ["abc", 1]}'); +desc format(JSONEachRow, '{"x" : [null, 1]}, {"x" : ["abc", null]}'); +desc format(JSONEachRow, '{"x" : {}}, {"x" : {"a" : 1}}'); +desc format(JSONEachRow, '{"x" : {"a" : null}}, {"x" : {"b" : 1}}'); +desc format(JSONEachRow, '{"x" : null}, {"x" : [1, 2]}'); +desc format(JSONEachRow, '{"x" : [[], [null], [1, 2, 3]]}'); +desc format(JSONEachRow, '{"x" : [{"a" : null}, {"b" : 1}]}'); +desc format(JSONEachRow, '{"x" : [["2020-01-01", null, "1234"], ["abcd"]]}'); + +set schema_inference_make_columns_nullable=0; +desc format(JSONEachRow, '{"x" : [1, 2]}'); +desc format(JSONEachRow, '{"x" : [null, 1]}'); +desc format(JSONEachRow, '{"x" : [1, 2]}, {"x" : [3]}'); +desc format(JSONEachRow, '{"x" : [1, 2]}, {"x" : [null]}'); + +select 'JSONCompactEachRow'; +set schema_inference_make_columns_nullable=1; +desc format(JSONCompactEachRow, '[1234], ["String"]') settings input_format_json_try_infer_numbers_from_strings=1; -- { serverError TYPE_MISMATCH } +desc format(JSONCompactEachRow, '[[null, 1]]'); +desc 
format(JSONCompactEachRow, '[[null, 1]], [[]]'); +desc format(JSONCompactEachRow, '[[null, 1]], [[null]]'); +desc format(JSONCompactEachRow, '[[null, 1]], [[1, null]]'); +desc format(JSONCompactEachRow, '[[null, 1]], [["abc", 1]]'); +desc format(JSONCompactEachRow, '[[null, 1]], [["abc", null]]'); +desc format(JSONCompactEachRow, '[{}], [{"a" : 1}]'); +desc format(JSONCompactEachRow, '[{"a" : null}], [{"b" : 1}]'); +desc format(JSONCompactEachRow, '[null], [[1, 2]]'); +desc format(JSONCompactEachRow, '[[[], [null], [1, 2, 3]]]'); +desc format(JSONCompactEachRow, '[[{"a" : null}, {"b" : 1}]]'); +desc format(JSONCompactEachRow, '[[["2020-01-01", null, "1234"], ["abcd"]]]'); + +set schema_inference_make_columns_nullable=0; +desc format(JSONCompactEachRow, '[[1, 2]]'); +desc format(JSONCompactEachRow, '[[null, 1]]'); +desc format(JSONCompactEachRow, '[[1, 2]], [[3]]'); +desc format(JSONCompactEachRow, '[[1, 2]], [[null]]'); + + +select 'CSV'; +set schema_inference_make_columns_nullable=1; +desc format(CSV, '"[null, 1]"'); +desc format(CSV, '"[null, 1]"\n"[]"'); +desc format(CSV, '"[null, 1]"\n"[null]"'); +desc format(CSV, '"[null, 1]"\n"[1, null]"'); +desc format(CSV, '"{}"\n"{\'a\' : 1}"'); +desc format(CSV, '"{\'a\' : null}"\n"{\'b\' : 1}"'); +desc format(CSV, '"[[], [null], [1, 2, 3]]"'); +desc format(CSV, '"[{\'a\' : null}, {\'b\' : 1}]"'); +desc format(CSV, '"[[\'2020-01-01\', null, \'1234\'], [\'abcd\']]"'); + +set schema_inference_make_columns_nullable=0; +desc format(CSV, '"[1,2]"'); +desc format(CSV, '"[NULL, 1]"'); +desc format(CSV, '"[1, 2]"\n"[3]"'); +desc format(CSV, '"[1, 2]"\n"[null]"'); + diff --git a/tests/queries/0_stateless/02497_storage_join_right_assert.reference b/tests/queries/0_stateless/02497_storage_join_right_assert.reference index b254a03f4ce..fe032687be7 100644 --- a/tests/queries/0_stateless/02497_storage_join_right_assert.reference +++ b/tests/queries/0_stateless/02497_storage_join_right_assert.reference @@ -1,2 +1,4 @@ 2 2 2 3 0 3 +2 2 2 +3 0 3 diff --git a/tests/queries/0_stateless/02497_storage_join_right_assert.sql b/tests/queries/0_stateless/02497_storage_join_right_assert.sql index ee9e8713d32..8f9134e9504 100644 --- a/tests/queries/0_stateless/02497_storage_join_right_assert.sql +++ b/tests/queries/0_stateless/02497_storage_join_right_assert.sql @@ -7,4 +7,8 @@ CREATE TABLE t2 (key UInt64, a UInt64) ENGINE = Join(ALL, RIGHT, key); INSERT INTO t1 VALUES (1, 1), (2, 2); INSERT INTO t2 VALUES (2, 2), (3, 3); +SET allow_experimental_analyzer = 0; +SELECT * FROM t1 ALL RIGHT JOIN t2 USING (key) ORDER BY key; + +SET allow_experimental_analyzer = 1; SELECT * FROM t1 ALL RIGHT JOIN t2 USING (key) ORDER BY key; diff --git a/tests/queries/0_stateless/02498_random_string_in_json_schema_inference.reference b/tests/queries/0_stateless/02498_random_string_in_json_schema_inference.reference new file mode 100644 index 00000000000..ab45d56f303 --- /dev/null +++ b/tests/queries/0_stateless/02498_random_string_in_json_schema_inference.reference @@ -0,0 +1 @@ +s Nullable(String) diff --git a/tests/queries/0_stateless/02498_random_string_in_json_schema_inference.sh b/tests/queries/0_stateless/02498_random_string_in_json_schema_inference.sh new file mode 100755 index 00000000000..f9319af4fcb --- /dev/null +++ b/tests/queries/0_stateless/02498_random_string_in_json_schema_inference.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select randomString(100) as s format JSONEachRow" | $CLICKHOUSE_LOCAL -q "desc test" --table='test' --input-format='JSONEachRow' diff --git a/tests/queries/0_stateless/02498_storage_join_key_positions.reference b/tests/queries/0_stateless/02498_storage_join_key_positions.reference.j2 similarity index 84% rename from tests/queries/0_stateless/02498_storage_join_key_positions.reference rename to tests/queries/0_stateless/02498_storage_join_key_positions.reference.j2 index a11b547ca38..83d2ab32886 100644 --- a/tests/queries/0_stateless/02498_storage_join_key_positions.reference +++ b/tests/queries/0_stateless/02498_storage_join_key_positions.reference.j2 @@ -1,3 +1,5 @@ +{% for _ in range(2) -%} +--- using --- 21 22 23 2000 31 32 33 3000 41 42 43 4000 @@ -14,6 +16,11 @@ 31 32 33 3000 41 42 43 4000 51 52 53 5000 +21 22 23 2000 +31 32 33 3000 +41 42 43 4000 +51 52 53 5000 +--- on --- 21 22 23 22 21 23 2000 31 32 33 32 31 33 3000 41 42 43 42 41 43 4000 @@ -30,6 +37,7 @@ 31 32 33 32 31 33 3000 41 42 43 42 41 43 4000 51 52 53 52 51 53 5000 +--- on different name --- 23 21 22 22 21 23 2000 33 31 32 32 31 33 3000 43 41 42 42 41 43 4000 @@ -42,6 +50,8 @@ 33 31 32 32 31 33 3000 43 41 42 42 41 43 4000 53 51 52 52 51 53 5000 +--- incompatible --- +--- reuse column from left --- 11 12 13 11 11 11 1000 21 22 23 21 21 21 2000 31 32 33 31 31 31 3000 @@ -52,3 +62,4 @@ 31 32 33 31 31 31 3000 41 42 43 41 41 41 4000 51 52 53 51 51 51 5000 +{% endfor -%} diff --git a/tests/queries/0_stateless/02498_storage_join_key_positions.sql b/tests/queries/0_stateless/02498_storage_join_key_positions.sql.j2 similarity index 56% rename from tests/queries/0_stateless/02498_storage_join_key_positions.sql rename to tests/queries/0_stateless/02498_storage_join_key_positions.sql.j2 index 96687dab577..697f37fd535 100644 --- a/tests/queries/0_stateless/02498_storage_join_key_positions.sql +++ b/tests/queries/0_stateless/02498_storage_join_key_positions.sql.j2 @@ -8,20 +8,38 @@ INSERT INTO t1 VALUES (11, 12, 13), (21, 22, 23), (31, 32, 33), (41, 42, 43), (5 CREATE TABLE tj (key2 UInt64, key1 UInt64, key3 UInt64, attr UInt64) ENGINE = Join(ALL, INNER, key3, key2, key1); INSERT INTO tj VALUES (22, 21, 23, 2000), (32, 31, 33, 3000), (42, 41, 43, 4000), (52, 51, 53, 5000), (62, 61, 63, 6000); +CREATE TABLE tjj (key2 UInt64, key1 UInt64, key3 UInt64, attr UInt64) ENGINE = Join(ALL, INNER, key3, key2, key1); +INSERT INTO tjj VALUES (11, 11, 11, 1000), (21, 21, 21, 2000), (31, 31, 31, 3000), (41, 41, 41, 4000), (51, 51, 51, 5000), (61, 61, 61, 6000); + + +{% for use_analyzer in [0, 1] -%} +SET allow_experimental_analyzer = '{{ use_analyzer }}'; + +SELECT '--- using ---'; SELECT * FROM t1 ALL INNER JOIN tj USING (key1, key2, key3) ORDER BY key1; -SELECT * FROM t1 ALL INNER JOIN tj USING (key2, key3, key1) ORDER BY key1; -SELECT * FROM t1 ALL INNER JOIN tj USING (key3, key2, key1) ORDER BY key1; -SELECT * FROM t1 ALL INNER JOIN tj USING (key1, key3, key2) ORDER BY key1; +SELECT key1, key2, key3, attr FROM t1 ALL INNER JOIN tj USING (key1, key2, key3) ORDER BY key1; +SELECT key1, key2, key3, attr FROM t1 ALL INNER JOIN tj USING (key2, key3, key1) ORDER BY key1; +SELECT key1, key2, key3, attr FROM t1 ALL INNER JOIN tj USING (key3, key2, key1) ORDER BY key1; +SELECT key1, key2, key3, attr FROM t1 ALL INNER JOIN tj USING (key1, key3, key2) ORDER BY key1; -SELECT * FROM t1 ALL INNER JOIN tj ON t1.key3 = tj.key3 AND t1.key2 = tj.key2 AND t1.key1 = tj.key1 ORDER BY key1; -SELECT * FROM t1 ALL 
INNER JOIN tj ON t1.key2 = tj.key2 AND t1.key3 = tj.key3 AND t1.key1 = tj.key1 ORDER BY key1; -SELECT * FROM t1 ALL INNER JOIN tj ON t1.key3 = tj.key3 AND t1.key1 = tj.key1 AND t1.key2 = tj.key2 ORDER BY key1; -SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1 AND t1.key3 = tj.key3 AND t1.key2 = tj.key2 ORDER BY key1; +SELECT '--- on ---'; +SELECT * FROM t1 ALL INNER JOIN tj ON t1.key3 = tj.key3 AND t1.key2 = tj.key2 AND t1.key1 = tj.key1 ORDER BY t1.key1; +SELECT * FROM t1 ALL INNER JOIN tj ON t1.key2 = tj.key2 AND t1.key3 = tj.key3 AND t1.key1 = tj.key1 ORDER BY t1.key1; +SELECT * FROM t1 ALL INNER JOIN tj ON t1.key3 = tj.key3 AND t1.key1 = tj.key1 AND t1.key2 = tj.key2 ORDER BY t1.key1; +SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1 AND t1.key3 = tj.key3 AND t1.key2 = tj.key2 ORDER BY t1.key1; +SELECT '--- on different name ---'; SELECT * FROM (SELECT key3 AS c, key1 AS a, key2 AS b FROM t1) AS t1 ALL INNER JOIN tj ON t1.a = tj.key1 AND t1.c = tj.key3 AND t1.b = tj.key2 ORDER BY t1.a; SELECT * FROM (SELECT key3 AS c, key1 AS a, key2 AS b FROM t1) AS t1 ALL INNER JOIN tj ON t1.a = tj.key1 AND t1.b = tj.key2 AND t1.c = tj.key3 ORDER BY t1.a; SELECT * FROM (SELECT key3 AS c, key1 AS a, key2 AS b FROM t1) AS t1 ALL INNER JOIN tj ON t1.c = tj.key3 AND t1.a = tj.key1 AND t1.b = tj.key2 ORDER BY t1.a; +-- TODO (vdimir): uncomment after https://github.com/ClickHouse/ClickHouse/pull/44016 +-- SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1 AND t1.key3 = tj.key3 AND t1.key2 = tj.key2 AND 1; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } +-- SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1 AND t1.key3 = tj.key3 AND t1.key2 = tj.key2 AND 0; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } +-- SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1 AND t1.key3 = tj.key3 AND t1.key2 = tj.key2 AND 1 == 1; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } +-- SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1 AND t1.key3 = tj.key3 AND t1.key2 = tj.key2 AND 1 > 1; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } + +SELECT '--- incompatible ---'; SELECT * FROM t1 ALL INNER JOIN tj ON 1; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } SELECT * FROM t1 ALL INNER JOIN tj ON 0; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } SELECT * FROM t1 ALL INNER JOIN tj ON NULL; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } @@ -29,19 +47,20 @@ SELECT * FROM t1 ALL INNER JOIN tj ON 1 == 1; -- { serverError INCOMPATIBLE_TYPE SELECT * FROM t1 ALL INNER JOIN tj ON 1 != 1; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } SELECT * FROM t1 ALL INNER JOIN tj USING (key2, key3); -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } -SELECT * FROM t1 ALL INNER JOIN tj USING (key1, key2, attr); -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } -SELECT * FROM t1 ALL INNER JOIN tj USING (key1, key2, key3, attr); -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } +SELECT * FROM t1 ALL INNER JOIN tj USING (key1, key2, attr) SETTINGS allow_experimental_analyzer = 0; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } +SELECT * FROM t1 ALL INNER JOIN tj USING (key1, key2, attr) SETTINGS allow_experimental_analyzer = 1; -- { serverError UNKNOWN_IDENTIFIER } +SELECT * FROM t1 ALL INNER JOIN tj USING (key1, key2, key3, attr) SETTINGS allow_experimental_analyzer = 0; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } +SELECT * FROM t1 ALL INNER JOIN tj USING (key1, key2, key3, attr) SETTINGS allow_experimental_analyzer = 1; -- { serverError UNKNOWN_IDENTIFIER } SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.attr; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } 
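+-- The remaining ON clauses below do not match the key set the Join table tj was created with (key3, key2, key1) exactly, so they are also expected to fail with INCOMPATIBLE_TYPE_OF_JOIN.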
SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1 AND t1.key2 = tj.key2 AND t1.key3 = tj.attr; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } SELECT * FROM t1 ALL INNER JOIN tj ON t1.key1 = tj.key1 AND t1.key2 = tj.key2 AND t1.key3 = tj.key3 AND t1.key1 = tj.key1; -- { serverError INCOMPATIBLE_TYPE_OF_JOIN } +SELECT '--- reuse column from left ---'; +SELECT * FROM t1 ALL INNER JOIN tjj ON t1.key1 = tjj.key1 AND t1.key1 = tjj.key2 AND t1.key1 = tjj.key3 ORDER BY t1.key1; +SELECT * FROM t1 ALL INNER JOIN tjj ON t1.key1 = tjj.key1 AND t1.key1 = tjj.key3 AND t1.key1 = tjj.key2 ORDER BY t1.key1; -CREATE TABLE tjj (key2 UInt64, key1 UInt64, key3 UInt64, attr UInt64) ENGINE = Join(ALL, INNER, key3, key2, key1); -INSERT INTO tjj VALUES (11, 11, 11, 1000), (21, 21, 21, 2000), (31, 31, 31, 3000), (41, 41, 41, 4000), (51, 51, 51, 5000), (61, 61, 61, 6000); - -SELECT * FROM t1 ALL INNER JOIN tjj ON t1.key1 = tjj.key1 AND t1.key1 = tjj.key2 AND t1.key1 = tjj.key3 ORDER BY key1; -SELECT * FROM t1 ALL INNER JOIN tjj ON t1.key1 = tjj.key1 AND t1.key1 = tjj.key3 AND t1.key1 = tjj.key2 ORDER BY key1; +{% endfor -%} DROP TABLE IF EXISTS t1; DROP TABLE IF EXISTS tj; diff --git a/tests/queries/0_stateless/02499_analyzer_aggregate_function_lambda_crash_fix.reference b/tests/queries/0_stateless/02499_analyzer_aggregate_function_lambda_crash_fix.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02499_analyzer_aggregate_function_lambda_crash_fix.sql b/tests/queries/0_stateless/02499_analyzer_aggregate_function_lambda_crash_fix.sql new file mode 100644 index 00000000000..80a89a0306d --- /dev/null +++ b/tests/queries/0_stateless/02499_analyzer_aggregate_function_lambda_crash_fix.sql @@ -0,0 +1,4 @@ +SET allow_experimental_analyzer = 1; + +SELECT count((t, x_0, x_1) -> ((key_2, x_0, x_1) IN (NULL, NULL, '0.3'))) FROM numbers(10); -- { serverError 1 } +SELECT count((t, x_0, x_1) -> ((key_2, x_0, x_1) IN (NULL, NULL, '0.3'))) OVER (PARTITION BY id) FROM numbers(10); -- { serverError 1 } diff --git a/tests/queries/0_stateless/02499_escaped_quote_schema_inference.reference b/tests/queries/0_stateless/02499_escaped_quote_schema_inference.reference new file mode 100644 index 00000000000..768063eb8f4 --- /dev/null +++ b/tests/queries/0_stateless/02499_escaped_quote_schema_inference.reference @@ -0,0 +1,2 @@ +c1 Array(Nullable(String)) +c1 Nullable(String) diff --git a/tests/queries/0_stateless/02499_escaped_quote_schema_inference.sql b/tests/queries/0_stateless/02499_escaped_quote_schema_inference.sql new file mode 100644 index 00000000000..34c523387ee --- /dev/null +++ b/tests/queries/0_stateless/02499_escaped_quote_schema_inference.sql @@ -0,0 +1,2 @@ +desc format(CSV, '"[\'abc\\\'\']"'); +desc format(Values, '(\'abc\\\'\')'); diff --git a/tests/queries/0_stateless/02499_quantile_nan_ubsan_msan.reference b/tests/queries/0_stateless/02499_quantile_nan_ubsan_msan.reference new file mode 100644 index 00000000000..7f49bc212e6 --- /dev/null +++ b/tests/queries/0_stateless/02499_quantile_nan_ubsan_msan.reference @@ -0,0 +1,25 @@ +['1970-01-01 00:00:00'] +['1970-01-01 00:00:00'] + +['1970-01-01 00:00:00'] +['1970-01-01 00:00:00'] +['1970-01-01 00:00:00','1970-01-01 00:00:00','1970-01-01 00:00:00','1970-01-01 00:00:00','1970-01-01 00:00:00','1970-01-01 00:00:00','1970-01-01 00:00:00','1970-01-01 00:00:00','1970-01-01 00:00:00'] + +['1970-01-01 00:00:00','1970-01-01 
00:00:00','1970-01-01 00:00:00','1970-01-01 00:00:00','1970-01-01 00:00:00','1970-01-01 00:00:00','1970-01-01 00:00:00','1970-01-01 00:00:00','1970-01-01 00:00:00'] +['1970-01-01 00:00:00'] +['1970-01-01 00:00:00','1970-01-01 00:00:00','1970-01-01 00:00:00','1970-01-01 00:00:00'] +[18446744073709552000] +['1970-01-01 00:00:00'] +['1970-01-01 00:00:00','1970-01-01 00:00:00','1970-01-01 00:00:00','1970-01-01 00:00:00'] +[1.157920892373162e77] +[nan] +1970-01-01 00:00:00 +1970-01-01 00:00:00 + +1970-01-01 00:00:00 +1970-01-01 00:00:00 +1970-01-01 00:00:00 +18446744073709552000 +1970-01-01 00:00:00 +1.157920892373162e77 +nan diff --git a/tests/queries/0_stateless/02499_quantile_nan_ubsan_msan.sql b/tests/queries/0_stateless/02499_quantile_nan_ubsan_msan.sql new file mode 100644 index 00000000000..d8a8a040a7c --- /dev/null +++ b/tests/queries/0_stateless/02499_quantile_nan_ubsan_msan.sql @@ -0,0 +1,22 @@ +SELECT quantiles(0.5)(now()::DateTime('UTC')) WHERE 0; +SELECT quantiles(0.5)(now()::DateTime('UTC')) WHERE 0 WITH TOTALS; +SELECT arrayReduce('quantiles(0.5)', []::Array(DateTime('UTC'))); +SELECT quantiles(0.5, 1.1754943508222875e-38, 0.0001, -0., 0.0001, -0., 0.0001, 0., 0.5)(now()::DateTime('UTC')) WHERE 0 WITH TOTALS; + +SELECT DISTINCT arrayReduce('quantiles(0.5)', materialize([]::Array(DateTime('UTC')))) FROM numbers(1000) LIMIT 10; +SELECT DISTINCT arrayReduce('quantiles(0, 0.5, 0.9, 1)', materialize([]::Array(DateTime('UTC')))) FROM numbers(1000) LIMIT 10; +SELECT DISTINCT arrayReduce('quantiles(0.5)', [0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFE]) FROM numbers(1000) LIMIT 10; +SELECT DISTINCT arrayReduce('quantilesDeterministic(0.5)', materialize([]::Array(DateTime('UTC'))), []::Array(UInt64)) FROM numbers(1000) LIMIT 10; +SELECT DISTINCT arrayReduce('quantilesDeterministic(0, 0.5, 0.9, 1)', materialize([]::Array(DateTime('UTC'))), []::Array(UInt64)) FROM numbers(1000) LIMIT 10; +SELECT DISTINCT arrayReduce('quantiles(0.5)', [CAST(-1, 'UInt256'), CAST(-2, 'UInt256')]) FROM numbers(1000) LIMIT 10; +SELECT DISTINCT arrayReduce('quantiles(0.5)', []::Array(Float64)) FROM numbers(1000) LIMIT 10; + +SELECT quantile(0.5)(now()::DateTime('UTC')) WHERE 0; +SELECT quantile(0.5)(now()::DateTime('UTC')) WHERE 0 WITH TOTALS; +SELECT arrayReduce('quantile(0.5)', []::Array(DateTime('UTC'))); + +SELECT DISTINCT arrayReduce('quantile(0.5)', materialize([]::Array(DateTime('UTC')))) FROM numbers(1000) LIMIT 10; +SELECT DISTINCT arrayReduce('quantile(0.5)', [0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFE]) FROM numbers(1000) LIMIT 10; +SELECT DISTINCT arrayReduce('quantileDeterministic(0.5)', materialize([]::Array(DateTime('UTC'))), []::Array(UInt64)) FROM numbers(1000) LIMIT 10; +SELECT DISTINCT arrayReduce('quantile(0.5)', [CAST(-1, 'UInt256'), CAST(-2, 'UInt256')]) FROM numbers(1000) LIMIT 10; +SELECT DISTINCT arrayReduce('quantile(0.5)', []::Array(Float64)) FROM numbers(1000) LIMIT 10; diff --git a/tests/queries/0_stateless/02500_analyzer_storage_view_crash_fix.reference b/tests/queries/0_stateless/02500_analyzer_storage_view_crash_fix.reference new file mode 100644 index 00000000000..bfde072a796 --- /dev/null +++ b/tests/queries/0_stateless/02500_analyzer_storage_view_crash_fix.reference @@ -0,0 +1 @@ +2 2 diff --git a/tests/queries/0_stateless/02500_analyzer_storage_view_crash_fix.sql b/tests/queries/0_stateless/02500_analyzer_storage_view_crash_fix.sql new file mode 100644 index 00000000000..8f4d14b95cc --- /dev/null +++ b/tests/queries/0_stateless/02500_analyzer_storage_view_crash_fix.sql @@ -0,0 +1,19 @@ 
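+-- Regression test: with the experimental analyzer enabled, selecting from a view over a partitioned MergeTree table should return the expected row instead of crashing.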
+SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + f1 Int32, + f2 Int32, + pk Int32 +) ENGINE = MergeTree PARTITION BY pk ORDER BY f1; + +INSERT INTO test_table SELECT number, number, number FROM numbers(10); + +DROP VIEW IF EXISTS test_view; +CREATE VIEW test_view AS SELECT f1, f2 FROM test_table WHERE pk = 2; + +SELECT * FROM test_view; + +DROP VIEW test_view; +DROP TABLE test_table; diff --git a/tests/queries/0_stateless/02500_numbers_inference.reference b/tests/queries/0_stateless/02500_numbers_inference.reference new file mode 100644 index 00000000000..bff7211f66a --- /dev/null +++ b/tests/queries/0_stateless/02500_numbers_inference.reference @@ -0,0 +1,20 @@ +x Nullable(Float64) +x Nullable(Float64) +x Nullable(Int64) +x Nullable(Int64) +x Nullable(Float64) +x Nullable(Float64) +x Array(Nullable(Float64)) +x Array(Nullable(Float64)) +x Array(Nullable(Float64)) +x Array(Nullable(Float64)) +c1 Nullable(Float64) +c1 Nullable(Float64) +c1 Nullable(Int64) +c1 Nullable(Int64) +c1 Nullable(Float64) +c1 Nullable(Float64) +c1 Array(Nullable(Float64)) +c1 Array(Nullable(Float64)) +c1 Array(Nullable(Float64)) +c1 Array(Nullable(Float64)) diff --git a/tests/queries/0_stateless/02500_numbers_inference.sh b/tests/queries/0_stateless/02500_numbers_inference.sh new file mode 100755 index 00000000000..ce9cd5bdc9f --- /dev/null +++ b/tests/queries/0_stateless/02500_numbers_inference.sh @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "desc format(JSONEachRow, '{\"x\" : 1.2}')"; +echo '{"x" : 1.2}' | $CLICKHOUSE_LOCAL --input-format='JSONEachRow' --table='test' -q "desc test"; +$CLICKHOUSE_LOCAL -q "desc format(JSONEachRow, '{\"x\" : 1}')"; +echo '{"x" : 1}' | $CLICKHOUSE_LOCAL --input-format='JSONEachRow' --table='test' -q "desc test"; +$CLICKHOUSE_LOCAL -q "desc format(JSONEachRow, '{\"x\" : 1e10}')"; +echo '{"x" : 1e10}' | $CLICKHOUSE_LOCAL --input-format='JSONEachRow' --table='test' -q "desc test"; +$CLICKHOUSE_LOCAL -q "desc format(JSONEachRow, '{\"x\" : [1, 42.42, 1, 1e10]}')"; +echo '{"x" : [1, 42.42, 1, 1e10]}' | $CLICKHOUSE_LOCAL --input-format='JSONEachRow' --table='test' -q "desc test"; +$CLICKHOUSE_LOCAL -q "desc format(JSONEachRow, '{\"x\" : [1, 42.42, false]}')"; +echo '{"x" : [1, 42.42, false]}' | $CLICKHOUSE_LOCAL --input-format='JSONEachRow' --table='test' -q "desc test"; + +$CLICKHOUSE_LOCAL -q "desc format(TSV, '1.2')"; +echo '1.2' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "desc test"; +$CLICKHOUSE_LOCAL -q "desc format(TSV, '1')"; +echo '1' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "desc test"; +$CLICKHOUSE_LOCAL -q "desc format(TSV, '1e10')"; +echo '1e10' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "desc test"; +$CLICKHOUSE_LOCAL -q "desc format(TSV, '[1, 42.42, 1, 1e10]')"; +echo '[1, 42.42, 1, 1e10]' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "desc test"; +$CLICKHOUSE_LOCAL -q "desc format(TSV, '[1, 42.42, false]')"; +echo '[1, 42.42, false]' | $CLICKHOUSE_LOCAL --input-format='TSV' --table='test' -q "desc test"; + diff --git a/tests/queries/0_stateless/02500_prevent_drop_nested_if_empty_part.reference b/tests/queries/0_stateless/02500_prevent_drop_nested_if_empty_part.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02500_prevent_drop_nested_if_empty_part.sql 
b/tests/queries/0_stateless/02500_prevent_drop_nested_if_empty_part.sql new file mode 100644 index 00000000000..529f574d32d --- /dev/null +++ b/tests/queries/0_stateless/02500_prevent_drop_nested_if_empty_part.sql @@ -0,0 +1,49 @@ +DROP TABLE IF EXISTS 02500_nested; + +SET flatten_nested = 1; + +CREATE TABLE 02500_nested(arr Array(Tuple(a Int32, b Int32))) Engine=MergeTree ORDER BY tuple(); +INSERT INTO 02500_nested(arr.a, arr.b) VALUES ([1], [2]); +ALTER TABLE 02500_nested ADD COLUMN z Int32; +ALTER TABLE 02500_nested DROP COLUMN arr; -- { serverError BAD_ARGUMENTS } +DROP TABLE 02500_nested; + +CREATE TABLE 02500_nested(arr Array(Tuple(a Int32, b Int32)), z Int32) Engine=MergeTree ORDER BY tuple(); +INSERT INTO 02500_nested(arr.a, arr.b, z) VALUES ([1], [2], 2); +ALTER TABLE 02500_nested DROP COLUMN arr; +DROP TABLE 02500_nested; + +CREATE TABLE 02500_nested(nes Nested(a Int32, b Int32)) Engine=MergeTree ORDER BY tuple(); +INSERT INTO 02500_nested(nes.a, nes.b) VALUES ([1], [2]); +ALTER TABLE 02500_nested ADD COLUMN z Int32; +ALTER TABLE 02500_nested DROP COLUMN nes; -- { serverError BAD_ARGUMENTS } +DROP TABLE 02500_nested; + +CREATE TABLE 02500_nested(nes Array(Tuple(a Int32, b Int32)), z Int32) Engine=MergeTree ORDER BY tuple(); +INSERT INTO 02500_nested(nes.a, nes.b, z) VALUES ([1], [2], 2); +ALTER TABLE 02500_nested DROP COLUMN nes; +DROP TABLE 02500_nested; + +SET flatten_nested = 0; + +CREATE TABLE 02500_nested(arr Array(Tuple(a Int32, b Int32))) Engine=MergeTree ORDER BY tuple(); +INSERT INTO 02500_nested(arr) VALUES ([(1, 2)]); +ALTER TABLE 02500_nested ADD COLUMN z Int32; +ALTER TABLE 02500_nested DROP COLUMN arr; -- { serverError BAD_ARGUMENTS } +DROP TABLE 02500_nested; + +CREATE TABLE 02500_nested(arr Array(Tuple(a Int32, b Int32)), z Int32) Engine=MergeTree ORDER BY tuple(); +INSERT INTO 02500_nested(arr, z) VALUES ([(1, 2)], 2); +ALTER TABLE 02500_nested DROP COLUMN arr; +DROP TABLE 02500_nested; + +CREATE TABLE 02500_nested(nes Nested(a Int32, b Int32)) Engine=MergeTree ORDER BY tuple(); +INSERT INTO 02500_nested(nes) VALUES ([(1, 2)]); +ALTER TABLE 02500_nested ADD COLUMN z Int32; +ALTER TABLE 02500_nested DROP COLUMN nes; -- { serverError BAD_ARGUMENTS } +DROP TABLE 02500_nested; + +CREATE TABLE 02500_nested(nes Array(Tuple(a Int32, b Int32)), z Int32) Engine=MergeTree ORDER BY tuple(); +INSERT INTO 02500_nested(nes, z) VALUES ([(1, 2)], 2); +ALTER TABLE 02500_nested DROP COLUMN nes; +DROP TABLE 02500_nested; diff --git a/tests/queries/0_stateless/02501_deep_recusion_schema_inference.reference b/tests/queries/0_stateless/02501_deep_recusion_schema_inference.reference new file mode 100644 index 00000000000..d86bac9de59 --- /dev/null +++ b/tests/queries/0_stateless/02501_deep_recusion_schema_inference.reference @@ -0,0 +1 @@ +OK diff --git a/tests/queries/0_stateless/02501_deep_recusion_schema_inference.sh b/tests/queries/0_stateless/02501_deep_recusion_schema_inference.sh new file mode 100755 index 00000000000..96142432557 --- /dev/null +++ b/tests/queries/0_stateless/02501_deep_recusion_schema_inference.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +# Tags: no-parallel + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select repeat('[', 10000) || '1,2,3' || repeat(']', 10000)" > 02501_deep_nested_array.tsv +$CLICKHOUSE_LOCAL -q "desc file(02501_deep_nested_array.tsv)" 2>&1 | grep -q -F "TOO_DEEP_RECURSION" && echo "OK" || echo "FAIL" +rm 02501_deep_nested_array.tsv + diff --git a/tests/queries/0_stateless/02502_bad_values_schema_inference.reference b/tests/queries/0_stateless/02502_bad_values_schema_inference.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02502_bad_values_schema_inference.sql b/tests/queries/0_stateless/02502_bad_values_schema_inference.sql new file mode 100644 index 00000000000..4c796842c0d --- /dev/null +++ b/tests/queries/0_stateless/02502_bad_values_schema_inference.sql @@ -0,0 +1,2 @@ +desc format(Values, '(\'abc)'); -- { serverError CANNOT_PARSE_INPUT_ASSERTION_FAILED } + diff --git a/tests/queries/0_stateless/02502_fuzz_bad_cast_to_ast_literal.reference b/tests/queries/0_stateless/02502_fuzz_bad_cast_to_ast_literal.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02502_fuzz_bad_cast_to_ast_literal.sql b/tests/queries/0_stateless/02502_fuzz_bad_cast_to_ast_literal.sql new file mode 100644 index 00000000000..3db59d9a38f --- /dev/null +++ b/tests/queries/0_stateless/02502_fuzz_bad_cast_to_ast_literal.sql @@ -0,0 +1,7 @@ +SET allow_deprecated_syntax_for_merge_tree=1; +DROP TABLE IF EXISTS test54378; +CREATE TABLE test54378 (`part_date` Date, `pk_date` Date, `date` Date) ENGINE = MergeTree(part_date, pk_date, 8192); +INSERT INTO test54378 values ('2018-04-19', '2018-04-19', '2018-04-19'); +SELECT 232 FROM test54378 PREWHERE (part_date = (SELECT toDate('2018-04-19'))) IN (SELECT toDate('2018-04-19')) GROUP BY toDate(toDate(-2147483649, NULL), NULL), -inf; +DROP TABLE test54378; + diff --git a/tests/queries/0_stateless/02503_in_lc_const_args_bug.reference b/tests/queries/0_stateless/02503_in_lc_const_args_bug.reference new file mode 100644 index 00000000000..8baef1b4abc --- /dev/null +++ b/tests/queries/0_stateless/02503_in_lc_const_args_bug.reference @@ -0,0 +1 @@ +abc diff --git a/tests/queries/0_stateless/02503_in_lc_const_args_bug.sql b/tests/queries/0_stateless/02503_in_lc_const_args_bug.sql new file mode 100644 index 00000000000..6756e381586 --- /dev/null +++ b/tests/queries/0_stateless/02503_in_lc_const_args_bug.sql @@ -0,0 +1,2 @@ +SELECT substr(toLowCardinality('abc'), 1 in 1) AS x GROUP BY x; + diff --git a/tests/queries/0_stateless/02503_insert_storage_snapshot.reference b/tests/queries/0_stateless/02503_insert_storage_snapshot.reference new file mode 100644 index 00000000000..4e07416f18a --- /dev/null +++ b/tests/queries/0_stateless/02503_insert_storage_snapshot.reference @@ -0,0 +1 @@ +all_1_1_0 1 1 diff --git a/tests/queries/0_stateless/02503_insert_storage_snapshot.sh b/tests/queries/0_stateless/02503_insert_storage_snapshot.sh new file mode 100755 index 00000000000..af2952839df --- /dev/null +++ b/tests/queries/0_stateless/02503_insert_storage_snapshot.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +set -e + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_insert_storage_snapshot" +$CLICKHOUSE_CLIENT -q "CREATE TABLE t_insert_storage_snapshot (a UInt64) ENGINE = MergeTree ORDER BY a" +$CLICKHOUSE_CLIENT -q "INSERT INTO t_insert_storage_snapshot VALUES (1)" + +query_id="$CLICKHOUSE_DATABASE-$RANDOM" +$CLICKHOUSE_CLIENT --query_id $query_id -q "INSERT INTO t_insert_storage_snapshot SELECT sleep(1) FROM numbers(1000) SETTINGS max_block_size = 1" 2>/dev/null & + +$CLICKHOUSE_CLIENT -q "SELECT name, active, refcount FROM system.parts WHERE database = '$CLICKHOUSE_DATABASE' AND table = 't_insert_storage_snapshot'" +$CLICKHOUSE_CLIENT -q "KILL QUERY WHERE query_id = '$query_id' SYNC" >/dev/null + +wait + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_insert_storage_snapshot" diff --git a/tests/queries/0_stateless/02510_orc_map_indexes.reference b/tests/queries/0_stateless/02510_orc_map_indexes.reference new file mode 100644 index 00000000000..8aa75d1e92d --- /dev/null +++ b/tests/queries/0_stateless/02510_orc_map_indexes.reference @@ -0,0 +1,3 @@ +0 {0:0} Hello +1 {1:1} Hello +2 {2:2} Hello diff --git a/tests/queries/0_stateless/02510_orc_map_indexes.sh b/tests/queries/0_stateless/02510_orc_map_indexes.sh new file mode 100755 index 00000000000..77fe30f48de --- /dev/null +++ b/tests/queries/0_stateless/02510_orc_map_indexes.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# Tags: no-parallel, no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select number, map(number, number) as map, 'Hello' as str from numbers(3) format ORC" | $CLICKHOUSE_LOCAL --input-format=ORC -q "select * from table"; + diff --git a/tests/queries/0_stateless/02512_array_join_name_resolution.reference b/tests/queries/0_stateless/02512_array_join_name_resolution.reference new file mode 100644 index 00000000000..62263461e0d --- /dev/null +++ b/tests/queries/0_stateless/02512_array_join_name_resolution.reference @@ -0,0 +1,2 @@ +Hello test +World test diff --git a/tests/queries/0_stateless/02512_array_join_name_resolution.sql b/tests/queries/0_stateless/02512_array_join_name_resolution.sql new file mode 100644 index 00000000000..5bcea967791 --- /dev/null +++ b/tests/queries/0_stateless/02512_array_join_name_resolution.sql @@ -0,0 +1,19 @@ +DROP TABLE IF EXISTS x; +CREATE TABLE x ( `arr.key` Array(String), `arr.value` Array(String), `n` String ) ENGINE = Memory; +INSERT INTO x VALUES (['Hello', 'World'], ['abc', 'def'], 'test'); + +SELECT + key, + any(toString(n)) +FROM +( + SELECT + arr.key AS key, + n + FROM x + ARRAY JOIN arr +) +GROUP BY key +ORDER BY key; + +DROP TABLE x; diff --git a/tests/queries/0_stateless/parts.lib b/tests/queries/0_stateless/parts.lib index c35f996ffed..7aec10392f0 100644 --- a/tests/queries/0_stateless/parts.lib +++ b/tests/queries/0_stateless/parts.lib @@ -37,3 +37,6 @@ function wait_for_delete_inactive_parts() echo "Timed out while waiting for delete inactive parts!" 
>&2 return 2 } + +export -f wait_for_delete_empty_parts +export -f wait_for_delete_inactive_parts diff --git a/utils/changelog/changelog.py b/utils/changelog/changelog.py index 305899fe7e8..3b66b68193b 100755 --- a/utils/changelog/changelog.py +++ b/utils/changelog/changelog.py @@ -33,7 +33,7 @@ categories_preferred_order = ( FROM_REF = "" TO_REF = "" SHA_IN_CHANGELOG = [] # type: List[str] -gh = GitHub() +gh = GitHub(create_cache_dir=False) CACHE_PATH = p.join(p.dirname(p.realpath(__file__)), "gh_cache") @@ -384,7 +384,11 @@ def main(): # Get all PRs for the given time frame global gh gh = GitHub( - args.gh_user_or_token, args.gh_password, per_page=100, pool_size=args.jobs + args.gh_user_or_token, + args.gh_password, + create_cache_dir=False, + per_page=100, + pool_size=args.jobs, ) gh.cache_path = CACHE_PATH query = f"type:pr repo:{args.repo} is:merged" diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index e927a86058b..40a8af9b5b6 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -430,6 +430,7 @@ rowbinary rowbinarywithnames rowbinarywithnamesandtypes rsync +runnable runningAccumulate runtime russian @@ -482,12 +483,14 @@ th tmp tokenization toml +toolchain toolset tskv tsv tui turbostat txt +ubuntu uint unary unencrypted diff --git a/utils/check-style/check-submodules b/utils/check-style/check-submodules new file mode 100755 index 00000000000..815e6c13c0f --- /dev/null +++ b/utils/check-style/check-submodules @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +# The script checks if all submodules defined in $GIT_ROOT/.gitmodules exist in $GIT_ROOT/contrib + +set -e + +GIT_ROOT=$(git rev-parse --show-cdup) +GIT_ROOT=${GIT_ROOT:-.} + +cd "$GIT_ROOT" + +# Remove keys for submodule.*.path parameters, the values are separated by \0 +# and check if the directory exists +git config --file .gitmodules --null --get-regexp path | sed -z 's|.*\n||' | \ + xargs -P100 -0 --no-run-if-empty -I{} bash -c 'if ! test -d {}; then echo Directory for submodule {} is not found; exit 1; fi' 2>&1 + + +# And check that the submodule is fine +git config --file .gitmodules --null --get-regexp path | sed -z 's|.*\n||' | \ + xargs -P100 -0 --no-run-if-empty -I{} git submodule status -q {} 2>&1 diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index 23aa0d1dbaf..d4c5924d6f3 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,9 +1,11 @@ +v22.12.1.1752-stable 2022-12-15 v22.11.2.30-stable 2022-12-02 v22.11.1.1360-stable 2022-11-17 v22.10.4.23-stable 2022-12-02 v22.10.3.27-stable 2022-11-23 v22.10.2.11-stable 2022-11-01 v22.10.1.1877-stable 2022-10-26 +v22.9.7.34-stable 2022-12-16 v22.9.6.20-stable 2022-12-02 v22.9.5.25-stable 2022-11-23 v22.9.4.32-stable 2022-10-26