Mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-11-21 15:12:02 +00:00)

Commit 5fa5ae8099: Merge branch 'master' of https://github.com/ClickHouse/ClickHouse into crc-power-fix
@ -128,18 +128,12 @@ EOL

function stop()
{
local max_tries="${1:-90}"
local pid
# Preserve the pid, since the server can hang after the PID file is deleted.
pid="$(cat /var/run/clickhouse-server/clickhouse-server.pid)"

clickhouse stop $max_tries --do-not-kill && return

if [ -n "$1" ]
then
# temporarily disable it in BC check
clickhouse stop --force
return
fi
clickhouse stop --max-tries "$max_tries" --do-not-kill && return

# We failed to stop the server with SIGTERM. Maybe it hung; let's collect stacktraces.
kill -TERM "$(pidof gdb)" ||:
@ -465,7 +459,8 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then
clickhouse stop --force
)

stop 1
# Use bigger timeout for previous version
stop 300
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.stress.log

# Start new server
@ -7,186 +7,27 @@ sidebar_label: 2023
### ClickHouse release v22.3.16.1190-lts (bb4e0934e5a) FIXME as compared to v22.10.1.1877-stable (98ab5a3c189)
#### Backward Incompatible Change
* JSONExtract family of functions will now attempt to coerce to the requested type. [#41502](https://github.com/ClickHouse/ClickHouse/pull/41502) ([Márcio Martins](https://github.com/marcioapm)).
* Backported in [#43484](https://github.com/ClickHouse/ClickHouse/issues/43484): Fixed backward incompatibility in (de)serialization of states of `min`, `max`, `any*`, `argMin`, `argMax` aggregate functions with `String` argument. The incompatibility was introduced in https://github.com/ClickHouse/ClickHouse/pull/41431 and affects 22.9, 22.10 and 22.11 branches (fixed since 22.9.6, 22.10.4 and 22.11.2 correspondingly). Some minor releases of 22.3, 22.7 and 22.8 branches are also affected: 22.3.13...22.3.14 (fixed since 22.3.15), 22.8.6...22.8.9 (fixed since 22.8.10), 22.7.6 and newer (will not be fixed in 22.7, we recommend to upgrade from 22.7.* to 22.8.10 or newer). This release note does not concern users that have never used affected versions. Incompatible versions append extra `'\0'` to strings when reading states of the aggregate functions mentioned above. For example, if an older version saved state of `anyState('foobar')` to `state_column` then incompatible version will print `'foobar\0'` on `anyMerge(state_column)`. Also incompatible versions write states of the aggregate functions without trailing `'\0'`. Newer versions (that have the fix) can correctly read data written by all versions including incompatible versions, except one corner case. If an incompatible version saved a state with a string that actually ends with null character, then newer version will trim trailing `'\0'` when reading state of affected aggregate function. For example, if an incompatible version saved state of `anyState('abrac\0dabra\0')` to `state_column` then newer versions will print `'abrac\0dabra'` on `anyMerge(state_column)`. The issue also affects distributed queries when an incompatible version works in a cluster together with older or newer versions. [#43038](https://github.com/ClickHouse/ClickHouse/pull/43038) ([Raúl Marín](https://github.com/Algunenano)).
#### New Feature
* Add function `displayName`, closes [#36770](https://github.com/ClickHouse/ClickHouse/issues/36770). [#37681](https://github.com/ClickHouse/ClickHouse/pull/37681) ([hongbin](https://github.com/xlwh)).
* Add Hudi and DeltaLake table engines, read-only, only for tables on S3. [#41054](https://github.com/ClickHouse/ClickHouse/pull/41054) ([Daniil Rubin](https://github.com/rubin-do)).
* Add 4LW command `csnp` for manually creating snapshots. Additionally, `lgif` was added to get Raft information for a specific node (e.g. index of last created snapshot, last committed log index). [#41766](https://github.com/ClickHouse/ClickHouse/pull/41766) ([JackyWoo](https://github.com/JackyWoo)).
* Add function `ascii`, like in Spark: https://spark.apache.org/docs/latest/api/sql/#ascii. [#42670](https://github.com/ClickHouse/ClickHouse/pull/42670) ([李扬](https://github.com/taiyang-li)).
* Published function `formatReadableDecimalSize`. [#42774](https://github.com/ClickHouse/ClickHouse/pull/42774) ([Alejandro](https://github.com/alexon1234)).
#### Performance Improvement
* Currently, the only saturable operators are And and Or, and their code paths are affected by this change. [#42214](https://github.com/ClickHouse/ClickHouse/pull/42214) ([Zhiguo Zhou](https://github.com/ZhiguoZh)).
* `match` function can use the index if it's a condition on string prefix. This closes [#37333](https://github.com/ClickHouse/ClickHouse/issues/37333). [#42458](https://github.com/ClickHouse/ClickHouse/pull/42458) ([clarkcaoliu](https://github.com/Clark0)).
* Support parallel parsing for LineAsString input format. This improves performance just slightly. This closes [#42502](https://github.com/ClickHouse/ClickHouse/issues/42502). [#42780](https://github.com/ClickHouse/ClickHouse/pull/42780) ([Kruglov Pavel](https://github.com/Avogar)).
* Keeper performance improvement: improve commit performance for cases when many different nodes have uncommitted states. This should help with cases when a follower node can't sync fast enough. [#42926](https://github.com/ClickHouse/ClickHouse/pull/42926) ([Antonio Andelic](https://github.com/antonio2368)).
#### Improvement
* Support type `Object` inside other types, e.g. `Array(JSON)`. [#36969](https://github.com/ClickHouse/ClickHouse/pull/36969) ([Anton Popov](https://github.com/CurtizJ)).
* Backported in [#42527](https://github.com/ClickHouse/ClickHouse/issues/42527): Fix issue with passing MySQL timeouts for MySQL database engine and MySQL table function. Closes [#34168](https://github.com/ClickHouse/ClickHouse/issues/34168). [#40751](https://github.com/ClickHouse/ClickHouse/pull/40751) ([Kseniia Sumarokova](https://github.com/kssenii)).
* ClickHouse Client and ClickHouse Local will show progress by default even in non-interactive mode. If `/dev/tty` is available, the progress will be rendered directly to the terminal, without writing to stderr. It allows to get progress even if stderr is redirected to a file, and the file will not be polluted by terminal escape sequences. The progress can be disabled by `--progress false`. This closes [#32238](https://github.com/ClickHouse/ClickHouse/issues/32238). [#42003](https://github.com/ClickHouse/ClickHouse/pull/42003) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* 1. Add, subtract and negate operations are now available on Intervals. In case when the types of Intervals are different they will be transformed into the Tuple of those types. 2. A tuple of intervals can be added to or subtracted from a Date/DateTime field. 3. Added parsing of Intervals with different types, for example: `INTERVAL '1 HOUR 1 MINUTE 1 SECOND'`. [#42195](https://github.com/ClickHouse/ClickHouse/pull/42195) ([Nikolay Degterinsky](https://github.com/evillique)).
* Add `notLike` to key condition atom map, so that conditions like `NOT LIKE 'prefix%'` can use the primary index. [#42209](https://github.com/ClickHouse/ClickHouse/pull/42209) ([Duc Canh Le](https://github.com/canhld94)).
* Add support for FixedString input to base64 coding functions. [#42285](https://github.com/ClickHouse/ClickHouse/pull/42285) ([ltrk2](https://github.com/ltrk2)).
* Add columns `bytes_on_disk` and `path` to `system.detached_parts`. Closes [#42264](https://github.com/ClickHouse/ClickHouse/issues/42264). [#42303](https://github.com/ClickHouse/ClickHouse/pull/42303) ([chen](https://github.com/xiedeyantu)).
* Added ** glob support for recursive directory traversal to filesystem and S3. resolves [#36316](https://github.com/ClickHouse/ClickHouse/issues/36316). [#42376](https://github.com/ClickHouse/ClickHouse/pull/42376) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Mask passwords and secret keys both in `system.query_log` and `/var/log/clickhouse-server/*.log` and also in error messages. [#42484](https://github.com/ClickHouse/ClickHouse/pull/42484) ([Vitaly Baranov](https://github.com/vitlibar)).
* Add a new variable called `limit` in query_info, indicating whether this query is a limit-trivial query. If so, we will adjust the approximate total rows for later estimation. Closes [#7071](https://github.com/ClickHouse/ClickHouse/issues/7071). [#42580](https://github.com/ClickHouse/ClickHouse/pull/42580) ([Han Fei](https://github.com/hanfei1991)).
* Implement `ATTACH` of `MergeTree` table for `s3_plain` disk (plus some fixes for `s3_plain`). [#42628](https://github.com/ClickHouse/ClickHouse/pull/42628) ([Azat Khuzhin](https://github.com/azat)).
* Fix no progress indication on INSERT FROM INFILE. Closes [#42548](https://github.com/ClickHouse/ClickHouse/issues/42548). [#42634](https://github.com/ClickHouse/ClickHouse/pull/42634) ([chen](https://github.com/xiedeyantu)).
* Add `min_age_to_force_merge_on_partition_only` setting to optimize old parts for the entire partition only. [#42659](https://github.com/ClickHouse/ClickHouse/pull/42659) ([Antonio Andelic](https://github.com/antonio2368)).
* Throttling algorithm changed to token bucket. [#42665](https://github.com/ClickHouse/ClickHouse/pull/42665) ([Sergei Trifonov](https://github.com/serxa)).
* Added new field allow_readonly in system.table_functions to allow using table functions in readonly mode resolves [#42414](https://github.com/ClickHouse/ClickHouse/issues/42414) Implementation: * Added a new field allow_readonly to table system.table_functions. * Updated to use new field allow_readonly to allow using table functions in readonly mode. Testing: * Added a test for filesystem tests/queries/0_stateless/02473_functions_in_readonly_mode.sh Documentation: * Updated the english documentation for Table Functions. [#42708](https://github.com/ClickHouse/ClickHouse/pull/42708) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
* Allow to use Date32 arguments for formatDateTime and FROM_UNIXTIME functions. [#42737](https://github.com/ClickHouse/ClickHouse/pull/42737) ([Roman Vasin](https://github.com/rvasin)).
* Backported in [#42839](https://github.com/ClickHouse/ClickHouse/issues/42839): Update tzdata to 2022f. Mexico will no longer observe DST except near the US border: https://www.timeanddate.com/news/time/mexico-abolishes-dst-2022.html. Chihuahua moves to year-round UTC-6 on 2022-10-30. Fiji no longer observes DST. See https://github.com/google/cctz/pull/235 and https://bugs.launchpad.net/ubuntu/+source/tzdata/+bug/1995209. [#42796](https://github.com/ClickHouse/ClickHouse/pull/42796) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Update tzdata to 2022f. Mexico will no longer observe DST except near the US border: https://www.timeanddate.com/news/time/mexico-abolishes-dst-2022.html. Chihuahua moves to year-round UTC-6 on 2022-10-30. Fiji no longer observes DST. See https://github.com/google/cctz/pull/235 and https://bugs.launchpad.net/ubuntu/+source/tzdata/+bug/1995209. [#42796](https://github.com/ClickHouse/ClickHouse/pull/42796) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Add `FailedAsyncInsertQuery` event metric for async inserts. [#42814](https://github.com/ClickHouse/ClickHouse/pull/42814) ([Krzysztof Góralski](https://github.com/kgoralski)).
* Increase the size of upload part exponentially for backup to S3. [#42833](https://github.com/ClickHouse/ClickHouse/pull/42833) ([Vitaly Baranov](https://github.com/vitlibar)).
#### Bug Fix
* Backported in [#43829](https://github.com/ClickHouse/ClickHouse/issues/43829): Updated normaliser to clone the alias ast. resolves [#42452](https://github.com/ClickHouse/ClickHouse/issues/42452) Implementation: * Updated QueryNormalizer to clone alias ast, when its replaced. Previously just assigning the same leads to exception in LogicalExpressinsOptimizer as it would be the same parent being inserted again. * This bug is not seen with new analyser (allow_experimental_analyzer), so no changes for it. I added a test for the same. [#42827](https://github.com/ClickHouse/ClickHouse/pull/42827) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
#### Build/Testing/Packaging Improvement
* Run SQLancer for each pull request and commit to master. [SQLancer](https://github.com/sqlancer/sqlancer) is an OpenSource fuzzer that focuses on automatic detection of logical bugs. [#42397](https://github.com/ClickHouse/ClickHouse/pull/42397) ([Ilya Yatsishin](https://github.com/qoega)).
* Update to latest zlib-ng. [#42463](https://github.com/ClickHouse/ClickHouse/pull/42463) ([Boris Kuschel](https://github.com/bkuschel)).
* Use llvm `ld64.lld` on macOS to suppress ld warnings, close [#42282](https://github.com/ClickHouse/ClickHouse/issues/42282). [#42470](https://github.com/ClickHouse/ClickHouse/pull/42470) ([Lloyd-Pottiger](https://github.com/Lloyd-Pottiger)).
* Improve bugfix validation check: fix bug with skipping the check, port separate status in CI, run after check labels and style check. Close [#40349](https://github.com/ClickHouse/ClickHouse/issues/40349). [#42702](https://github.com/ClickHouse/ClickHouse/pull/42702) ([Vladimir C](https://github.com/vdimir)).
* Backported in [#43050](https://github.com/ClickHouse/ClickHouse/issues/43050): Wait for all files to be in sync before archiving them in integration tests. [#42891](https://github.com/ClickHouse/ClickHouse/pull/42891) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Wait for all files to be in sync before archiving them in integration tests. [#42891](https://github.com/ClickHouse/ClickHouse/pull/42891) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Use https://github.com/matus-chochlik/ctcache for clang-tidy results caching. [#42913](https://github.com/ClickHouse/ClickHouse/pull/42913) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#42963](https://github.com/ClickHouse/ClickHouse/issues/42963): Before the fix, the user-defined config was preserved by RPM in `$file.rpmsave`. The PR fixes it and won't replace the user's files from packages. [#42936](https://github.com/ClickHouse/ClickHouse/pull/42936) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Before the fix, the user-defined config was preserved by RPM in `$file.rpmsave`. The PR fixes it and won't replace the user's files from packages. [#42936](https://github.com/ClickHouse/ClickHouse/pull/42936) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#43039](https://github.com/ClickHouse/ClickHouse/issues/43039): Add a CI step to mark commits as ready for release; soft-forbid launching a release script from branches but master. [#43017](https://github.com/ClickHouse/ClickHouse/pull/43017) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add a CI step to mark commits as ready for release; soft-forbid launching a release script from branches but master. [#43017](https://github.com/ClickHouse/ClickHouse/pull/43017) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#44109](https://github.com/ClickHouse/ClickHouse/issues/44109): Bring sha512 sums back to the building step. [#44017](https://github.com/ClickHouse/ClickHouse/pull/44017) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#44431](https://github.com/ClickHouse/ClickHouse/issues/44431): Kill stress tests after 2.5h in case of hanging process. [#44214](https://github.com/ClickHouse/ClickHouse/pull/44214) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#44557](https://github.com/ClickHouse/ClickHouse/issues/44557): Retry the integration tests on compressing errors. [#44529](https://github.com/ClickHouse/ClickHouse/pull/44529) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### Bug Fix (user-visible misbehavior in official stable or prestable release)
* Fix schema inference in s3Cluster and improve in hdfsCluster. [#41979](https://github.com/ClickHouse/ClickHouse/pull/41979) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix retries while reading from http table engines / table function (retriable errors could be retried more times than needed, non-retriable errors resulted in a failed assertion in the code). [#42224](https://github.com/ClickHouse/ClickHouse/pull/42224) ([Kseniia Sumarokova](https://github.com/kssenii)).
* A segmentation fault related to DNS & c-ares has been reported. The below error ocurred in multiple threads: ``` 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008088 [ 356 ] {} <Fatal> BaseDaemon: ######################################## 2022-09-28 15:41:19.008,"2022.09.28 15:41:19.008147 [ 356 ] {} <Fatal> BaseDaemon: (version 22.8.5.29 (official build), build id: 92504ACA0B8E2267) (from thread 353) (no query) Received signal Segmentation fault (11)" 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008196 [ 356 ] {} <Fatal> BaseDaemon: Address: 0xf Access: write. Address not mapped to object. 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008216 [ 356 ] {} <Fatal> BaseDaemon: Stack trace: 0x188f8212 0x1626851b 0x1626a69e 0x16269b3f 0x16267eab 0x13cf8284 0x13d24afc 0x13c5217e 0x14ec2495 0x15ba440f 0x15b9d13b 0x15bb2699 0x1891ccb3 0x1891e00d 0x18ae0769 0x18ade022 0x7f76aa985609 0x7f76aa8aa133 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008274 [ 356 ] {} <Fatal> BaseDaemon: 2. Poco::Net::IPAddress::family() const @ 0x188f8212 in /usr/bin/clickhouse 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008297 [ 356 ] {} <Fatal> BaseDaemon: 3. ? @ 0x1626851b in /usr/bin/clickhouse 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008309 [ 356 ] {} <Fatal> BaseDaemon: 4. ? @ 0x1626a69e in /usr/bin/clickhouse ```. [#42234](https://github.com/ClickHouse/ClickHouse/pull/42234) ([Arthur Passos](https://github.com/arthurpassos)).
* Fix `LOGICAL_ERROR` `Arguments of 'plus' have incorrect data types` which may happen in PK analysis (monotonicity check). Fix invalid PK analysis for monotonic binary functions with first constant argument. [#42410](https://github.com/ClickHouse/ClickHouse/pull/42410) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix incorrect key analysis when key types cannot be inside Nullable. This fixes [#42456](https://github.com/ClickHouse/ClickHouse/issues/42456). [#42469](https://github.com/ClickHouse/ClickHouse/pull/42469) ([Amos Bird](https://github.com/amosbird)).
* Fix typo in setting name that led to bad usage of schema inference cache while using setting `input_format_csv_use_best_effort_in_schema_inference`. Closes [#41735](https://github.com/ClickHouse/ClickHouse/issues/41735). [#42536](https://github.com/ClickHouse/ClickHouse/pull/42536) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix create Set with wrong header when data type is LowCardinality. Closes [#42460](https://github.com/ClickHouse/ClickHouse/issues/42460). [#42579](https://github.com/ClickHouse/ClickHouse/pull/42579) ([flynn](https://github.com/ucasfl)).
* `(U)Int128` and `(U)Int256` values are correctly checked in `PREWHERE`. [#42605](https://github.com/ClickHouse/ClickHouse/pull/42605) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix a bug in ParserFunction that could have led to a segmentation fault. [#42724](https://github.com/ClickHouse/ClickHouse/pull/42724) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix truncate table does not hold lock correctly. [#42728](https://github.com/ClickHouse/ClickHouse/pull/42728) ([flynn](https://github.com/ucasfl)).
* Fix possible SIGSEGV for web disks when a file does not exist (or `OPTIMIZE TABLE FINAL`, which can also eventually hit the same error). [#42767](https://github.com/ClickHouse/ClickHouse/pull/42767) ([Azat Khuzhin](https://github.com/azat)).
* Fix `auth_type` mapping in `system.session_log`, by including `SSL_CERTIFICATE` for the enum values. [#42782](https://github.com/ClickHouse/ClickHouse/pull/42782) ([Miel Donkers](https://github.com/mdonkers)).
* Fix stack-use-after-return under ASAN build in ParserCreateUserQuery. [#42804](https://github.com/ClickHouse/ClickHouse/pull/42804) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix lowerUTF8()/upperUTF8() in case a symbol crosses a 16-byte boundary (a very frequent case if you have strings > 16 bytes long). [#42812](https://github.com/ClickHouse/ClickHouse/pull/42812) ([Azat Khuzhin](https://github.com/azat)).
* Additional bound check was added to lz4 decompression routine to fix misbehaviour in case of malformed input. [#42868](https://github.com/ClickHouse/ClickHouse/pull/42868) ([Nikita Taranov](https://github.com/nickitat)).
* Fix rare possible hang on query cancellation. [#42874](https://github.com/ClickHouse/ClickHouse/pull/42874) ([Azat Khuzhin](https://github.com/azat)).
* Fix incorrect saved_block_sample with multiple disjuncts in hash join, close [#42832](https://github.com/ClickHouse/ClickHouse/issues/42832). [#42876](https://github.com/ClickHouse/ClickHouse/pull/42876) ([Vladimir C](https://github.com/vdimir)).
* Fix a null pointer that could be generated when using `select if as` in a three-table join. [#42883](https://github.com/ClickHouse/ClickHouse/pull/42883) ([zzsmdfj](https://github.com/zzsmdfj)).
* Fix memory sanitizer report in ClusterDiscovery, close [#42763](https://github.com/ClickHouse/ClickHouse/issues/42763). [#42905](https://github.com/ClickHouse/ClickHouse/pull/42905) ([Vladimir C](https://github.com/vdimir)).
* Fix rare NOT_FOUND_COLUMN_IN_BLOCK error when projection is possible to use but there is no projection available. This fixes [#42771](https://github.com/ClickHouse/ClickHouse/issues/42771) . The bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/25563. [#42938](https://github.com/ClickHouse/ClickHouse/pull/42938) ([Amos Bird](https://github.com/amosbird)).
* Fix ATTACH TABLE in PostgreSQL database engine if the table contains DATETIME data type. Closes [#42817](https://github.com/ClickHouse/ClickHouse/issues/42817). [#42960](https://github.com/ClickHouse/ClickHouse/pull/42960) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix lambda parsing. Closes [#41848](https://github.com/ClickHouse/ClickHouse/issues/41848). [#42979](https://github.com/ClickHouse/ClickHouse/pull/42979) ([Nikolay Degterinsky](https://github.com/evillique)).
* Backported in [#43512](https://github.com/ClickHouse/ClickHouse/issues/43512): Fix several buffer over-reads. [#43159](https://github.com/ClickHouse/ClickHouse/pull/43159) ([Raúl Marín](https://github.com/Algunenano)).
* Backported in [#43750](https://github.com/ClickHouse/ClickHouse/issues/43750): Fix an issue with an exception that was reported while trying to read a Parquet file from S3 into ClickHouse. [#43297](https://github.com/ClickHouse/ClickHouse/pull/43297) ([Arthur Passos](https://github.com/arthurpassos)).
* Backported in [#43427](https://github.com/ClickHouse/ClickHouse/issues/43427): Fixed queries with `SAMPLE BY` with prewhere optimization on tables using `Merge` engine. [#43315](https://github.com/ClickHouse/ClickHouse/pull/43315) ([Antonio Andelic](https://github.com/antonio2368)).
* Backported in [#43616](https://github.com/ClickHouse/ClickHouse/issues/43616): Fix sumMap() for Nullable(Decimal()). [#43414](https://github.com/ClickHouse/ClickHouse/pull/43414) ([Azat Khuzhin](https://github.com/azat)).
* Backported in [#43720](https://github.com/ClickHouse/ClickHouse/issues/43720): Fixed primary key analysis with conditions involving `toString(enum)`. [#43596](https://github.com/ClickHouse/ClickHouse/pull/43596) ([Nikita Taranov](https://github.com/nickitat)).
* Backported in [#43885](https://github.com/ClickHouse/ClickHouse/issues/43885): Fixed `ALTER ... RESET SETTING` with `ON CLUSTER`. It could be applied to one replica only. Fixes [#43843](https://github.com/ClickHouse/ClickHouse/issues/43843). [#43848](https://github.com/ClickHouse/ClickHouse/pull/43848) ([Elena Torró](https://github.com/elenatorro)).
* Backported in [#44179](https://github.com/ClickHouse/ClickHouse/issues/44179): Fix undefined behavior in the `quantiles` function, which might lead to uninitialized memory. Found by fuzzer. This closes [#44066](https://github.com/ClickHouse/ClickHouse/issues/44066). [#44067](https://github.com/ClickHouse/ClickHouse/pull/44067) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Backported in [#44283](https://github.com/ClickHouse/ClickHouse/issues/44283): Prevent `ReadonlyReplica` metric from having negative values. [#44220](https://github.com/ClickHouse/ClickHouse/pull/44220) ([Antonio Andelic](https://github.com/antonio2368)).
#### Build Improvement
* Add support for format ipv6 on s390x. [#42412](https://github.com/ClickHouse/ClickHouse/pull/42412) ([Suzy Wang](https://github.com/SuzyWangIBMer)).
#### NO CL ENTRY
* NO CL ENTRY: 'Revert "Sonar Cloud Workflow"'. [#42725](https://github.com/ClickHouse/ClickHouse/pull/42725) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* NO CL ENTRY: 'Fix multipart upload for large S3 object, backport to 22.3'. [#44217](https://github.com/ClickHouse/ClickHouse/pull/44217) ([ianton-ru](https://github.com/ianton-ru)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Build with libcxx(abi) 15 [#42513](https://github.com/ClickHouse/ClickHouse/pull/42513) ([Robert Schulze](https://github.com/rschu1ze)).
* Sonar Cloud Workflow [#42534](https://github.com/ClickHouse/ClickHouse/pull/42534) ([Julio Jimenez](https://github.com/juliojimenez)).
* Invalid type in where for Merge table (logical error) [#42576](https://github.com/ClickHouse/ClickHouse/pull/42576) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix frequent memory drift message and clarify things in comments [#42582](https://github.com/ClickHouse/ClickHouse/pull/42582) ([Azat Khuzhin](https://github.com/azat)).
* Try to save `IDataPartStorage` interface [#42618](https://github.com/ClickHouse/ClickHouse/pull/42618) ([Anton Popov](https://github.com/CurtizJ)).
* Analyzer change setting into allow_experimental_analyzer [#42649](https://github.com/ClickHouse/ClickHouse/pull/42649) ([Maksim Kita](https://github.com/kitaisreal)).
* Analyzer IQueryTreeNode remove getName method [#42651](https://github.com/ClickHouse/ClickHouse/pull/42651) ([Maksim Kita](https://github.com/kitaisreal)).
* Minor fix iotest_nonblock build [#42658](https://github.com/ClickHouse/ClickHouse/pull/42658) ([Jordi Villar](https://github.com/jrdi)).
* Add tests and doc for some url-related functions [#42664](https://github.com/ClickHouse/ClickHouse/pull/42664) ([Vladimir C](https://github.com/vdimir)).
* Update version_date.tsv and changelogs after v22.10.1.1875-stable [#42676](https://github.com/ClickHouse/ClickHouse/pull/42676) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Fix error handling in clickhouse_helper.py [#42678](https://github.com/ClickHouse/ClickHouse/pull/42678) ([Ilya Yatsishin](https://github.com/qoega)).
* Fix execution of version_helper.py to use git tweaks [#42679](https://github.com/ClickHouse/ClickHouse/pull/42679) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* MergeTree indexes use RPNBuilderTree [#42681](https://github.com/ClickHouse/ClickHouse/pull/42681) ([Maksim Kita](https://github.com/kitaisreal)).
* Always run `BuilderReport` and `BuilderSpecialReport` in all CI types [#42684](https://github.com/ClickHouse/ClickHouse/pull/42684) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Always run `BuilderReport` and `BuilderSpecialReport` in all CI types [#42684](https://github.com/ClickHouse/ClickHouse/pull/42684) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Update version after release [#42699](https://github.com/ClickHouse/ClickHouse/pull/42699) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Update version_date.tsv and changelogs after v22.10.1.1877-stable [#42700](https://github.com/ClickHouse/ClickHouse/pull/42700) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* OrderByLimitByDuplicateEliminationPass improve performance [#42704](https://github.com/ClickHouse/ClickHouse/pull/42704) ([Maksim Kita](https://github.com/kitaisreal)).
* Analyzer improve subqueries representation [#42705](https://github.com/ClickHouse/ClickHouse/pull/42705) ([Maksim Kita](https://github.com/kitaisreal)).
* Update version_date.tsv and changelogs after v22.9.4.32-stable [#42712](https://github.com/ClickHouse/ClickHouse/pull/42712) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Update version_date.tsv and changelogs after v22.8.7.34-lts [#42713](https://github.com/ClickHouse/ClickHouse/pull/42713) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Update version_date.tsv and changelogs after v22.7.7.24-stable [#42714](https://github.com/ClickHouse/ClickHouse/pull/42714) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Move SonarCloud Job to nightly [#42718](https://github.com/ClickHouse/ClickHouse/pull/42718) ([Julio Jimenez](https://github.com/juliojimenez)).
* Update version_date.tsv and changelogs after v22.8.8.3-lts [#42738](https://github.com/ClickHouse/ClickHouse/pull/42738) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Minor fix implicit cast CaresPTRResolver [#42747](https://github.com/ClickHouse/ClickHouse/pull/42747) ([Jordi Villar](https://github.com/jrdi)).
* Fix build on master [#42752](https://github.com/ClickHouse/ClickHouse/pull/42752) ([Igor Nikonov](https://github.com/devcrafter)).
* Update version_date.tsv and changelogs after v22.3.14.18-lts [#42759](https://github.com/ClickHouse/ClickHouse/pull/42759) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Fix anchor links [#42760](https://github.com/ClickHouse/ClickHouse/pull/42760) ([Sergei Trifonov](https://github.com/serxa)).
* Update version_date.tsv and changelogs after v22.3.14.23-lts [#42764](https://github.com/ClickHouse/ClickHouse/pull/42764) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Update README.md [#42783](https://github.com/ClickHouse/ClickHouse/pull/42783) ([Yuko Takagi](https://github.com/yukotakagi)).
* Slightly better code with projections [#42794](https://github.com/ClickHouse/ClickHouse/pull/42794) ([Anton Popov](https://github.com/CurtizJ)).
* Fix some races in MergeTree [#42805](https://github.com/ClickHouse/ClickHouse/pull/42805) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix typo in comments [#42809](https://github.com/ClickHouse/ClickHouse/pull/42809) ([Gabriel](https://github.com/Gabriel39)).
* Fix compilation of LLVM with cmake cache [#42816](https://github.com/ClickHouse/ClickHouse/pull/42816) ([Azat Khuzhin](https://github.com/azat)).
* Fix link in docs [#42821](https://github.com/ClickHouse/ClickHouse/pull/42821) ([Sergei Trifonov](https://github.com/serxa)).
* Link to proper place in docs [#42822](https://github.com/ClickHouse/ClickHouse/pull/42822) ([Sergei Trifonov](https://github.com/serxa)).
* Fix argument type check in AggregateFunctionAnalysisOfVariance [#42823](https://github.com/ClickHouse/ClickHouse/pull/42823) ([Vladimir C](https://github.com/vdimir)).
* Tests/lambda analyzer [#42824](https://github.com/ClickHouse/ClickHouse/pull/42824) ([Denny Crane](https://github.com/den-crane)).
* Fix Missing Quotes - Sonar Nightly [#42831](https://github.com/ClickHouse/ClickHouse/pull/42831) ([Julio Jimenez](https://github.com/juliojimenez)).
* Add exclusions from the Snyk scan [#42834](https://github.com/ClickHouse/ClickHouse/pull/42834) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix Missing Env Vars - Sonar Nightly [#42843](https://github.com/ClickHouse/ClickHouse/pull/42843) ([Julio Jimenez](https://github.com/juliojimenez)).
* Fix typo [#42855](https://github.com/ClickHouse/ClickHouse/pull/42855) ([GoGoWen](https://github.com/GoGoWen)).
* Add timezone to 02458_datediff_date32 [#42857](https://github.com/ClickHouse/ClickHouse/pull/42857) ([Vladimir C](https://github.com/vdimir)).
* Adjust cancel and rerun workflow names to the actual [#42862](https://github.com/ClickHouse/ClickHouse/pull/42862) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Analyzer subquery in JOIN TREE with aggregation [#42865](https://github.com/ClickHouse/ClickHouse/pull/42865) ([Maksim Kita](https://github.com/kitaisreal)).
* Fix getauxval for sanitizer builds [#42866](https://github.com/ClickHouse/ClickHouse/pull/42866) ([Amos Bird](https://github.com/amosbird)).
* Update version_date.tsv and changelogs after v22.10.2.11-stable [#42871](https://github.com/ClickHouse/ClickHouse/pull/42871) ([robot-clickhouse](https://github.com/robot-clickhouse)).
* Validate Query Tree in debug [#42879](https://github.com/ClickHouse/ClickHouse/pull/42879) ([Dmitry Novik](https://github.com/novikd)).
* changed type name for s3 plain storage [#42890](https://github.com/ClickHouse/ClickHouse/pull/42890) ([Aleksandr](https://github.com/AVMusorin)).
* Cleanup implementation of regexpReplace(All|One) [#42907](https://github.com/ClickHouse/ClickHouse/pull/42907) ([Robert Schulze](https://github.com/rschu1ze)).
* Do not show status for Bugfix validate check in non bugfix PRs [#42932](https://github.com/ClickHouse/ClickHouse/pull/42932) ([Vladimir C](https://github.com/vdimir)).
* fix(typo): Passible -> Possible [#42933](https://github.com/ClickHouse/ClickHouse/pull/42933) ([Yakko Majuri](https://github.com/yakkomajuri)).
* Pin the cryptography version to not break lambdas [#42934](https://github.com/ClickHouse/ClickHouse/pull/42934) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix: bad cast from type DB::ColumnLowCardinality to DB::ColumnString [#42937](https://github.com/ClickHouse/ClickHouse/pull/42937) ([Igor Nikonov](https://github.com/devcrafter)).
* Attach thread pool for loading parts to the query [#42947](https://github.com/ClickHouse/ClickHouse/pull/42947) ([Azat Khuzhin](https://github.com/azat)).
* Fix macOS M1 builds due to sprintf deprecation [#42962](https://github.com/ClickHouse/ClickHouse/pull/42962) ([Jordi Villar](https://github.com/jrdi)).
* Less use of CH-specific bit_cast() [#42968](https://github.com/ClickHouse/ClickHouse/pull/42968) ([Robert Schulze](https://github.com/rschu1ze)).
* Remove some utils [#42972](https://github.com/ClickHouse/ClickHouse/pull/42972) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix a bug in CAST function parser [#42980](https://github.com/ClickHouse/ClickHouse/pull/42980) ([Nikolay Degterinsky](https://github.com/evillique)).
* Fix old bug to remove `refs/head` from ref name [#42981](https://github.com/ClickHouse/ClickHouse/pull/42981) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add debug information to nightly builds [#42997](https://github.com/ClickHouse/ClickHouse/pull/42997) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Add `on: workflow_call` to debug CI [#43000](https://github.com/ClickHouse/ClickHouse/pull/43000) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Simple fixes for restart replica description [#43004](https://github.com/ClickHouse/ClickHouse/pull/43004) ([Igor Nikonov](https://github.com/devcrafter)).
* Cleanup match code [#43006](https://github.com/ClickHouse/ClickHouse/pull/43006) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix TSan errors (correctly ignore _exit interception) [#43009](https://github.com/ClickHouse/ClickHouse/pull/43009) ([Azat Khuzhin](https://github.com/azat)).
* fix bandwidth throttlers initialization order [#43015](https://github.com/ClickHouse/ClickHouse/pull/43015) ([Sergei Trifonov](https://github.com/serxa)).
* Add test for issue [#42520](https://github.com/ClickHouse/ClickHouse/issues/42520) [#43027](https://github.com/ClickHouse/ClickHouse/pull/43027) ([Robert Schulze](https://github.com/rschu1ze)).
* Fix msan warning [#43065](https://github.com/ClickHouse/ClickHouse/pull/43065) ([Raúl Marín](https://github.com/Algunenano)).
* Update SECURITY.md on new stable tags [#43365](https://github.com/ClickHouse/ClickHouse/pull/43365) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Use all parameters with prefixes from ssm [#43467](https://github.com/ClickHouse/ClickHouse/pull/43467) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Temporarily disable `test_hive_query` [#43542](https://github.com/ClickHouse/ClickHouse/pull/43542) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Do not checkout submodules recursively [#43637](https://github.com/ClickHouse/ClickHouse/pull/43637) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Use docker images cache from merged PRs in master and release branches [#43664](https://github.com/ClickHouse/ClickHouse/pull/43664) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Yet another fix for AggregateFunctionMinMaxAny [#43778](https://github.com/ClickHouse/ClickHouse/pull/43778) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix tags workflow [#43942](https://github.com/ClickHouse/ClickHouse/pull/43942) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Assign release PRs [#44055](https://github.com/ClickHouse/ClickHouse/pull/44055) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Fix another bug in AggregateFunctionMinMaxAny [#44091](https://github.com/ClickHouse/ClickHouse/pull/44091) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Bump libdivide (to gain some new optimizations) [#44132](https://github.com/ClickHouse/ClickHouse/pull/44132) ([Azat Khuzhin](https://github.com/azat)).
* Add check for submodules sanity [#44386](https://github.com/ClickHouse/ClickHouse/pull/44386) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Implement a custom central checkout action [#44399](https://github.com/ClickHouse/ClickHouse/pull/44399) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
docs/en/engines/table-engines/integrations/deltalake.md (new file, 33 lines)
@ -0,0 +1,33 @@
---
slug: /en/engines/table-engines/integrations/deltalake
sidebar_label: DeltaLake
---

# DeltaLake Table Engine

This engine provides a read-only integration with existing [Delta Lake](https://github.com/delta-io/delta) tables in Amazon S3.

## Create Table

Note that the Delta Lake table must already exist in S3; this command does not take DDL parameters to create a new table.

``` sql
CREATE TABLE deltalake
    ENGINE = DeltaLake(url, [aws_access_key_id, aws_secret_access_key,])
```

**Engine parameters**

- `url` — Bucket URL with the path to the existing Delta Lake table.
- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. These parameters are optional. If credentials are not specified, they are taken from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3).

**Example**

```sql
CREATE TABLE deltalake ENGINE=DeltaLake('http://mars-doc-test.s3.amazonaws.com/clickhouse-bucket-3/test_table/', 'ABC123', 'Abc+123')
```
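Once attached, the table is read like any other table; a minimal sketch of a follow-up query, assuming the `deltalake` table created above:

```sql
SELECT * FROM deltalake LIMIT 10;
```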
## See also

- [deltaLake table function](../../../sql-reference/table-functions/deltalake.md)
docs/en/engines/table-engines/integrations/hudi.md (new file, 33 lines)
@ -0,0 +1,33 @@
---
slug: /en/engines/table-engines/integrations/hudi
sidebar_label: Hudi
---

# Hudi Table Engine

This engine provides a read-only integration with existing Apache [Hudi](https://hudi.apache.org/) tables in Amazon S3.

## Create Table

Note that the Hudi table must already exist in S3; this command does not take DDL parameters to create a new table.

``` sql
CREATE TABLE hudi_table
    ENGINE = Hudi(url, [aws_access_key_id, aws_secret_access_key,])
```

**Engine parameters**

- `url` — Bucket URL with the path to an existing Hudi table.
- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. These parameters are optional. If credentials are not specified, they are taken from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3).

**Example**

```sql
CREATE TABLE hudi_table ENGINE=Hudi('http://mars-doc-test.s3.amazonaws.com/clickhouse-bucket-3/test_table/', 'ABC123', 'Abc+123')
```

## See also

- [hudi table function](/docs/en/sql-reference/table-functions/hudi.md)
@ -101,7 +101,7 @@ The `TabSeparated` format supports outputting total values (when using WITH TOTA
SELECT EventDate, count() AS c FROM test.hits GROUP BY EventDate WITH TOTALS ORDER BY EventDate FORMAT TabSeparated
```

``` text
``` response
2014-03-17      1406958
2014-03-18      1383658
2014-03-19      1405797
@ -177,7 +177,7 @@ INSERT INTO nestedt Values ( 1, [1], ['a'])
SELECT * FROM nestedt FORMAT TSV
```

``` text
``` response
1  [1]    ['a']
```

@ -761,7 +761,7 @@ SELECT * FROM json_as_string;

Result:

``` text
``` response
┌─json──────────────────────────────┐
│ {"foo":{"bar":{"x":"y"},"baz":1}} │
│ {}                                │
@ -782,7 +782,7 @@ SELECT * FROM json_square_brackets;

Result:

```text
```response
┌─field──────────────────────┐
│ {"id": 1, "name": "name1"} │
│ {"id": 2, "name": "name2"} │
@ -1118,7 +1118,7 @@ When inserting data with `input_format_defaults_for_omitted_fields = 1`, ClickHo

Consider the `UserActivity` table as an example:

``` text
``` response
┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┐
│ 4324182021466249494 │         5 │      146 │   -1 │
│ 4324182021466249494 │         6 │      185 │    1 │
@ -1127,7 +1127,7 @@ Consider the `UserActivity` table as an example:

The query `SELECT * FROM UserActivity FORMAT JSONEachRow` returns:

``` text
``` response
{"UserID":"4324182021466249494","PageViews":5,"Duration":146,"Sign":-1}
{"UserID":"4324182021466249494","PageViews":6,"Duration":185,"Sign":1}
```
@ -1171,7 +1171,7 @@ Without this setting, ClickHouse throws an exception.
SELECT name, value FROM system.settings WHERE name = 'input_format_import_nested_json'
```

``` text
``` response
┌─name────────────────────────────┬─value─┐
│ input_format_import_nested_json │ 0     │
└─────────────────────────────────┴───────┘
@ -1181,7 +1181,7 @@ SELECT name, value FROM system.settings WHERE name = 'input_format_import_nested
INSERT INTO json_each_row_nested FORMAT JSONEachRow {"n": {"s": ["abc", "def"], "i": [1, 23]}}
```

``` text
``` response
Code: 117. DB::Exception: Unknown field found while parsing JSONEachRow format: n: (at row 1)
```

@ -1191,7 +1191,7 @@ INSERT INTO json_each_row_nested FORMAT JSONEachRow {"n": {"s": ["abc", "def"],
SELECT * FROM json_each_row_nested
```

``` text
``` response
┌─n.s───────────┬─n.i────┐
│ ['abc','def'] │ [1,23] │
└───────────────┴────────┘
@ -1265,7 +1265,7 @@ For input it uses the following correspondence between BSON types and ClickHouse
| `\x10` int32 | [Int32/UInt32](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal32](/docs/en/sql-reference/data-types/decimal.md) |
| `\x12` int64 | [Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal64](/docs/en/sql-reference/data-types/decimal.md)/[DateTime64](/docs/en/sql-reference/data-types/datetime64.md) |

Other BSON types are not supported. Also, it performs conversion between different integer types (for example, you can insert a BSON int32 value into ClickHouse UInt8).
Big integers and decimals (Int128/UInt128/Int256/UInt256/Decimal128/Decimal256) can be parsed from a BSON Binary value with the `\x00` binary subtype. In this case, this format will validate that the size of the binary data equals the size of the expected value.

Note: this format doesn't work properly on Big-Endian platforms.
@ -1300,7 +1300,7 @@ Example (shown for the [PrettyCompact](#prettycompact) format):
SELECT * FROM t_null
```

``` text
``` response
┌─x─┬────y─┐
│ 1 │ ᴺᵁᴸᴸ │
└───┴──────┘
@ -1312,7 +1312,7 @@ Rows are not escaped in Pretty\* formats. Example is shown for the [PrettyCompac
SELECT 'String with \'quotes\' and \t character' AS Escaping_test
```

``` text
``` response
┌─Escaping_test────────────────────────┐
│ String with 'quotes' and      character │
└──────────────────────────────────────┘
@ -1327,7 +1327,7 @@ The Pretty format supports outputting total values (when using WITH TOTALS) and
SELECT EventDate, count() AS c FROM test.hits GROUP BY EventDate WITH TOTALS ORDER BY EventDate FORMAT PrettyCompact
```

``` text
``` response
┌──EventDate─┬───────c─┐
│ 2014-03-17 │ 1406958 │
│ 2014-03-18 │ 1383658 │
@ -1488,7 +1488,7 @@ Example:
SELECT * FROM t_null FORMAT Vertical
```

``` text
``` response
Row 1:
──────
x: 1
@ -1501,7 +1501,7 @@ Rows are not escaped in Vertical format:
SELECT 'string with \'quotes\' and \t with some special \n characters' AS test FORMAT Vertical
```

``` text
``` response
Row 1:
──────
test: string with 'quotes' and      with some special
@ -2319,25 +2319,22 @@ INSERT INTO `test2` VALUES (1),(2),(3);
Queries:

```sql
:) desc file(dump.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'

DESCRIBE TABLE file(dump.sql, MySQLDump)
SETTINGS input_format_mysql_dump_table_name = 'test2'

Query id: 25e66c89-e10a-42a8-9b42-1ee8bbbde5ef
DESCRIBE TABLE file(dump.sql, MySQLDump) SETTINGS input_format_mysql_dump_table_name = 'test2'
```

```text
┌─name─┬─type────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ x    │ Nullable(Int32) │              │                    │         │                  │                │
└──────┴─────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```

:) select * from file(dump.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2'

```sql
SELECT *
FROM file(dump.sql, MySQLDump)
SETTINGS input_format_mysql_dump_table_name = 'test2'
```

Query id: 17d59664-ebce-4053-bb79-d46a516fb590

```text
┌─x─┐
│ 1 │
│ 2 │
@ -643,3 +643,106 @@ Default value: `0` (limit never applied).
``` xml
<min_marks_to_honor_max_concurrent_queries>10</min_marks_to_honor_max_concurrent_queries>
```

## ratio_of_defaults_for_sparse_serialization {#ratio_of_defaults_for_sparse_serialization}

Minimal ratio of the number of _default_ values to the number of _all_ values in a column. Setting this value causes the column to be stored using sparse serialization.

If a column is sparse (contains mostly zeros), ClickHouse can encode it in a sparse format and automatically optimize calculations - the data does not require full decompression during queries. To enable this sparse serialization, define the `ratio_of_defaults_for_sparse_serialization` setting to be less than 1.0. If the value is greater than or equal to 1.0 (the default), then the columns will always be written using the normal full serialization.

Possible values:

- Float between 0 and 1 to enable sparse serialization
- 1.0 (or greater) if you do not want to use sparse serialization

Default value: `1.0` (sparse serialization is disabled)

**Example**

Notice the `s` column in the following table is an empty string for 95% of the rows. In `my_regular_table` we do not use sparse serialization, and in `my_sparse_table` we set `ratio_of_defaults_for_sparse_serialization` to 0.95:

```sql
CREATE TABLE my_regular_table
(
    `id` UInt64,
    `s` String
)
ENGINE = MergeTree
ORDER BY id;

INSERT INTO my_regular_table
SELECT
    number AS id,
    number % 20 = 0 ? toString(number): '' AS s
FROM
    numbers(10000000);


CREATE TABLE my_sparse_table
(
    `id` UInt64,
    `s` String
)
ENGINE = MergeTree
ORDER BY id
SETTINGS ratio_of_defaults_for_sparse_serialization = 0.95;

INSERT INTO my_sparse_table
SELECT
    number,
    number % 20 = 0 ? toString(number): ''
FROM
    numbers(10000000);
```

Notice the `s` column in `my_sparse_table` uses less storage space on disk:

```sql
SELECT table, name, data_compressed_bytes, data_uncompressed_bytes FROM system.columns
WHERE table LIKE 'my_%_table';
```

```response
┌─table────────────┬─name─┬─data_compressed_bytes─┬─data_uncompressed_bytes─┐
│ my_regular_table │ id   │              37790741 │                75488328 │
│ my_regular_table │ s    │               2451377 │                12683106 │
│ my_sparse_table  │ id   │              37790741 │                75488328 │
│ my_sparse_table  │ s    │               2283454 │                 9855751 │
└──────────────────┴──────┴───────────────────────┴─────────────────────────┘
```

You can verify whether a column is using the sparse encoding by viewing the `serialization_kind` column of the `system.parts_columns` table:

```sql
SELECT column, serialization_kind FROM system.parts_columns
WHERE table LIKE 'my_sparse_table';
```

You can see which parts of `s` were stored using sparse serialization:

```response
┌─column─┬─serialization_kind─┐
│ id     │ Default            │
│ s      │ Default            │
│ id     │ Default            │
│ s      │ Default            │
│ id     │ Default            │
│ s      │ Sparse             │
│ id     │ Default            │
│ s      │ Sparse             │
│ id     │ Default            │
│ s      │ Sparse             │
│ id     │ Default            │
│ s      │ Sparse             │
│ id     │ Default            │
│ s      │ Sparse             │
│ id     │ Default            │
│ s      │ Sparse             │
│ id     │ Default            │
│ s      │ Sparse             │
│ id     │ Default            │
│ s      │ Sparse             │
│ id     │ Default            │
│ s      │ Sparse             │
└────────┴────────────────────┘
```
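The ratio can also be adjusted on an existing table; a minimal sketch reusing the `my_sparse_table` from the example above (only parts written after the change follow the new ratio):

```sql
ALTER TABLE my_sparse_table
    MODIFY SETTING ratio_of_defaults_for_sparse_serialization = 0.9;
```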
@ -17,10 +17,10 @@ Columns:
**Example**

```sql
:) SELECT * FROM system.disks;
SELECT * FROM system.disks;
```

```text
```response
┌─name────┬─path─────────────────┬───free_space─┬──total_space─┬─keep_free_space─┐
│ default │ /var/lib/clickhouse/ │ 276392587264 │ 490652508160 │               0 │
└─────────┴──────────────────────┴──────────────┴──────────────┴─────────────────┘
@ -15,10 +15,10 @@ Columns:

**Example**
```sql
:) SELECT * FROM system.merge_tree_settings LIMIT 4 FORMAT Vertical;
SELECT * FROM system.merge_tree_settings LIMIT 4 FORMAT Vertical;
```

```text
```response
Row 1:
──────
name: index_granularity
docs/en/operations/system-tables/moves.md (new file, 42 lines)
@ -0,0 +1,42 @@
---
slug: /en/operations/system-tables/moves
---
# moves

The table contains information about in-progress [data part moves](/docs/en/sql-reference/statements/alter/partition#move-partitionpart) of [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) tables. Each data part movement is represented by a single row.

Columns:

- `database` ([String](/docs/en/sql-reference/data-types/string.md)) — Name of the database.

- `table` ([String](/docs/en/sql-reference/data-types/string.md)) — Name of the table containing the moving data part.

- `elapsed` ([Float64](../../sql-reference/data-types/float.md)) — Time elapsed (in seconds) since data part movement started.

- `target_disk_name` ([String](disks.md)) — Name of the [disk](/docs/en/operations/system-tables/disks/) to which the data part is moving.

- `target_disk_path` ([String](disks.md)) — Path to the mount point of the [disk](/docs/en/operations/system-tables/disks/) in the file system.

- `part_name` ([String](/docs/en/sql-reference/data-types/string.md)) — Name of the data part being moved.

- `part_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Data part size.

- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Identifier of the thread performing the movement.

**Example**

```sql
SELECT * FROM system.moves
```

```response
┌─database─┬─table─┬─────elapsed─┬─target_disk_name─┬─target_disk_path─┬─part_name─┬─part_size─┬─thread_id─┐
│ default  │ test2 │ 1.668056039 │ s3               │ ./disks/s3/      │ all_3_3_0 │       136 │    296146 │
└──────────┴───────┴─────────────┴──────────────────┴──────────────────┴───────────┴───────────┴───────────┘
```
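For context, a move like the one in the sample row above can also be triggered manually; a minimal sketch reusing the same names (`test2`, part `all_3_3_0`, disk `s3`):

```sql
ALTER TABLE test2 MOVE PART 'all_3_3_0' TO DISK 's3';
```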
**See Also**

- [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) table engine
- [Using Multiple Block Devices for Data Storage](/docs/en/engines/table-engines/mergetree-family/mergetree#table_engine-mergetree-multiple-volumes)
- [ALTER TABLE ... MOVE PART](/docs/en/sql-reference/statements/alter/partition#move-partitionpart) command
@ -12,10 +12,10 @@ Reads from this table are not parallelized.
**Example**

```sql
:) SELECT * FROM system.numbers LIMIT 10;
SELECT * FROM system.numbers LIMIT 10;
```

```text
```response
┌─number─┐
│      0 │
│      1 │
@ -10,10 +10,10 @@ Used for tests.
**Example**

```sql
:) SELECT * FROM system.numbers_mt LIMIT 10;
SELECT * FROM system.numbers_mt LIMIT 10;
```

```text
```response
┌─number─┐
│      0 │
│      1 │
@ -12,10 +12,10 @@ This is similar to the `DUAL` table found in other DBMSs.
**Example**

```sql
:) SELECT * FROM system.one LIMIT 10;
SELECT * FROM system.one LIMIT 10;
```

```text
```response
┌─dummy─┐
│     0 │
└───────┘
@ -20,10 +20,10 @@ Columns:
|
||||
- `is_all_data_sent` (Int8) – Was all data sent to the client (in other words query had been finished on the server).
|
||||
|
||||
```sql
|
||||
:) SELECT * FROM system.processes LIMIT 10 FORMAT Vertical;
|
||||
SELECT * FROM system.processes LIMIT 10 FORMAT Vertical;
|
||||
```
|
||||
|
||||
```text
|
||||
```response
|
||||
Row 1:
|
||||
──────
|
||||
is_initial_query: 1
|
||||
|
@ -207,9 +207,16 @@ Converts a date or date with time to a UInt8 number containing the number of the

Aliases: `DAYOFMONTH`, `DAY`.

## toDayOfWeek
## toDayOfWeek(date\[,mode\])

Converts a date or date with time to a UInt8 number containing the number of the day of the week (Monday is 1, and Sunday is 7).
Converts a date or date with time to a UInt8 number containing the number of the day of the week. The two-argument form of toDayOfWeek() enables you to specify whether the week starts on Monday or Sunday, and whether the return value should be in the range from 0 to 6 or from 1 to 7. If the mode argument is omitted, the default mode is 0.

| Mode | First day of week | Range                                          |
|------|-------------------|------------------------------------------------|
| 0    | Monday            | 1-7, Monday = 1, Tuesday = 2, ..., Sunday = 7  |
| 1    | Monday            | 0-6, Monday = 0, Tuesday = 1, ..., Sunday = 6  |
| 2    | Sunday            | 0-6, Sunday = 0, Monday = 1, ..., Saturday = 6 |
| 3    | Sunday            | 1-7, Sunday = 1, Monday = 2, ..., Saturday = 7 |

Alias: `DAYOFWEEK`.
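As an illustration of the two-argument form documented above (not part of the changed file; the expected values follow the mode table, and 2023-04-21 falls on a Friday):

```sql
SELECT toDayOfWeek(toDate('2023-04-21'));    -- mode 0 (default): Monday = 1 ... Sunday = 7, so Friday -> 5
SELECT toDayOfWeek(toDate('2023-04-21'), 1); -- mode 1: Monday = 0 ... Sunday = 6, so Friday -> 4
SELECT toDayOfWeek(toDate('2023-04-21'), 3); -- mode 3: Sunday = 1 ... Saturday = 7, so Friday -> 6
```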
51
docs/en/sql-reference/table-functions/deltalake.md
Normal file
@ -0,0 +1,51 @@
---
slug: /en/sql-reference/table-functions/deltalake
sidebar_label: DeltaLake
---

# deltaLake Table Function

Provides a read-only table-like interface to [Delta Lake](https://github.com/delta-io/delta) tables in Amazon S3.

## Syntax

``` sql
deltaLake(url [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression])
```

## Arguments

- `url` — Bucket URL with the path to an existing Delta Lake table in S3.
- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. These parameters are optional. If credentials are not specified, they are taken from the ClickHouse configuration. For more information see [Using S3 for Data Storage](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-s3).
- `format` — The [format](/docs/en/interfaces/formats.md/#formats) of the file.
- `structure` — Structure of the table. Format: `'column1_name column1_type, column2_name column2_type, ...'`.
- `compression` — Optional parameter. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, compression is autodetected from the file extension.

**Returned value**

A table with the specified structure for reading data from the specified Delta Lake table in S3.

**Examples**

Selecting rows from the table in S3 `https://clickhouse-public-datasets.s3.amazonaws.com/delta_lake/hits/`:

``` sql
SELECT
    URL,
    UserAgent
FROM deltaLake('https://clickhouse-public-datasets.s3.amazonaws.com/delta_lake/hits/')
WHERE URL IS NOT NULL
LIMIT 2
```

``` response
┌─URL───────────────────────────────────────────────────────────────────┬─UserAgent─┐
│ http://auto.ria.ua/search/index.kz/jobinmoscow/detail/55089/hasimages │ 1 │
│ http://auto.ria.ua/search/index.kz/jobinmoscow.ru/gosushi │ 1 │
└───────────────────────────────────────────────────────────────────────┴───────────┘
```

**See Also**

- [DeltaLake engine](/docs/en/engines/table-engines/integrations/deltalake.md)
@ -27,7 +27,7 @@ A table with data parsed from `data` argument according specified format and ext
|
||||
|
||||
**Query:**
|
||||
``` sql
|
||||
:) select * from format(JSONEachRow,
|
||||
SELECT * FROM format(JSONEachRow,
|
||||
$$
|
||||
{"a": "Hello", "b": 111}
|
||||
{"a": "World", "b": 123}
|
||||
@ -38,7 +38,7 @@ $$)
|
||||
|
||||
**Result:**
|
||||
|
||||
```text
|
||||
```response
|
||||
┌───b─┬─a─────┐
|
||||
│ 111 │ Hello │
|
||||
│ 123 │ World │
|
||||
@ -49,8 +49,7 @@ $$)
|
||||
|
||||
**Query:**
|
||||
```sql
|
||||
|
||||
:) desc format(JSONEachRow,
|
||||
DESC format(JSONEachRow,
|
||||
$$
|
||||
{"a": "Hello", "b": 111}
|
||||
{"a": "World", "b": 123}
|
||||
@ -61,7 +60,7 @@ $$)
|
||||
|
||||
**Result:**
|
||||
|
||||
```text
|
||||
```response
|
||||
┌─name─┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
|
||||
│ b │ Nullable(Float64) │ │ │ │ │ │
|
||||
│ a │ Nullable(String) │ │ │ │ │ │
|
||||
|
31
docs/en/sql-reference/table-functions/hudi.md
Normal file
@ -0,0 +1,31 @@
---
slug: /en/sql-reference/table-functions/hudi
sidebar_label: Hudi
---

# hudi Table Function

Provides a read-only table-like interface to Apache [Hudi](https://hudi.apache.org/) tables in Amazon S3.

## Syntax

``` sql
hudi(url [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression])
```

## Arguments

- `url` — Bucket URL with the path to an existing Hudi table in S3.
- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. These parameters are optional. If credentials are not specified, they are taken from the ClickHouse configuration. For more information see [Using S3 for Data Storage](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-s3).
- `format` — The [format](/docs/en/interfaces/formats.md/#formats) of the file.
- `structure` — Structure of the table. Format: `'column1_name column1_type, column2_name column2_type, ...'`.
- `compression` — Optional parameter. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, compression is autodetected from the file extension.

**Returned value**

A table with the specified structure for reading data from the specified Hudi table in S3.

**See Also**

- [Hudi engine](/docs/en/engines/table-engines/integrations/hudi.md)
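As an aside (not part of the file above): unlike the deltaLake page, this one ships without a usage example, so here is a minimal sketch; the bucket URL is a hypothetical placeholder, not a real public dataset:

```sql
-- Read a couple of rows from a (hypothetical) Hudi table stored in S3.
SELECT *
FROM hudi('https://example-bucket.s3.amazonaws.com/hudi_table/')
LIMIT 2
```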
@ -27,7 +27,7 @@ A table with data parsed from `data` argument according specified format and ext
|
||||
|
||||
**Query:**
|
||||
``` sql
|
||||
:) select * from format(JSONEachRow,
|
||||
SELECT * FROM format(JSONEachRow,
|
||||
$$
|
||||
{"a": "Hello", "b": 111}
|
||||
{"a": "World", "b": 123}
|
||||
@ -38,7 +38,7 @@ $$)
|
||||
|
||||
**Result:**
|
||||
|
||||
```text
|
||||
```response
|
||||
┌───b─┬─a─────┐
|
||||
│ 111 │ Hello │
|
||||
│ 123 │ World │
|
||||
@ -49,8 +49,7 @@ $$)
|
||||
|
||||
**Query:**
|
||||
```sql
|
||||
|
||||
:) desc format(JSONEachRow,
|
||||
DESC format(JSONEachRow,
|
||||
$$
|
||||
{"a": "Hello", "b": 111}
|
||||
{"a": "World", "b": 123}
|
||||
@ -61,7 +60,7 @@ $$)
|
||||
|
||||
**Result:**
|
||||
|
||||
```text
|
||||
```response
|
||||
┌─name─┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
|
||||
│ b │ Nullable(Float64) │ │ │ │ │ │
|
||||
│ a │ Nullable(String) │ │ │ │ │ │
|
||||
|
@ -69,7 +69,9 @@ ORDER BY key
|
||||
|
||||
向其中插入数据:
|
||||
|
||||
:) INSERT INTO summtt Values(1,1),(1,2),(2,1)
|
||||
``` sql
|
||||
INSERT INTO summtt Values(1,1),(1,2),(2,1)
|
||||
```
|
||||
|
||||
ClickHouse可能不会完整的汇总所有行([见下文](#data-processing)),因此我们在查询中使用了聚合函数 `sum` 和 `GROUP BY` 子句。
|
||||
|
||||
|
@ -16,7 +16,7 @@ slug: /zh/operations/system-tables/disks
|
||||
**示例**
|
||||
|
||||
```sql
|
||||
:) SELECT * FROM system.disks;
|
||||
SELECT * FROM system.disks;
|
||||
```
|
||||
|
||||
```text
|
||||
|
@ -16,10 +16,10 @@ slug: /zh/operations/system-tables/merge_tree_settings
|
||||
|
||||
**示例**
|
||||
```sql
|
||||
:) SELECT * FROM system.merge_tree_settings LIMIT 4 FORMAT Vertical;
|
||||
SELECT * FROM system.merge_tree_settings LIMIT 4 FORMAT Vertical;
|
||||
```
|
||||
|
||||
```text
|
||||
```response
|
||||
Row 1:
|
||||
──────
|
||||
name: index_granularity
|
||||
|
@ -12,10 +12,10 @@ slug: /zh/operations/system-tables/numbers
|
||||
**示例**
|
||||
|
||||
```sql
|
||||
:) SELECT * FROM system.numbers LIMIT 10;
|
||||
SELECT * FROM system.numbers LIMIT 10;
|
||||
```
|
||||
|
||||
```text
|
||||
```response
|
||||
┌─number─┐
|
||||
│ 0 │
|
||||
│ 1 │
|
||||
|
@ -12,10 +12,10 @@ slug: /zh/operations/system-tables/one
|
||||
**示例**
|
||||
|
||||
```sql
|
||||
:) SELECT * FROM system.one LIMIT 10;
|
||||
SELECT * FROM system.one LIMIT 10;
|
||||
```
|
||||
|
||||
```text
|
||||
```response
|
||||
┌─dummy─┐
|
||||
│ 0 │
|
||||
└───────┘
|
||||
|
@ -19,29 +19,25 @@ slug: /zh/sql-reference/data-types/array
|
||||
|
||||
创建数组示例:
|
||||
|
||||
:) SELECT array(1, 2) AS x, toTypeName(x)
|
||||
```sql
|
||||
SELECT array(1, 2) AS x, toTypeName(x)
|
||||
```
|
||||
|
||||
SELECT
|
||||
[1, 2] AS x,
|
||||
toTypeName(x)
|
||||
```response
|
||||
┌─x─────┬─toTypeName(array(1, 2))─┐
|
||||
│ [1,2] │ Array(UInt8) │
|
||||
└───────┴─────────────────────────┘
|
||||
```
|
||||
|
||||
┌─x─────┬─toTypeName(array(1, 2))─┐
|
||||
│ [1,2] │ Array(UInt8) │
|
||||
└───────┴─────────────────────────┘
|
||||
``` sql
|
||||
SELECT [1, 2] AS x, toTypeName(x)
|
||||
```
|
||||
|
||||
1 rows in set. Elapsed: 0.002 sec.
|
||||
|
||||
:) SELECT [1, 2] AS x, toTypeName(x)
|
||||
|
||||
SELECT
|
||||
[1, 2] AS x,
|
||||
toTypeName(x)
|
||||
|
||||
┌─x─────┬─toTypeName([1, 2])─┐
|
||||
│ [1,2] │ Array(UInt8) │
|
||||
└───────┴────────────────────┘
|
||||
|
||||
1 rows in set. Elapsed: 0.002 sec.
|
||||
```response
|
||||
┌─x─────┬─toTypeName([1, 2])─┐
|
||||
│ [1,2] │ Array(UInt8) │
|
||||
└───────┴────────────────────┘
|
||||
```
|
||||
|
||||
## 使用数据类型 {#shi-yong-shu-ju-lei-xing}
|
||||
|
||||
@ -50,26 +46,23 @@ ClickHouse会自动检测数组元素,并根据元素计算出存储这些元素
|
||||
如果 ClickHouse 无法确定数据类型,它将产生异常。当尝试同时创建一个包含字符串和数字的数组时会发生这种情况 (`SELECT array(1, 'a')`)。
|
||||
|
||||
自动数据类型检测示例:
|
||||
```sql
|
||||
SELECT array(1, 2, NULL) AS x, toTypeName(x)
|
||||
```
|
||||
|
||||
:) SELECT array(1, 2, NULL) AS x, toTypeName(x)
|
||||
|
||||
SELECT
|
||||
[1, 2, NULL] AS x,
|
||||
toTypeName(x)
|
||||
|
||||
┌─x──────────┬─toTypeName(array(1, 2, NULL))─┐
|
||||
│ [1,2,NULL] │ Array(Nullable(UInt8)) │
|
||||
└────────────┴───────────────────────────────┘
|
||||
|
||||
1 rows in set. Elapsed: 0.002 sec.
|
||||
```response
|
||||
┌─x──────────┬─toTypeName(array(1, 2, NULL))─┐
|
||||
│ [1,2,NULL] │ Array(Nullable(UInt8)) │
|
||||
└────────────┴───────────────────────────────┘
|
||||
```
|
||||
|
||||
如果您尝试创建不兼容的数据类型数组,ClickHouse 将引发异常:
|
||||
|
||||
:) SELECT array(1, 'a')
|
||||
```sql
|
||||
SELECT array(1, 'a')
|
||||
```
|
||||
|
||||
SELECT [1, 'a']
|
||||
|
||||
Received exception from server (version 1.1.54388):
|
||||
Code: 386. DB::Exception: Received from localhost:9000, 127.0.0.1. DB::Exception: There is no supertype for types UInt8, String because some of them are String/FixedString and some of them are not.
|
||||
|
||||
0 rows in set. Elapsed: 0.246 sec.
|
||||
```response
|
||||
Received exception from server (version 1.1.54388):
|
||||
Code: 386. DB::Exception: Received from localhost:9000, 127.0.0.1. DB::Exception: There is no supertype for types UInt8, String because some of them are String/FixedString and some of them are not.
|
||||
```
|
||||
|
@ -20,49 +20,64 @@ slug: /zh/sql-reference/data-types/enum
|
||||
|
||||
这个 `x` 列只能存储类型定义中列出的值:`'hello'`或`'world'`。如果您尝试保存任何其他值,ClickHouse 抛出异常。
|
||||
|
||||
:) INSERT INTO t_enum VALUES ('hello'), ('world'), ('hello')
|
||||
```sql
|
||||
INSERT INTO t_enum VALUES ('hello'), ('world'), ('hello')
|
||||
```
|
||||
|
||||
INSERT INTO t_enum VALUES
|
||||
```response
|
||||
Ok.
|
||||
|
||||
Ok.
|
||||
3 rows in set. Elapsed: 0.002 sec.
|
||||
```
|
||||
|
||||
3 rows in set. Elapsed: 0.002 sec.
|
||||
```sql
|
||||
INSERT INTO t_enum VALUES('a')
|
||||
```
|
||||
|
||||
:) insert into t_enum values('a')
|
||||
|
||||
INSERT INTO t_enum VALUES
|
||||
|
||||
|
||||
Exception on client:
|
||||
Code: 49. DB::Exception: Unknown element 'a' for type Enum8('hello' = 1, 'world' = 2)
|
||||
```response
|
||||
Exception on client:
|
||||
Code: 49. DB::Exception: Unknown element 'a' for type Enum8('hello' = 1, 'world' = 2)
|
||||
```
|
||||
|
||||
当您从表中查询数据时,ClickHouse 从 `Enum` 中输出字符串值。
|
||||
|
||||
SELECT * FROM t_enum
|
||||
```sql
|
||||
SELECT * FROM t_enum
|
||||
```
|
||||
|
||||
┌─x─────┐
|
||||
│ hello │
|
||||
│ world │
|
||||
│ hello │
|
||||
└───────┘
|
||||
```response
|
||||
┌─x─────┐
|
||||
│ hello │
|
||||
│ world │
|
||||
│ hello │
|
||||
└───────┘
|
||||
```
|
||||
|
||||
如果需要看到对应行的数值,则必须将 `Enum` 值转换为整数类型。
|
||||
|
||||
SELECT CAST(x, 'Int8') FROM t_enum
|
||||
```sql
|
||||
SELECT CAST(x, 'Int8') FROM t_enum
|
||||
```
|
||||
|
||||
┌─CAST(x, 'Int8')─┐
|
||||
│ 1 │
|
||||
│ 2 │
|
||||
│ 1 │
|
||||
└─────────────────┘
|
||||
```response
|
||||
┌─CAST(x, 'Int8')─┐
|
||||
│ 1 │
|
||||
│ 2 │
|
||||
│ 1 │
|
||||
└─────────────────┘
|
||||
```
|
||||
|
||||
在查询中创建枚举值,您还需要使用 `CAST`。
|
||||
|
||||
SELECT toTypeName(CAST('a', 'Enum8(\'a\' = 1, \'b\' = 2)'))
|
||||
```sql
|
||||
SELECT toTypeName(CAST('a', 'Enum8(\'a\' = 1, \'b\' = 2)'))
|
||||
```
|
||||
|
||||
┌─toTypeName(CAST('a', 'Enum8(\'a\' = 1, \'b\' = 2)'))─┐
|
||||
│ Enum8('a' = 1, 'b' = 2) │
|
||||
└──────────────────────────────────────────────────────┘
|
||||
```response
|
||||
┌─toTypeName(CAST('a', 'Enum8(\'a\' = 1, \'b\' = 2)'))─┐
|
||||
│ Enum8('a' = 1, 'b' = 2) │
|
||||
└──────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## 规则及用法 {#gui-ze-ji-yong-fa}
|
||||
|
||||
@ -72,15 +87,19 @@ slug: /zh/sql-reference/data-types/enum
|
||||
|
||||
`Enum` 包含在 [可为空](nullable.md) 类型中。因此,如果您使用此查询创建一个表
|
||||
|
||||
CREATE TABLE t_enum_nullable
|
||||
(
|
||||
x Nullable( Enum8('hello' = 1, 'world' = 2) )
|
||||
)
|
||||
ENGINE = TinyLog
|
||||
```sql
|
||||
CREATE TABLE t_enum_nullable
|
||||
(
|
||||
x Nullable( Enum8('hello' = 1, 'world' = 2) )
|
||||
)
|
||||
ENGINE = TinyLog
|
||||
```
|
||||
|
||||
不仅可以存储 `'hello'` 和 `'world'` ,还可以存储 `NULL`。
|
||||
|
||||
INSERT INTO t_enum_nullable Values('hello'),('world'),(NULL)
|
||||
```sql
|
||||
INSERT INTO t_enum_nullable Values('hello'),('world'),(NULL)
|
||||
```
|
||||
|
||||
在内存中,`Enum` 列的存储方式与相应数值的 `Int8` 或 `Int16` 相同。
|
||||
|
||||
|
@ -9,11 +9,11 @@ slug: /zh/sql-reference/data-types/special-data-types/nothing
|
||||
|
||||
`Nothing` 类型也可以用来表示空数组:
|
||||
|
||||
``` bash
|
||||
:) SELECT toTypeName(array())
|
||||
|
||||
SELECT toTypeName([])
|
||||
```sql
|
||||
SELECT toTypeName(array())
|
||||
```
|
||||
|
||||
```response
|
||||
┌─toTypeName(array())─┐
|
||||
│ Array(Nothing) │
|
||||
└─────────────────────┘
|
||||
|
@ -17,17 +17,15 @@ slug: /zh/sql-reference/data-types/tuple
|
||||
|
||||
创建元组的示例:
|
||||
|
||||
:) SELECT tuple(1,'a') AS x, toTypeName(x)
|
||||
```sql
|
||||
SELECT tuple(1,'a') AS x, toTypeName(x)
|
||||
```
|
||||
|
||||
SELECT
|
||||
(1, 'a') AS x,
|
||||
toTypeName(x)
|
||||
|
||||
┌─x───────┬─toTypeName(tuple(1, 'a'))─┐
|
||||
│ (1,'a') │ Tuple(UInt8, String) │
|
||||
└─────────┴───────────────────────────┘
|
||||
|
||||
1 rows in set. Elapsed: 0.021 sec.
|
||||
```response
|
||||
┌─x───────┬─toTypeName(tuple(1, 'a'))─┐
|
||||
│ (1,'a') │ Tuple(UInt8, String) │
|
||||
└─────────┴───────────────────────────┘
|
||||
```
|
||||
|
||||
## 元组中的数据类型 {#yuan-zu-zhong-de-shu-ju-lei-xing}
|
||||
|
||||
@ -35,14 +33,12 @@ slug: /zh/sql-reference/data-types/tuple
|
||||
|
||||
自动数据类型检测示例:
|
||||
|
||||
SELECT tuple(1, NULL) AS x, toTypeName(x)
|
||||
```sql
|
||||
SELECT tuple(1, NULL) AS x, toTypeName(x)
|
||||
```
|
||||
|
||||
SELECT
|
||||
(1, NULL) AS x,
|
||||
toTypeName(x)
|
||||
|
||||
┌─x────────┬─toTypeName(tuple(1, NULL))──────┐
|
||||
│ (1,NULL) │ Tuple(UInt8, Nullable(Nothing)) │
|
||||
└──────────┴─────────────────────────────────┘
|
||||
|
||||
1 rows in set. Elapsed: 0.002 sec.
|
||||
```response
|
||||
┌─x────────┬─toTypeName(tuple(1, NULL))──────┐
|
||||
│ (1,NULL) │ Tuple(UInt8, Nullable(Nothing)) │
|
||||
└──────────┴─────────────────────────────────┘
|
||||
```
|
||||
|
@ -22,24 +22,24 @@ slug: /zh/sql-reference/functions/functions-for-nulls
|
||||
|
||||
存在以下内容的表
|
||||
|
||||
┌─x─┬────y─┐
|
||||
│ 1 │ ᴺᵁᴸᴸ │
|
||||
│ 2 │ 3 │
|
||||
└───┴──────┘
|
||||
```response
|
||||
┌─x─┬────y─┐
|
||||
│ 1 │ ᴺᵁᴸᴸ │
|
||||
│ 2 │ 3 │
|
||||
└───┴──────┘
|
||||
```
|
||||
|
||||
对其进行查询
|
||||
|
||||
:) SELECT x FROM t_null WHERE isNull(y)
|
||||
```sql
|
||||
SELECT x FROM t_null WHERE isNull(y)
|
||||
```
|
||||
|
||||
SELECT x
|
||||
FROM t_null
|
||||
WHERE isNull(y)
|
||||
|
||||
┌─x─┐
|
||||
│ 1 │
|
||||
└───┘
|
||||
|
||||
1 rows in set. Elapsed: 0.010 sec.
|
||||
```response
|
||||
┌─x─┐
|
||||
│ 1 │
|
||||
└───┘
|
||||
```
|
||||
|
||||
## isNotNull {#isnotnull}
|
||||
|
||||
@ -60,24 +60,24 @@ slug: /zh/sql-reference/functions/functions-for-nulls
|
||||
|
||||
存在以下内容的表
|
||||
|
||||
┌─x─┬────y─┐
|
||||
│ 1 │ ᴺᵁᴸᴸ │
|
||||
│ 2 │ 3 │
|
||||
└───┴──────┘
|
||||
```response
|
||||
┌─x─┬────y─┐
|
||||
│ 1 │ ᴺᵁᴸᴸ │
|
||||
│ 2 │ 3 │
|
||||
└───┴──────┘
|
||||
```
|
||||
|
||||
对其进行查询
|
||||
|
||||
:) SELECT x FROM t_null WHERE isNotNull(y)
|
||||
```sql
|
||||
SELECT x FROM t_null WHERE isNotNull(y)
|
||||
```
|
||||
|
||||
SELECT x
|
||||
FROM t_null
|
||||
WHERE isNotNull(y)
|
||||
|
||||
┌─x─┐
|
||||
│ 2 │
|
||||
└───┘
|
||||
|
||||
1 rows in set. Elapsed: 0.010 sec.
|
||||
```response
|
||||
┌─x─┐
|
||||
│ 2 │
|
||||
└───┘
|
||||
```
|
||||
|
||||
## 合并 {#coalesce}
|
||||
|
||||
@ -98,26 +98,27 @@ slug: /zh/sql-reference/functions/functions-for-nulls
|
||||
|
||||
考虑可以指定多种联系客户的方式的联系人列表。
|
||||
|
||||
┌─name─────┬─mail─┬─phone─────┬──icq─┐
|
||||
│ client 1 │ ᴺᵁᴸᴸ │ 123-45-67 │ 123 │
|
||||
│ client 2 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
|
||||
└──────────┴──────┴───────────┴──────┘
|
||||
```response
|
||||
┌─name─────┬─mail─┬─phone─────┬──icq─┐
|
||||
│ client 1 │ ᴺᵁᴸᴸ │ 123-45-67 │ 123 │
|
||||
│ client 2 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
|
||||
└──────────┴──────┴───────────┴──────┘
|
||||
```
|
||||
|
||||
`mail`和`phone`字段是String类型,但`icq`字段是`UInt32`,所以它需要转换为`String`。
|
||||
|
||||
从联系人列表中获取客户的第一个可用联系方式:
|
||||
|
||||
:) SELECT coalesce(mail, phone, CAST(icq,'Nullable(String)')) FROM aBook
|
||||
```sql
|
||||
SELECT coalesce(mail, phone, CAST(icq,'Nullable(String)')) FROM aBook
|
||||
```
|
||||
|
||||
SELECT coalesce(mail, phone, CAST(icq, 'Nullable(String)'))
|
||||
FROM aBook
|
||||
|
||||
┌─name─────┬─coalesce(mail, phone, CAST(icq, 'Nullable(String)'))─┐
|
||||
│ client 1 │ 123-45-67 │
|
||||
│ client 2 │ ᴺᵁᴸᴸ │
|
||||
└──────────┴──────────────────────────────────────────────────────┘
|
||||
|
||||
2 rows in set. Elapsed: 0.006 sec.
|
||||
```response
|
||||
┌─name─────┬─coalesce(mail, phone, CAST(icq, 'Nullable(String)'))─┐
|
||||
│ client 1 │ 123-45-67 │
|
||||
│ client 2 │ ᴺᵁᴸᴸ │
|
||||
└──────────┴──────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## ifNull {#ifnull}
|
||||
|
||||
|
@ -33,7 +33,7 @@ slug: /zh/sql-reference/functions/other-functions
|
||||
SELECT 'some/long/path/to/file' AS a, basename(a)
|
||||
```
|
||||
|
||||
``` text
|
||||
```response
|
||||
┌─a──────────────────────┬─basename('some\\long\\path\\to\\file')─┐
|
||||
│ some\long\path\to\file │ file │
|
||||
└────────────────────────┴────────────────────────────────────────┘
|
||||
@ -43,7 +43,7 @@ SELECT 'some/long/path/to/file' AS a, basename(a)
|
||||
SELECT 'some\\long\\path\\to\\file' AS a, basename(a)
|
||||
```
|
||||
|
||||
``` text
|
||||
```response
|
||||
┌─a──────────────────────┬─basename('some\\long\\path\\to\\file')─┐
|
||||
│ some\long\path\to\file │ file │
|
||||
└────────────────────────┴────────────────────────────────────────┘
|
||||
@ -53,7 +53,7 @@ SELECT 'some\\long\\path\\to\\file' AS a, basename(a)
|
||||
SELECT 'some-file-name' AS a, basename(a)
|
||||
```
|
||||
|
||||
``` text
|
||||
```response
|
||||
┌─a──────────────┬─basename('some-file-name')─┐
|
||||
│ some-file-name │ some-file-name │
|
||||
└────────────────┴────────────────────────────┘
|
||||
@ -398,23 +398,25 @@ FROM
|
||||
|
||||
**`toTypeName ' 与 ' toColumnTypeName`的区别示例**
|
||||
|
||||
:) select toTypeName(cast('2018-01-01 01:02:03' AS DateTime))
|
||||
```sql
|
||||
SELECT toTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))
|
||||
```
|
||||
|
||||
SELECT toTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))
|
||||
```response
|
||||
┌─toTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))─┐
|
||||
│ DateTime │
|
||||
└─────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
┌─toTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))─┐
|
||||
│ DateTime │
|
||||
└─────────────────────────────────────────────────────┘
|
||||
```sql
|
||||
SELECT toColumnTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))
|
||||
```
|
||||
|
||||
1 rows in set. Elapsed: 0.008 sec.
|
||||
|
||||
:) select toColumnTypeName(cast('2018-01-01 01:02:03' AS DateTime))
|
||||
|
||||
SELECT toColumnTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))
|
||||
|
||||
┌─toColumnTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))─┐
|
||||
│ Const(UInt32) │
|
||||
└───────────────────────────────────────────────────────────┘
|
||||
```response
|
||||
┌─toColumnTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))─┐
|
||||
│ Const(UInt32) │
|
||||
└───────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
该示例显示`DateTime`数据类型作为`Const(UInt32)`存储在内存中。
|
||||
|
||||
@ -460,26 +462,25 @@ FROM
|
||||
|
||||
**示例**
|
||||
|
||||
:) SELECT defaultValueOfArgumentType( CAST(1 AS Int8) )
|
||||
```sql
|
||||
SELECT defaultValueOfArgumentType(CAST(1, 'Int8'))
|
||||
```
|
||||
|
||||
SELECT defaultValueOfArgumentType(CAST(1, 'Int8'))
|
||||
```response
|
||||
┌─defaultValueOfArgumentType(CAST(1, 'Int8'))─┐
|
||||
│ 0 │
|
||||
└─────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
┌─defaultValueOfArgumentType(CAST(1, 'Int8'))─┐
|
||||
│ 0 │
|
||||
└─────────────────────────────────────────────┘
|
||||
|
||||
1 rows in set. Elapsed: 0.002 sec.
|
||||
|
||||
:) SELECT defaultValueOfArgumentType( CAST(1 AS Nullable(Int8) ) )
|
||||
|
||||
SELECT defaultValueOfArgumentType(CAST(1, 'Nullable(Int8)'))
|
||||
|
||||
┌─defaultValueOfArgumentType(CAST(1, 'Nullable(Int8)'))─┐
|
||||
│ ᴺᵁᴸᴸ │
|
||||
└───────────────────────────────────────────────────────┘
|
||||
|
||||
1 rows in set. Elapsed: 0.002 sec.
|
||||
```sql
|
||||
SELECT defaultValueOfArgumentType(CAST(1, 'Nullable(Int8)'))
|
||||
```
|
||||
|
||||
```response
|
||||
┌─defaultValueOfArgumentType(CAST(1, 'Nullable(Int8)'))─┐
|
||||
│ ᴺᵁᴸᴸ │
|
||||
└───────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
## indexHint {#indexhint}
|
||||
输出符合索引选择范围内的所有数据,同时不实用参数中的表达式进行过滤。
|
||||
@ -496,7 +497,8 @@ FROM
|
||||
|
||||
```
|
||||
SELECT count() FROM ontime
|
||||
|
||||
```
|
||||
```response
|
||||
┌─count()─┐
|
||||
│ 4276457 │
|
||||
└─────────┘
|
||||
@ -506,9 +508,11 @@ SELECT count() FROM ontime
|
||||
|
||||
对该表进行如下的查询:
|
||||
|
||||
```sql
|
||||
SELECT FlightDate AS k, count() FROM ontime GROUP BY k ORDER BY k
|
||||
```
|
||||
:) SELECT FlightDate AS k, count() FROM ontime GROUP BY k ORDER BY k
|
||||
|
||||
```response
|
||||
SELECT
|
||||
FlightDate AS k,
|
||||
count()
|
||||
@ -530,9 +534,11 @@ ORDER BY k ASC
|
||||
|
||||
在这个查询中,由于没有使用索引,所以ClickHouse将处理整个表的所有数据(`Processed 4.28 million rows`)。使用下面的查询尝试使用索引进行查询:
|
||||
|
||||
```sql
|
||||
SELECT FlightDate AS k, count() FROM ontime WHERE k = '2017-09-15' GROUP BY k ORDER BY k
|
||||
```
|
||||
:) SELECT FlightDate AS k, count() FROM ontime WHERE k = '2017-09-15' GROUP BY k ORDER BY k
|
||||
|
||||
```response
|
||||
SELECT
|
||||
FlightDate AS k,
|
||||
count()
|
||||
@ -552,9 +558,11 @@ ORDER BY k ASC
|
||||
|
||||
现在将表达式`k = '2017-09-15'`传递给`indexHint`函数:
|
||||
|
||||
```sql
|
||||
SELECT FlightDate AS k, count() FROM ontime WHERE indexHint(k = '2017-09-15') GROUP BY k ORDER BY k
|
||||
```
|
||||
:) SELECT FlightDate AS k, count() FROM ontime WHERE indexHint(k = '2017-09-15') GROUP BY k ORDER BY k
|
||||
|
||||
```response
|
||||
SELECT
|
||||
FlightDate AS k,
|
||||
count()
|
||||
|
@ -21,13 +21,13 @@ UUID类型的值。
|
||||
|
||||
此示例演示如何在表中创建UUID类型的列,并对其写入数据。
|
||||
|
||||
``` sql
|
||||
:) CREATE TABLE t_uuid (x UUID) ENGINE=TinyLog
|
||||
|
||||
:) INSERT INTO t_uuid SELECT generateUUIDv4()
|
||||
|
||||
:) SELECT * FROM t_uuid
|
||||
```sql
|
||||
CREATE TABLE t_uuid (x UUID) ENGINE=TinyLog
|
||||
INSERT INTO t_uuid SELECT generateUUIDv4()
|
||||
SELECT * FROM t_uuid
|
||||
```
|
||||
|
||||
```response
|
||||
┌────────────────────────────────────x─┐
|
||||
│ f4bf890f-f9dc-4332-ad5c-0c18e73f28e9 │
|
||||
└──────────────────────────────────────┘
|
||||
@ -47,9 +47,11 @@ UUID类型的值
|
||||
|
||||
**使用示例**
|
||||
|
||||
``` sql
|
||||
:) SELECT toUUID('61f0c404-5cb3-11e7-907b-a6006ad3dba0') AS uuid
|
||||
```sql
|
||||
SELECT toUUID('61f0c404-5cb3-11e7-907b-a6006ad3dba0') AS uuid
|
||||
```
|
||||
|
||||
```response
|
||||
┌─────────────────────────────────uuid─┐
|
||||
│ 61f0c404-5cb3-11e7-907b-a6006ad3dba0 │
|
||||
└──────────────────────────────────────┘
|
||||
@ -70,10 +72,12 @@ UUIDStringToNum(String)
|
||||
**使用示例**
|
||||
|
||||
``` sql
|
||||
:) SELECT
|
||||
SELECT
|
||||
'612f3c40-5d3b-217e-707b-6a546a3d7b29' AS uuid,
|
||||
UUIDStringToNum(uuid) AS bytes
|
||||
```
|
||||
|
||||
```response
|
||||
┌─uuid─────────────────────────────────┬─bytes────────────┐
|
||||
│ 612f3c40-5d3b-217e-707b-6a546a3d7b29 │ a/<@];!~p{jTj={) │
|
||||
└──────────────────────────────────────┴──────────────────┘
|
||||
@ -97,7 +101,8 @@ UUIDNumToString(FixedString(16))
|
||||
SELECT
|
||||
'a/<@];!~p{jTj={)' AS bytes,
|
||||
UUIDNumToString(toFixedString(bytes, 16)) AS uuid
|
||||
|
||||
```
|
||||
```response
|
||||
┌─bytes────────────┬─uuid─────────────────────────────────┐
|
||||
│ a/<@];!~p{jTj={) │ 612f3c40-5d3b-217e-707b-6a546a3d7b29 │
|
||||
└──────────────────┴──────────────────────────────────────┘
|
||||
|
@ -143,7 +143,7 @@ SELECT
|
||||
FROM test.Orders;
|
||||
```
|
||||
|
||||
``` text
|
||||
``` response
|
||||
┌─OrderYear─┬─OrderMonth─┬─OrderDay─┬─OrderHour─┬─OrderMinute─┬─OrderSecond─┐
|
||||
│ 2008 │ 10 │ 11 │ 13 │ 23 │ 44 │
|
||||
└───────────┴────────────┴──────────┴───────────┴─────────────┴─────────────┘
|
||||
@ -161,7 +161,7 @@ FROM test.Orders;
|
||||
SELECT now() AS current_date_time, current_date_time + INTERVAL 4 DAY + INTERVAL 3 HOUR
|
||||
```
|
||||
|
||||
``` text
|
||||
``` response
|
||||
┌───current_date_time─┬─plus(plus(now(), toIntervalDay(4)), toIntervalHour(3))─┐
|
||||
│ 2019-10-23 11:16:28 │ 2019-10-27 14:16:28 │
|
||||
└─────────────────────┴────────────────────────────────────────────────────────┘
|
||||
@ -226,18 +226,14 @@ ClickHouse 支持 `IS NULL` 和 `IS NOT NULL` 。
|
||||
|
||||
<!-- -->
|
||||
|
||||
``` bash
|
||||
:) SELECT x+100 FROM t_null WHERE y IS NULL
|
||||
|
||||
SELECT x + 100
|
||||
FROM t_null
|
||||
WHERE isNull(y)
|
||||
``` sql
|
||||
SELECT x+100 FROM t_null WHERE y IS NULL
|
||||
```
|
||||
|
||||
``` response
|
||||
┌─plus(x, 100)─┐
|
||||
│ 101 │
|
||||
└──────────────┘
|
||||
|
||||
1 rows in set. Elapsed: 0.002 sec.
|
||||
```
|
||||
|
||||
### IS NOT NULL {#is-not-null}
|
||||
@ -249,16 +245,12 @@ WHERE isNull(y)
|
||||
|
||||
<!-- -->
|
||||
|
||||
``` bash
|
||||
:) SELECT * FROM t_null WHERE y IS NOT NULL
|
||||
|
||||
SELECT *
|
||||
FROM t_null
|
||||
WHERE isNotNull(y)
|
||||
``` sql
|
||||
SELECT * FROM t_null WHERE y IS NOT NULL
|
||||
```
|
||||
|
||||
``` response
|
||||
┌─x─┬─y─┐
|
||||
│ 2 │ 3 │
|
||||
└───┴───┘
|
||||
|
||||
1 rows in set. Elapsed: 0.002 sec.
|
||||
```
|
||||
|
@ -27,7 +27,7 @@ A table with data parsed from `data` argument according specified format and ext
|
||||
|
||||
**Query:**
|
||||
``` sql
|
||||
:) select * from format(JSONEachRow,
|
||||
SELECT * FROM format(JSONEachRow,
|
||||
$$
|
||||
{"a": "Hello", "b": 111}
|
||||
{"a": "World", "b": 123}
|
||||
@ -38,7 +38,7 @@ $$)
|
||||
|
||||
**Result:**
|
||||
|
||||
```text
|
||||
```response
|
||||
┌───b─┬─a─────┐
|
||||
│ 111 │ Hello │
|
||||
│ 123 │ World │
|
||||
@ -49,8 +49,7 @@ $$)
|
||||
|
||||
**Query:**
|
||||
```sql
|
||||
|
||||
:) desc format(JSONEachRow,
|
||||
DESC format(JSONEachRow,
|
||||
$$
|
||||
{"a": "Hello", "b": 111}
|
||||
{"a": "World", "b": 123}
|
||||
@ -61,7 +60,7 @@ $$)
|
||||
|
||||
**Result:**
|
||||
|
||||
```text
|
||||
```response
|
||||
┌─name─┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
|
||||
│ b │ Nullable(Float64) │ │ │ │ │ │
|
||||
│ a │ Nullable(String) │ │ │ │ │ │
|
||||
|
@ -117,8 +117,9 @@ public:
|
||||
if (!function_name_if_constant_is_negative.empty() &&
|
||||
left_argument_constant_value_literal < zeroField(left_argument_constant_value_literal))
|
||||
{
|
||||
resolveAggregateFunctionNode(*aggregate_function_node, function_name_if_constant_is_negative);
|
||||
lower_function_name = function_name_if_constant_is_negative;
|
||||
}
|
||||
resolveAggregateFunctionNode(*aggregate_function_node, inner_function_arguments_nodes[1], lower_function_name);
|
||||
|
||||
auto inner_function = aggregate_function_arguments_nodes[0];
|
||||
auto inner_function_right_argument = std::move(inner_function_arguments_nodes[1]);
|
||||
@ -133,8 +134,9 @@ public:
|
||||
if (!function_name_if_constant_is_negative.empty() &&
|
||||
right_argument_constant_value_literal < zeroField(right_argument_constant_value_literal))
|
||||
{
|
||||
resolveAggregateFunctionNode(*aggregate_function_node, function_name_if_constant_is_negative);
|
||||
lower_function_name = function_name_if_constant_is_negative;
|
||||
}
|
||||
resolveAggregateFunctionNode(*aggregate_function_node, inner_function_arguments_nodes[0], function_name_if_constant_is_negative);
|
||||
|
||||
auto inner_function = aggregate_function_arguments_nodes[0];
|
||||
auto inner_function_left_argument = std::move(inner_function_arguments_nodes[0]);
|
||||
@ -145,13 +147,13 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
static inline void resolveAggregateFunctionNode(FunctionNode & function_node, const String & aggregate_function_name)
|
||||
static inline void resolveAggregateFunctionNode(FunctionNode & function_node, QueryTreeNodePtr & argument, const String & aggregate_function_name)
|
||||
{
|
||||
auto function_aggregate_function = function_node.getAggregateFunction();
|
||||
|
||||
AggregateFunctionProperties properties;
|
||||
auto aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name,
|
||||
function_aggregate_function->getArgumentTypes(),
|
||||
{ argument->getResultType() },
|
||||
function_aggregate_function->getParameters(),
|
||||
properties);
|
||||
|
||||
|
@ -2020,7 +2020,10 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveTableIdentifierFromDatabaseCatalog(con
|
||||
|
||||
StorageID storage_id(database_name, table_name);
|
||||
storage_id = context->resolveStorageID(storage_id);
|
||||
auto storage = DatabaseCatalog::instance().getTable(storage_id, context);
|
||||
auto storage = DatabaseCatalog::instance().tryGetTable(storage_id, context);
|
||||
if (!storage)
|
||||
return {};
|
||||
|
||||
auto storage_lock = storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout);
|
||||
auto storage_snapshot = storage->getStorageSnapshot(storage->getInMemoryMetadataPtr(), context);
|
||||
|
||||
@ -2867,7 +2870,10 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifierInParentScopes(const
|
||||
|
||||
if (resolved_identifier)
|
||||
{
|
||||
bool is_cte = resolved_identifier->as<QueryNode>() && resolved_identifier->as<QueryNode>()->isCTE();
|
||||
auto * subquery_node = resolved_identifier->as<QueryNode>();
|
||||
auto * union_node = resolved_identifier->as<UnionNode>();
|
||||
|
||||
bool is_cte = (subquery_node && subquery_node->isCTE()) || (union_node && union_node->isCTE());
|
||||
|
||||
/** From parent scopes we can resolve table identifiers only as CTE.
|
||||
* Example: SELECT (SELECT 1 FROM a) FROM test_table AS a;
|
||||
@ -4084,8 +4090,6 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
|
||||
auto & in_second_argument = function_in_arguments_nodes[1];
|
||||
auto * table_node = in_second_argument->as<TableNode>();
|
||||
auto * table_function_node = in_second_argument->as<TableFunctionNode>();
|
||||
auto * query_node = in_second_argument->as<QueryNode>();
|
||||
auto * union_node = in_second_argument->as<UnionNode>();
|
||||
|
||||
if (table_node && dynamic_cast<StorageSet *>(table_node->getStorage().get()) != nullptr)
|
||||
{
|
||||
@ -4118,15 +4122,9 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
|
||||
|
||||
in_second_argument = std::move(in_second_argument_query_node);
|
||||
}
|
||||
else if (query_node || union_node)
|
||||
else
|
||||
{
|
||||
IdentifierResolveScope subquery_scope(in_second_argument, &scope /*parent_scope*/);
|
||||
subquery_scope.subquery_depth = scope.subquery_depth + 1;
|
||||
|
||||
if (query_node)
|
||||
resolveQuery(in_second_argument, subquery_scope);
|
||||
else if (union_node)
|
||||
resolveUnion(in_second_argument, subquery_scope);
|
||||
resolveExpressionNode(in_second_argument, scope, false /*allow_lambda_expression*/, true /*allow_table_expression*/);
|
||||
}
|
||||
}
|
||||
|
||||
@ -4714,13 +4712,29 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id
|
||||
{
|
||||
node = tryResolveIdentifier({unresolved_identifier, IdentifierLookupContext::TABLE_EXPRESSION}, scope).resolved_identifier;
|
||||
|
||||
/// If table identifier is resolved as CTE clone it
|
||||
bool resolved_as_cte = node && node->as<QueryNode>() && node->as<QueryNode>()->isCTE();
|
||||
/// If table identifier is resolved as CTE clone it and resolve
|
||||
auto * subquery_node = node->as<QueryNode>();
|
||||
auto * union_node = node->as<UnionNode>();
|
||||
bool resolved_as_cte = (subquery_node && subquery_node->isCTE()) || (union_node && union_node->isCTE());
|
||||
|
||||
if (resolved_as_cte)
|
||||
{
|
||||
node = node->clone();
|
||||
node->as<QueryNode &>().setIsCTE(false);
|
||||
subquery_node = node->as<QueryNode>();
|
||||
union_node = node->as<UnionNode>();
|
||||
|
||||
if (subquery_node)
|
||||
subquery_node->setIsCTE(false);
|
||||
else
|
||||
union_node->setIsCTE(false);
|
||||
|
||||
IdentifierResolveScope subquery_scope(node, &scope /*parent_scope*/);
|
||||
subquery_scope.subquery_depth = scope.subquery_depth + 1;
|
||||
|
||||
if (subquery_node)
|
||||
resolveQuery(node, subquery_scope);
|
||||
else
|
||||
resolveUnion(node, subquery_scope);
|
||||
}
|
||||
}
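To make the intent of the resolver change above concrete: a table identifier resolved as a CTE may now be backed by a UNION (a `UnionNode`), not only by a plain `QueryNode`. A minimal query shape that exercises the union branch (my own illustration, not a test taken from this change):

```sql
-- The CTE body is a UNION, so the resolver clones it and calls resolveUnion().
WITH u AS (SELECT 1 AS x UNION ALL SELECT 2)
SELECT x FROM u;
```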
|
||||
|
||||
@ -4836,6 +4850,9 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id
|
||||
IdentifierResolveScope subquery_scope(node, &scope /*parent_scope*/);
|
||||
subquery_scope.subquery_depth = scope.subquery_depth + 1;
|
||||
|
||||
++subquery_counter;
|
||||
std::string projection_name = "_subquery_" + std::to_string(subquery_counter);
|
||||
|
||||
if (node_type == QueryTreeNodeType::QUERY)
|
||||
resolveQuery(node, subquery_scope);
|
||||
else
|
||||
@ -4844,9 +4861,8 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id
|
||||
if (!allow_table_expression)
|
||||
evaluateScalarSubqueryIfNeeded(node, subquery_scope.subquery_depth, subquery_scope.context);
|
||||
|
||||
++subquery_counter;
|
||||
if (result_projection_names.empty())
|
||||
result_projection_names.push_back("_subquery_" + std::to_string(subquery_counter));
|
||||
result_projection_names.push_back(std::move(projection_name));
|
||||
|
||||
break;
|
||||
}
|
||||
@ -5193,11 +5209,6 @@ void QueryAnalyzer::initializeQueryJoinTreeNode(QueryTreeNodePtr & join_tree_nod
|
||||
|
||||
if (resolved_identifier_query_node || resolved_identifier_union_node)
|
||||
{
|
||||
if (resolved_identifier_query_node)
|
||||
resolved_identifier_query_node->setIsCTE(false);
|
||||
else
|
||||
resolved_identifier_union_node->setIsCTE(false);
|
||||
|
||||
if (table_expression_modifiers.has_value())
|
||||
{
|
||||
throw Exception(ErrorCodes::UNSUPPORTED_METHOD,
|
||||
@ -5434,14 +5445,7 @@ void QueryAnalyzer::resolveQueryJoinTreeNode(QueryTreeNodePtr & join_tree_node,
|
||||
[[fallthrough]];
|
||||
case QueryTreeNodeType::UNION:
|
||||
{
|
||||
IdentifierResolveScope subquery_scope(join_tree_node, &scope);
|
||||
subquery_scope.subquery_depth = scope.subquery_depth + 1;
|
||||
|
||||
if (from_node_type == QueryTreeNodeType::QUERY)
|
||||
resolveQuery(join_tree_node, subquery_scope);
|
||||
else if (from_node_type == QueryTreeNodeType::UNION)
|
||||
resolveUnion(join_tree_node, subquery_scope);
|
||||
|
||||
resolveExpressionNode(join_tree_node, scope, false /*allow_lambda_expression*/, true /*allow_table_expression*/);
|
||||
break;
|
||||
}
|
||||
case QueryTreeNodeType::TABLE_FUNCTION:
|
||||
|
@ -78,9 +78,9 @@ public:
|
||||
throw Exception(
|
||||
ErrorCodes::CANNOT_BACKUP_TABLE,
|
||||
"Intersected parts detected: {} on replica {} and {} on replica {}",
|
||||
part.info.getPartName(),
|
||||
part.info.getPartNameForLogs(),
|
||||
*part.replica_name,
|
||||
new_part_info.getPartName(),
|
||||
new_part_info.getPartNameForLogs(),
|
||||
*replica_name);
|
||||
}
|
||||
++last_it;
|
||||
|
@ -39,6 +39,15 @@ enum class WeekModeFlag : UInt8
|
||||
};
|
||||
using YearWeek = std::pair<UInt16, UInt8>;
|
||||
|
||||
/// Modes for toDayOfWeek() function.
|
||||
enum class WeekDayMode
|
||||
{
|
||||
WeekStartsMonday1 = 0,
|
||||
WeekStartsMonday0 = 1,
|
||||
WeekStartsSunday0 = 2,
|
||||
WeekStartsSunday1 = 3
|
||||
};
|
||||
|
||||
/** Lookup table to conversion of time to date, and to month / year / day of week / day of month and so on.
|
||||
* First time was implemented for OLAPServer, that needed to do billions of such transformations.
|
||||
*/
|
||||
@ -619,9 +628,25 @@ public:
|
||||
template <typename DateOrTime>
|
||||
inline Int16 toYear(DateOrTime v) const { return lut[toLUTIndex(v)].year; }
|
||||
|
||||
/// 1-based, starts on Monday
|
||||
template <typename DateOrTime>
|
||||
inline UInt8 toDayOfWeek(DateOrTime v) const { return lut[toLUTIndex(v)].day_of_week; }
|
||||
|
||||
template <typename DateOrTime>
|
||||
inline UInt8 toDayOfWeek(DateOrTime v, UInt8 week_day_mode) const
|
||||
{
|
||||
WeekDayMode mode = check_week_day_mode(week_day_mode);
|
||||
UInt8 res = toDayOfWeek(v);
|
||||
|
||||
bool start_from_sunday = (mode == WeekDayMode::WeekStartsSunday0 || mode == WeekDayMode::WeekStartsSunday1);
|
||||
bool zero_based = (mode == WeekDayMode::WeekStartsMonday0 || mode == WeekDayMode::WeekStartsSunday0);
|
||||
if (start_from_sunday)
|
||||
res = res % 7 + 1;
|
||||
if (zero_based)
|
||||
--res;
|
||||
return res;
|
||||
}
|
||||
|
||||
template <typename DateOrTime>
|
||||
inline UInt8 toDayOfMonth(DateOrTime v) const { return lut[toLUTIndex(v)].day_of_month; }
|
||||
|
||||
@ -844,6 +869,13 @@ public:
|
||||
return week_format;
|
||||
}
|
||||
|
||||
/// Check and change mode to effective.
|
||||
inline WeekDayMode check_week_day_mode(UInt8 mode) const /// NOLINT
|
||||
{
|
||||
return static_cast<WeekDayMode>(mode & 3);
|
||||
}
|
||||
|
||||
|
||||
/** Calculate weekday from d.
|
||||
* Returns 0 for monday, 1 for tuesday...
|
||||
*/
|
||||
|
@ -16,7 +16,7 @@ TypeIndexesSet getTypesIndexes(const DataTypes & types)
|
||||
return type_indexes;
|
||||
}
|
||||
|
||||
void transformTypesRecursively(DataTypes & types, std::function<void(DataTypes &, const TypeIndexesSet &)> transform_simple_types, std::function<void(DataTypes &, const TypeIndexesSet &)> transform_complex_types)
|
||||
void transformTypesRecursively(DataTypes & types, std::function<void(DataTypes &, TypeIndexesSet &)> transform_simple_types, std::function<void(DataTypes &, TypeIndexesSet &)> transform_complex_types)
|
||||
{
|
||||
TypeIndexesSet type_indexes = getTypesIndexes(types);
|
||||
|
||||
@ -166,7 +166,7 @@ void transformTypesRecursively(DataTypes & types, std::function<void(DataTypes &
|
||||
void callOnNestedSimpleTypes(DataTypePtr & type, std::function<void(DataTypePtr &)> callback)
|
||||
{
|
||||
DataTypes types = {type};
|
||||
transformTypesRecursively(types, [callback](auto & data_types, const TypeIndexesSet &){ callback(data_types[0]); }, {});
|
||||
transformTypesRecursively(types, [callback](auto & data_types, TypeIndexesSet &){ callback(data_types[0]); }, {});
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -12,7 +12,7 @@ namespace DB
|
||||
/// If not all types are the same complex type (Array/Map/Tuple), this function won't be called to nested types.
|
||||
/// Function transform_simple_types will be applied to resulting simple types after all recursive calls.
|
||||
/// Function transform_complex_types will be applied to complex types (Array/Map/Tuple) after recursive call to their nested types.
|
||||
void transformTypesRecursively(DataTypes & types, std::function<void(DataTypes &, const TypeIndexesSet &)> transform_simple_types, std::function<void(DataTypes &, const TypeIndexesSet &)> transform_complex_types);
|
||||
void transformTypesRecursively(DataTypes & types, std::function<void(DataTypes &, TypeIndexesSet &)> transform_simple_types, std::function<void(DataTypes &, TypeIndexesSet &)> transform_complex_types);
|
||||
|
||||
void callOnNestedSimpleTypes(DataTypePtr & type, std::function<void(DataTypePtr &)> callback);
|
||||
|
||||
|
@ -131,19 +131,21 @@ namespace JSONUtils
|
||||
{
|
||||
skipWhitespaceIfAny(in);
|
||||
assertChar('{', in);
|
||||
skipWhitespaceIfAny(in);
|
||||
bool first = true;
|
||||
NamesAndTypesList names_and_types;
|
||||
String field;
|
||||
while (!in.eof() && *in.position() != '}')
|
||||
{
|
||||
if (!first)
|
||||
skipComma(in);
|
||||
assertChar(',', in);
|
||||
else
|
||||
first = false;
|
||||
|
||||
auto name = readFieldName(in);
|
||||
auto type = tryInferDataTypeForSingleJSONField(in, settings, inference_info);
|
||||
names_and_types.emplace_back(name, type);
|
||||
skipWhitespaceIfAny(in);
|
||||
}
|
||||
|
||||
if (in.eof())
|
||||
@ -157,17 +159,19 @@ namespace JSONUtils
|
||||
{
|
||||
skipWhitespaceIfAny(in);
|
||||
assertChar('[', in);
|
||||
skipWhitespaceIfAny(in);
|
||||
bool first = true;
|
||||
DataTypes types;
|
||||
String field;
|
||||
while (!in.eof() && *in.position() != ']')
|
||||
{
|
||||
if (!first)
|
||||
skipComma(in);
|
||||
assertChar(',', in);
|
||||
else
|
||||
first = false;
|
||||
auto type = tryInferDataTypeForSingleJSONField(in, settings, inference_info);
|
||||
types.push_back(std::move(type));
|
||||
skipWhitespaceIfAny(in);
|
||||
}
|
||||
|
||||
if (in.eof())
|
||||
|
@ -44,9 +44,16 @@ namespace
|
||||
return true;
|
||||
}
|
||||
|
||||
void updateTypeIndexes(DataTypes & data_types, TypeIndexesSet & type_indexes)
|
||||
{
|
||||
type_indexes.clear();
|
||||
for (const auto & type : data_types)
|
||||
type_indexes.insert(type->getTypeId());
|
||||
}
|
||||
|
||||
/// If we have both Nothing and non Nothing types, convert all Nothing types to the first non Nothing.
|
||||
/// For example if we have types [Nothing, String, Nothing] we change it to [String, String, String]
|
||||
void transformNothingSimpleTypes(DataTypes & data_types, const TypeIndexesSet & type_indexes)
|
||||
void transformNothingSimpleTypes(DataTypes & data_types, TypeIndexesSet & type_indexes)
|
||||
{
|
||||
/// Check if we have both Nothing and non Nothing types.
|
||||
if (!type_indexes.contains(TypeIndex::Nothing) || type_indexes.size() <= 1)
|
||||
@ -67,24 +74,48 @@ namespace
|
||||
if (isNothing(type))
|
||||
type = not_nothing_type;
|
||||
}
|
||||
|
||||
type_indexes.erase(TypeIndex::Nothing);
|
||||
}
|
||||
|
||||
/// If we have both Int64 and Float64 types, convert all Int64 to Float64.
|
||||
void transformIntegersAndFloatsToFloats(DataTypes & data_types, const TypeIndexesSet & type_indexes)
|
||||
/// If we have both Int64 and UInt64, convert all Int64 to UInt64,
|
||||
/// because UInt64 is inferred only in case of Int64 overflow.
|
||||
void transformIntegers(DataTypes & data_types, TypeIndexesSet & type_indexes)
|
||||
{
|
||||
if (!type_indexes.contains(TypeIndex::Int64) || !type_indexes.contains(TypeIndex::Float64))
|
||||
if (!type_indexes.contains(TypeIndex::Int64) || !type_indexes.contains(TypeIndex::UInt64))
|
||||
return;
|
||||
|
||||
for (auto & type : data_types)
|
||||
{
|
||||
if (isInteger(type))
|
||||
if (WhichDataType(type).isInt64())
|
||||
type = std::make_shared<DataTypeUInt64>();
|
||||
}
|
||||
|
||||
type_indexes.erase(TypeIndex::Int64);
|
||||
}
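A hedged sketch of the user-visible effect of merging Int64 with UInt64 during schema inference (the column name is my own example; exact inferred types can vary with settings and version):

```sql
-- 18446744073709551615 overflows Int64 and is inferred as UInt64,
-- so mixing it with a small integer should unify the column to UInt64.
DESC format(JSONEachRow, '{"n" : 1}\n{"n" : 18446744073709551615}');
```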
|
||||
|
||||
/// If we have both Int64 and Float64 types, convert all Int64 to Float64.
|
||||
void transformIntegersAndFloatsToFloats(DataTypes & data_types, TypeIndexesSet & type_indexes)
|
||||
{
|
||||
bool have_floats = type_indexes.contains(TypeIndex::Float64);
|
||||
bool have_integers = type_indexes.contains(TypeIndex::Int64) || type_indexes.contains(TypeIndex::UInt64);
|
||||
if (!have_integers || !have_floats)
|
||||
return;
|
||||
|
||||
for (auto & type : data_types)
|
||||
{
|
||||
WhichDataType which(type);
|
||||
if (which.isInt64() || which.isUInt64())
|
||||
type = std::make_shared<DataTypeFloat64>();
|
||||
}
|
||||
|
||||
type_indexes.erase(TypeIndex::Int64);
|
||||
type_indexes.erase(TypeIndex::UInt64);
|
||||
}
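Similarly, a small illustration of the integer/float unification handled just above (again my own example, not from the change's tests):

```sql
-- One value is integral and one is fractional, so both rows should unify to Float64.
DESC format(JSONEachRow, '{"n" : 1}\n{"n" : 2.5}');
```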
|
||||
|
||||
/// If we have only Date and DateTime types, convert Date to DateTime,
|
||||
/// otherwise, convert all Date and DateTime to String.
|
||||
void transformDatesAndDateTimes(DataTypes & data_types, const TypeIndexesSet & type_indexes)
|
||||
void transformDatesAndDateTimes(DataTypes & data_types, TypeIndexesSet & type_indexes)
|
||||
{
|
||||
bool have_dates = type_indexes.contains(TypeIndex::Date);
|
||||
bool have_datetimes = type_indexes.contains(TypeIndex::DateTime64);
|
||||
@ -98,6 +129,8 @@ namespace
|
||||
type = std::make_shared<DataTypeString>();
|
||||
}
|
||||
|
||||
type_indexes.erase(TypeIndex::Date);
|
||||
type_indexes.erase(TypeIndex::DateTime);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -108,16 +141,18 @@ namespace
|
||||
if (isDate(type))
|
||||
type = std::make_shared<DataTypeDateTime64>(9);
|
||||
}
|
||||
|
||||
type_indexes.erase(TypeIndex::Date);
|
||||
}
|
||||
}
|
||||
|
||||
/// If we have numbers (Int64/Float64) and String types and numbers were parsed from String,
|
||||
/// If we have numbers (Int64/UInt64/Float64) and String types and numbers were parsed from String,
|
||||
/// convert all numbers to String.
|
||||
void transformJSONNumbersBackToString(
|
||||
DataTypes & data_types, const FormatSettings & settings, const TypeIndexesSet & type_indexes, JSONInferenceInfo * json_info)
|
||||
DataTypes & data_types, const FormatSettings & settings, TypeIndexesSet & type_indexes, JSONInferenceInfo * json_info)
|
||||
{
|
||||
bool have_strings = type_indexes.contains(TypeIndex::String);
|
||||
bool have_numbers = type_indexes.contains(TypeIndex::Int64) || type_indexes.contains(TypeIndex::Float64);
|
||||
bool have_numbers = type_indexes.contains(TypeIndex::Int64) || type_indexes.contains(TypeIndex::UInt64) || type_indexes.contains(TypeIndex::Float64);
|
||||
if (!have_strings || !have_numbers)
|
||||
return;
|
||||
|
||||
@ -128,36 +163,43 @@ namespace
|
||||
|| json_info->numbers_parsed_from_json_strings.contains(type.get())))
|
||||
type = std::make_shared<DataTypeString>();
|
||||
}
|
||||
|
||||
updateTypeIndexes(data_types, type_indexes);
|
||||
}
|
||||
|
||||
/// If we have both Bool and number (Int64/Float64) types,
|
||||
/// convert all Bool to Int64/Float64.
|
||||
void transformBoolsAndNumbersToNumbers(DataTypes & data_types, const TypeIndexesSet & type_indexes)
|
||||
/// If we have both Bool and number (Int64/UInt64/Float64) types,
|
||||
/// convert all Bool to Int64/UInt64/Float64.
|
||||
void transformBoolsAndNumbersToNumbers(DataTypes & data_types, TypeIndexesSet & type_indexes)
|
||||
{
|
||||
bool have_floats = type_indexes.contains(TypeIndex::Float64);
|
||||
bool have_integers = type_indexes.contains(TypeIndex::Int64);
|
||||
bool have_signed_integers = type_indexes.contains(TypeIndex::Int64);
|
||||
bool have_unsigned_integers = type_indexes.contains(TypeIndex::UInt64);
|
||||
bool have_bools = type_indexes.contains(TypeIndex::UInt8);
|
||||
/// Check if we have both Bool and Integer/Float.
|
||||
if (!have_bools || (!have_integers && !have_floats))
|
||||
if (!have_bools || (!have_signed_integers && !have_unsigned_integers && !have_floats))
|
||||
return;
|
||||
|
||||
for (auto & type : data_types)
|
||||
{
|
||||
if (isBool(type))
|
||||
{
|
||||
if (have_integers)
|
||||
if (have_signed_integers)
|
||||
type = std::make_shared<DataTypeInt64>();
|
||||
else if (have_unsigned_integers)
|
||||
type = std::make_shared<DataTypeUInt64>();
|
||||
else
|
||||
type = std::make_shared<DataTypeFloat64>();
|
||||
}
|
||||
}
|
||||
|
||||
type_indexes.erase(TypeIndex::UInt8);
|
||||
}
|
||||
|
||||
/// If we have type Nothing/Nullable(Nothing) and some other non Nothing types,
|
||||
/// convert all Nothing/Nullable(Nothing) types to the first non Nothing.
|
||||
/// For example, when we have [Nothing, Array(Int64)] it will convert it to [Array(Int64), Array(Int64)]
|
||||
/// (it can happen when transforming complex nested types like [Array(Nothing), Array(Array(Int64))])
|
||||
void transformNothingComplexTypes(DataTypes & data_types)
|
||||
void transformNothingComplexTypes(DataTypes & data_types, TypeIndexesSet & type_indexes)
|
||||
{
|
||||
bool have_nothing = false;
|
||||
DataTypePtr not_nothing_type = nullptr;
|
||||
@ -177,10 +219,12 @@ namespace
|
||||
if (isNothing(removeNullable(type)))
|
||||
type = not_nothing_type;
|
||||
}
|
||||
|
||||
updateTypeIndexes(data_types, type_indexes);
|
||||
}
|
||||
|
||||
/// If we have both Nullable and non Nullable types, make all types Nullable
|
||||
void transformNullableTypes(DataTypes & data_types, const TypeIndexesSet & type_indexes)
|
||||
void transformNullableTypes(DataTypes & data_types, TypeIndexesSet & type_indexes)
|
||||
{
|
||||
if (!type_indexes.contains(TypeIndex::Nullable))
|
||||
return;
|
||||
@ -190,6 +234,8 @@ namespace
|
||||
if (type->canBeInsideNullable())
|
||||
type = makeNullable(type);
|
||||
}
|
||||
|
||||
updateTypeIndexes(data_types, type_indexes);
|
||||
}
|
||||
|
||||
/// If we have Tuple with the same nested types like Tuple(Int64, Int64),
|
||||
@ -197,11 +243,12 @@ namespace
|
||||
/// For example when we had type Tuple(Int64, Nullable(Nothing)) and we
|
||||
/// transformed it to Tuple(Nullable(Int64), Nullable(Int64)) we will
|
||||
/// also transform it to Array(Nullable(Int64))
|
||||
void transformTuplesWithEqualNestedTypesToArrays(DataTypes & data_types, const TypeIndexesSet & type_indexes)
|
||||
void transformTuplesWithEqualNestedTypesToArrays(DataTypes & data_types, TypeIndexesSet & type_indexes)
|
||||
{
|
||||
if (!type_indexes.contains(TypeIndex::Tuple))
|
||||
return;
|
||||
|
||||
bool remove_tuple_index = true;
|
||||
for (auto & type : data_types)
|
||||
{
|
||||
if (isTuple(type))
|
||||
@ -209,8 +256,13 @@ namespace
|
||||
const auto * tuple_type = assert_cast<const DataTypeTuple *>(type.get());
|
||||
if (checkIfTypesAreEqual(tuple_type->getElements()))
|
||||
type = std::make_shared<DataTypeArray>(tuple_type->getElements().back());
|
||||
else
|
||||
remove_tuple_index = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (remove_tuple_index)
|
||||
type_indexes.erase(TypeIndex::Tuple);
|
||||
}
|
||||
|
||||
template <bool is_json>
|
||||
@ -221,7 +273,7 @@ namespace
|
||||
/// For example, if we have [Tuple(Nullable(Nothing), String), Array(Date), Tuple(Date, String)]
|
||||
/// it will convert them all to Array(String)
|
||||
void transformJSONTuplesAndArraysToArrays(
|
||||
DataTypes & data_types, const FormatSettings & settings, const TypeIndexesSet & type_indexes, JSONInferenceInfo * json_info)
|
||||
DataTypes & data_types, const FormatSettings & settings, TypeIndexesSet & type_indexes, JSONInferenceInfo * json_info)
|
||||
{
|
||||
if (!type_indexes.contains(TypeIndex::Tuple))
|
||||
return;
|
||||
@ -266,12 +318,14 @@ namespace
|
||||
if (isArray(type) || isTuple(type))
|
||||
type = std::make_shared<DataTypeArray>(nested_types.back());
|
||||
}
|
||||
|
||||
type_indexes.erase(TypeIndex::Tuple);
}
}

/// If we have Map and Object(JSON) types, convert all Map types to Object(JSON).
/// If we have Map types with different value types, convert all Map types to Object(JSON)
void transformMapsAndObjectsToObjects(DataTypes & data_types, const TypeIndexesSet & type_indexes)
void transformMapsAndObjectsToObjects(DataTypes & data_types, TypeIndexesSet & type_indexes)
{
if (!type_indexes.contains(TypeIndex::Map))
return;
@ -298,9 +352,11 @@ namespace
if (isMap(type))
type = std::make_shared<DataTypeObject>("json", true);
}

type_indexes.erase(TypeIndex::Map);
}

void transformMapsObjectsAndStringsToStrings(DataTypes & data_types, const TypeIndexesSet & type_indexes)
void transformMapsObjectsAndStringsToStrings(DataTypes & data_types, TypeIndexesSet & type_indexes)
{
bool have_maps = type_indexes.contains(TypeIndex::Map);
bool have_objects = type_indexes.contains(TypeIndex::Object);
@ -315,19 +371,26 @@ namespace
if (isMap(type) || isObject(type))
type = std::make_shared<DataTypeString>();
}

type_indexes.erase(TypeIndex::Map);
type_indexes.erase(TypeIndex::Object);
}

template <bool is_json>
void transformInferredTypesIfNeededImpl(DataTypes & types, const FormatSettings & settings, JSONInferenceInfo * json_info)
{
auto transform_simple_types = [&](DataTypes & data_types, const TypeIndexesSet & type_indexes)
auto transform_simple_types = [&](DataTypes & data_types, TypeIndexesSet & type_indexes)
{
/// Remove all Nothing type if possible.
transformNothingSimpleTypes(data_types, type_indexes);

/// Transform integers to floats if needed.
if (settings.try_infer_integers)
{
/// Transform Int64 to UInt64 if needed.
transformIntegers(data_types, type_indexes);
/// Transform integers to floats if needed.
transformIntegersAndFloatsToFloats(data_types, type_indexes);
}

/// Transform Date to DateTime or both to String if needed.
if (settings.try_infer_dates || settings.try_infer_datetimes)
@ -347,14 +410,14 @@ namespace
transformBoolsAndNumbersToNumbers(data_types, type_indexes);
};

auto transform_complex_types = [&](DataTypes & data_types, const TypeIndexesSet & type_indexes)
auto transform_complex_types = [&](DataTypes & data_types, TypeIndexesSet & type_indexes)
{
/// Make types Nullable if needed.
transformNullableTypes(data_types, type_indexes);

/// If we have type Nothing, it means that we had empty Array/Map while inference.
/// If there is at least one non Nothing type, change all Nothing types to it.
transformNothingComplexTypes(data_types);
transformNothingComplexTypes(data_types, type_indexes);

if constexpr (!is_json)
return;
@ -569,12 +632,30 @@ namespace
return read_int ? std::make_shared<DataTypeInt64>() : nullptr;

char * int_end = buf.position();
/// We cam safely get back to the start of the number, because we read from a string and we didn't reach eof.
/// We can safely get back to the start of the number, because we read from a string and we didn't reach eof.
buf.position() = number_start;

bool read_uint = false;
char * uint_end = nullptr;
/// In case of Int64 overflow we can try to infer UInt64.
if (!read_int)
{
UInt64 tmp_uint;
read_uint = tryReadIntText(tmp_uint, buf);
/// If we reached eof, it cannot be float (it requires no less data than integer)
if (buf.eof())
return read_uint ? std::make_shared<DataTypeUInt64>() : nullptr;

uint_end = buf.position();
buf.position() = number_start;
}

if (tryReadFloatText(tmp_float, buf))
{
if (read_int && buf.position() == int_end)
return std::make_shared<DataTypeInt64>();
if (read_uint && buf.position() == uint_end)
return std::make_shared<DataTypeUInt64>();
return std::make_shared<DataTypeFloat64>();
}

@ -590,6 +671,19 @@ namespace
bool read_int = tryReadIntText(tmp_int, peekable_buf);
auto * int_end = peekable_buf.position();
peekable_buf.rollbackToCheckpoint(true);

bool read_uint = false;
char * uint_end = nullptr;
/// In case of Int64 overflow we can try to infer UInt64.
if (!read_int)
{
PeekableReadBufferCheckpoint new_checkpoint(peekable_buf);
UInt64 tmp_uint;
read_uint = tryReadIntText(tmp_uint, peekable_buf);
uint_end = peekable_buf.position();
peekable_buf.rollbackToCheckpoint(true);
}

if (tryReadFloatText(tmp_float, peekable_buf))
{
/// Float parsing reads no fewer bytes than integer parsing,
@ -597,6 +691,8 @@ namespace
/// If it's the same, then it's integer.
if (read_int && peekable_buf.position() == int_end)
return std::make_shared<DataTypeInt64>();
if (read_uint && peekable_buf.position() == uint_end)
return std::make_shared<DataTypeUInt64>();
return std::make_shared<DataTypeFloat64>();
}
}
@ -874,6 +970,11 @@ DataTypePtr tryInferNumberFromString(std::string_view field, const FormatSetting
Int64 tmp_int;
if (tryReadIntText(tmp_int, buf) && buf.eof())
return std::make_shared<DataTypeInt64>();

/// In case of Int64 overflow, try to infer UInt64
UInt64 tmp_uint;
if (tryReadIntText(tmp_uint, buf) && buf.eof())
return std::make_shared<DataTypeUInt64>();
}

/// We cam safely get back to the start of buffer, because we read from a string and we didn't reach eof.
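The number-inference change above first tries Int64, falls back to UInt64 only when the signed parse overflowed, and finally tries Float64, still preferring the integer type when the float parse consumes no extra characters. A minimal standalone sketch of that ordering follows (not part of the diff): it uses only std::from_chars with illustrative names, not ClickHouse's ReadBuffer/tryReadIntText API, and it simplifies by requiring each parse to consume the whole field.

// Sketch of the Int64 -> UInt64 -> Float64 inference order (illustrative only).
#include <charconv>
#include <cstdint>
#include <string_view>

enum class InferredType { Int64, UInt64, Float64, None };

InferredType inferNumberType(std::string_view field)
{
    const char * begin = field.data();
    const char * end = begin + field.size();

    int64_t i64;
    auto r1 = std::from_chars(begin, end, i64);
    if (r1.ec == std::errc() && r1.ptr == end)
        return InferredType::Int64;

    /// Retry as UInt64 only when the signed parse overflowed, mirroring the new code path.
    if (r1.ec == std::errc::result_out_of_range)
    {
        uint64_t u64;
        auto r2 = std::from_chars(begin, end, u64);
        if (r2.ec == std::errc() && r2.ptr == end)
            return InferredType::UInt64;
    }

    double f64;
    auto r3 = std::from_chars(begin, end, f64);
    if (r3.ec == std::errc() && r3.ptr == end)
        return InferredType::Float64;

    return InferredType::None;
}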
@ -786,21 +786,21 @@ struct ToDayOfWeekImpl
{
static constexpr auto name = "toDayOfWeek";

static inline UInt8 execute(Int64 t, const DateLUTImpl & time_zone)
static inline UInt8 execute(Int64 t, UInt8 week_day_mode, const DateLUTImpl & time_zone)
{
return time_zone.toDayOfWeek(t);
return time_zone.toDayOfWeek(t, week_day_mode);
}
static inline UInt8 execute(UInt32 t, const DateLUTImpl & time_zone)
static inline UInt8 execute(UInt32 t, UInt8 week_day_mode, const DateLUTImpl & time_zone)
{
return time_zone.toDayOfWeek(t);
return time_zone.toDayOfWeek(t, week_day_mode);
}
static inline UInt8 execute(Int32 d, const DateLUTImpl & time_zone)
static inline UInt8 execute(Int32 d, UInt8 week_day_mode, const DateLUTImpl & time_zone)
{
return time_zone.toDayOfWeek(ExtendedDayNum(d));
return time_zone.toDayOfWeek(ExtendedDayNum(d), week_day_mode);
}
static inline UInt8 execute(UInt16 d, const DateLUTImpl & time_zone)
static inline UInt8 execute(UInt16 d, UInt8 week_day_mode, const DateLUTImpl & time_zone)
{
return time_zone.toDayOfWeek(DayNum(d));
return time_zone.toDayOfWeek(DayNum(d), week_day_mode);
}

using FactorTransform = ToMondayImpl;
@ -225,8 +225,8 @@ public:
}
else if constexpr (std::is_same_v<TransformX, TransformDateTime64<ToRelativeWeekNumImpl<ResultPrecision::Extended>>>)
{
auto x_day_of_week = TransformDateTime64<ToDayOfWeekImpl>(transform_x.getScaleMultiplier()).execute(x, timezone_x);
auto y_day_of_week = TransformDateTime64<ToDayOfWeekImpl>(transform_y.getScaleMultiplier()).execute(y, timezone_y);
auto x_day_of_week = TransformDateTime64<ToDayOfWeekImpl>(transform_x.getScaleMultiplier()).execute(x, 0, timezone_x);
auto y_day_of_week = TransformDateTime64<ToDayOfWeekImpl>(transform_y.getScaleMultiplier()).execute(y, 0, timezone_y);
if ((x_day_of_week > y_day_of_week)
|| ((x_day_of_week == y_day_of_week) && (a_comp.time.hour > b_comp.time.hour))
|| ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute)
@ -276,7 +276,7 @@ private:
{
static inline void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone)
{
const auto day = ToDayOfWeekImpl::execute(source, timezone);
const auto day = ToDayOfWeekImpl::execute(source, 0, timezone);
static constexpr std::string_view day_names[] =
{
"Monday",

@ -344,13 +344,13 @@ private:

static size_t mysqlDayOfWeek(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone)
{
*dest = '0' + ToDayOfWeekImpl::execute(source, timezone);
*dest = '0' + ToDayOfWeekImpl::execute(source, 0, timezone);
return 1;
}

static size_t mysqlDayOfWeek0To6(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone)
{
auto day = ToDayOfWeekImpl::execute(source, timezone);
auto day = ToDayOfWeekImpl::execute(source, 0, timezone);
*dest = '0' + (day == 7 ? 0 : day);
return 1;
}
@ -499,13 +499,13 @@ private:

static size_t jodaDayOfWeek1Based(size_t min_represent_digits, char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone)
{
auto week_day = ToDayOfWeekImpl::execute(source, timezone);
auto week_day = ToDayOfWeekImpl::execute(source, 0, timezone);
return writeNumberWithPadding(dest, week_day, min_represent_digits);
}

static size_t jodaDayOfWeekText(size_t min_represent_digits, char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone)
{
auto week_day = ToDayOfWeekImpl::execute(source, timezone);
auto week_day = ToDayOfWeekImpl::execute(source, 0, timezone);
if (week_day == 7)
week_day = 0;
@ -1,13 +1,12 @@
#include <Functions/FunctionFactory.h>
#include <Functions/DateTimeTransforms.h>
#include <Functions/FunctionDateOrDateTimeToSomething.h>
#include <DataTypes/DataTypesNumber.h>

#include <Functions/FunctionCustomWeekToSomething.h>

namespace DB
{

using FunctionToDayOfWeek = FunctionDateOrDateTimeToSomething<DataTypeUInt8, ToDayOfWeekImpl>;
using FunctionToDayOfWeek = FunctionCustomWeekToSomething<DataTypeUInt8, ToDayOfWeekImpl>;

REGISTER_FUNCTION(ToDayOfWeek)
{
@ -377,9 +377,6 @@ private:

inline static ContextPtr global_context_instance;

/// A flag, used to mark if reader needs to apply deleted rows mask.
bool apply_deleted_mask = true;

/// Temporary data for query execution accounting.
TemporaryDataOnDiskScopePtr temp_data_on_disk;
public:
@ -973,9 +970,6 @@ public:
bool isInternalQuery() const { return is_internal_query; }
void setInternalQuery(bool internal) { is_internal_query = internal; }

bool applyDeletedMask() const { return apply_deleted_mask; }
void setApplyDeletedMask(bool apply) { apply_deleted_mask = apply; }

ActionLocksManagerPtr getActionLocksManager() const;

enum class ApplicationType
@ -790,10 +790,10 @@ void ExpressionActions::assertDeterministic() const
}


std::string ExpressionActions::getSmallestColumn(const NamesAndTypesList & columns)
NameAndTypePair ExpressionActions::getSmallestColumn(const NamesAndTypesList & columns)
{
std::optional<size_t> min_size;
String res;
NameAndTypePair result;

for (const auto & column : columns)
{
@ -807,14 +807,14 @@ std::string ExpressionActions::getSmallestColumn(const NamesAndTypesList & colum
if (!min_size || size < *min_size)
{
min_size = size;
res = column.name;
result = column;
}
}

if (!min_size)
throw Exception("No available columns", ErrorCodes::LOGICAL_ERROR);

return res;
return result;
}

std::string ExpressionActions::dumpActions() const

@ -111,7 +111,7 @@ public:
std::string dumpActions() const;
JSONBuilder::ItemPtr toTree() const;

static std::string getSmallestColumn(const NamesAndTypesList & columns);
static NameAndTypePair getSmallestColumn(const NamesAndTypesList & columns);

/// Check if column is always zero. True if it's definite, false if we can't say for sure.
/// Call it only after subqueries for sets were executed.
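The change above makes getSmallestColumn return the whole NameAndTypePair of the cheapest column instead of only its name, so existing callers append .name while newer callers keep the type. A rough standalone sketch of the selection logic (illustrative types, not ClickHouse's NamesAndTypesList) could be:

// Sketch: pick the column whose value-size estimate is smallest; keep name and size together.
#include <list>
#include <optional>
#include <stdexcept>
#include <string>

struct ColumnNameAndSize
{
    std::string name;
    size_t value_size = 0;   /// stand-in for DataType::getMaximumSizeOfValueInMemory()
};

ColumnNameAndSize getSmallestColumn(const std::list<ColumnNameAndSize> & columns)
{
    std::optional<size_t> min_size;
    ColumnNameAndSize result;

    for (const auto & column : columns)
    {
        if (!min_size || column.value_size < *min_size)
        {
            min_size = column.value_size;
            result = column;   /// keep the whole pair, not just the name
        }
    }

    if (!min_size)
        throw std::runtime_error("No available columns");

    return result;
}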
@ -30,6 +30,9 @@
|
||||
#include <DataTypes/NestedUtils.h>
|
||||
#include <Interpreters/PreparedSets.h>
|
||||
#include <Storages/LightweightDeleteDescription.h>
|
||||
#include <Storages/MergeTree/MergeTreeSequentialSource.h>
|
||||
#include <Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h>
|
||||
#include <Processors/Sources/ThrowingExceptionSource.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -190,7 +193,8 @@ ColumnDependencies getAllColumnDependencies(const StorageMetadataPtr & metadata_
|
||||
|
||||
|
||||
bool isStorageTouchedByMutations(
|
||||
const StoragePtr & storage,
|
||||
MergeTreeData & storage,
|
||||
MergeTreeData::DataPartPtr source_part,
|
||||
const StorageMetadataPtr & metadata_snapshot,
|
||||
const std::vector<MutationCommand> & commands,
|
||||
ContextMutablePtr context_copy)
|
||||
@ -199,19 +203,15 @@ bool isStorageTouchedByMutations(
|
||||
return false;
|
||||
|
||||
bool all_commands_can_be_skipped = true;
|
||||
auto storage_from_merge_tree_data_part = std::dynamic_pointer_cast<StorageFromMergeTreeDataPart>(storage);
|
||||
for (const MutationCommand & command : commands)
|
||||
{
|
||||
if (!command.predicate) /// The command touches all rows.
|
||||
return true;
|
||||
|
||||
if (command.partition && !storage_from_merge_tree_data_part)
|
||||
throw Exception("ALTER UPDATE/DELETE ... IN PARTITION is not supported for non-MergeTree tables", ErrorCodes::NOT_IMPLEMENTED);
|
||||
|
||||
if (command.partition && storage_from_merge_tree_data_part)
|
||||
if (command.partition)
|
||||
{
|
||||
const String partition_id = storage_from_merge_tree_data_part->getPartitionIDFromQuery(command.partition, context_copy);
|
||||
if (partition_id == storage_from_merge_tree_data_part->getPartitionId())
|
||||
const String partition_id = storage.getPartitionIDFromQuery(command.partition, context_copy);
|
||||
if (partition_id == source_part->info.partition_id)
|
||||
all_commands_can_be_skipped = false;
|
||||
}
|
||||
else
|
||||
@ -229,13 +229,15 @@ bool isStorageTouchedByMutations(
|
||||
context_copy->setSetting("allow_asynchronous_read_from_io_pool_for_merge_tree", false);
|
||||
context_copy->setSetting("max_streams_for_merge_tree_reading", Field(0));
|
||||
|
||||
ASTPtr select_query = prepareQueryAffectedAST(commands, storage, context_copy);
|
||||
ASTPtr select_query = prepareQueryAffectedAST(commands, storage.shared_from_this(), context_copy);
|
||||
|
||||
auto storage_from_part = std::make_shared<StorageFromMergeTreeDataPart>(source_part);
|
||||
|
||||
/// Interpreter must be alive, when we use result of execute() method.
|
||||
/// For some reason it may copy context and give it into ExpressionTransform
|
||||
/// after that we will use context from destroyed stack frame in our stream.
|
||||
InterpreterSelectQuery interpreter(
|
||||
select_query, context_copy, storage, metadata_snapshot, SelectQueryOptions().ignoreLimits().ignoreProjections());
|
||||
select_query, context_copy, storage_from_part, metadata_snapshot, SelectQueryOptions().ignoreLimits().ignoreProjections());
|
||||
auto io = interpreter.execute();
|
||||
PullingPipelineExecutor executor(io.pipeline);
|
||||
|
||||
@ -288,6 +290,57 @@ ASTPtr getPartitionAndPredicateExpressionForMutationCommand(
|
||||
return command.predicate ? command.predicate->clone() : partition_predicate_as_ast_func;
|
||||
}
|
||||
|
||||
MutationsInterpreter::Source::Source(StoragePtr storage_) : storage(std::move(storage_))
|
||||
{
|
||||
}
|
||||
|
||||
MutationsInterpreter::Source::Source(MergeTreeData & storage_, MergeTreeData::DataPartPtr source_part_)
|
||||
: data(&storage_), part(std::move(source_part_))
|
||||
{
|
||||
}
|
||||
|
||||
StorageSnapshotPtr MutationsInterpreter::Source::getStorageSnapshot(const StorageMetadataPtr & snapshot_, const ContextPtr & context_) const
|
||||
{
|
||||
if (data)
|
||||
return data->getStorageSnapshot(snapshot_, context_);
|
||||
|
||||
return storage->getStorageSnapshot(snapshot_, context_);
|
||||
}
|
||||
|
||||
StoragePtr MutationsInterpreter::Source::getStorage() const
|
||||
{
|
||||
if (data)
|
||||
return data->shared_from_this();
|
||||
|
||||
return storage;
|
||||
}
|
||||
|
||||
const MergeTreeData * MutationsInterpreter::Source::getMergeTreeData() const
|
||||
{
|
||||
if (data)
|
||||
return data;
|
||||
|
||||
return dynamic_cast<const MergeTreeData *>(storage.get());
|
||||
}
|
||||
|
||||
bool MutationsInterpreter::Source::supportsLightweightDelete() const
|
||||
{
|
||||
if (part)
|
||||
return part->supportLightweightDeleteMutate();
|
||||
|
||||
return storage->supportsLightweightDelete();
|
||||
}
|
||||
|
||||
|
||||
bool MutationsInterpreter::Source::hasLightweightDeleteMask() const
|
||||
{
|
||||
return part && part->hasLightweightDelete();
|
||||
}
|
||||
|
||||
bool MutationsInterpreter::Source::materializeTTLRecalculateOnly() const
|
||||
{
|
||||
return data && data->getSettings()->materialize_ttl_recalculate_only;
|
||||
}
|
||||
|
||||
MutationsInterpreter::MutationsInterpreter(
|
||||
StoragePtr storage_,
|
||||
@ -297,7 +350,45 @@ MutationsInterpreter::MutationsInterpreter(
|
||||
bool can_execute_,
|
||||
bool return_all_columns_,
|
||||
bool return_deleted_rows_)
|
||||
: storage(std::move(storage_))
|
||||
: MutationsInterpreter(
|
||||
Source(std::move(storage_)),
|
||||
metadata_snapshot_, std::move(commands_), std::move(context_),
|
||||
can_execute_, return_all_columns_, return_deleted_rows_)
|
||||
{
|
||||
if (can_execute_ && dynamic_cast<const MergeTreeData *>(source.getStorage().get()))
|
||||
{
|
||||
throw Exception(
|
||||
ErrorCodes::LOGICAL_ERROR,
|
||||
"Cannot execute mutation for {}. Mutation should be applied to every part separately.",
|
||||
source.getStorage()->getName());
|
||||
}
|
||||
}
|
||||
|
||||
MutationsInterpreter::MutationsInterpreter(
|
||||
MergeTreeData & storage_,
|
||||
MergeTreeData::DataPartPtr source_part_,
|
||||
const StorageMetadataPtr & metadata_snapshot_,
|
||||
MutationCommands commands_,
|
||||
ContextPtr context_,
|
||||
bool can_execute_,
|
||||
bool return_all_columns_,
|
||||
bool return_deleted_rows_)
|
||||
: MutationsInterpreter(
|
||||
Source(storage_, std::move(source_part_)),
|
||||
metadata_snapshot_, std::move(commands_), std::move(context_),
|
||||
can_execute_, return_all_columns_, return_deleted_rows_)
|
||||
{
|
||||
}
|
||||
|
||||
MutationsInterpreter::MutationsInterpreter(
|
||||
Source source_,
|
||||
const StorageMetadataPtr & metadata_snapshot_,
|
||||
MutationCommands commands_,
|
||||
ContextPtr context_,
|
||||
bool can_execute_,
|
||||
bool return_all_columns_,
|
||||
bool return_deleted_rows_)
|
||||
: source(std::move(source_))
|
||||
, metadata_snapshot(metadata_snapshot_)
|
||||
, commands(std::move(commands_))
|
||||
, context(Context::createCopy(context_))
|
||||
@ -306,12 +397,12 @@ MutationsInterpreter::MutationsInterpreter(
|
||||
, return_all_columns(return_all_columns_)
|
||||
, return_deleted_rows(return_deleted_rows_)
|
||||
{
|
||||
mutation_ast = prepare(!can_execute);
|
||||
prepare(!can_execute);
|
||||
}
|
||||
|
||||
static NameSet getKeyColumns(const StoragePtr & storage, const StorageMetadataPtr & metadata_snapshot)
|
||||
static NameSet getKeyColumns(const MutationsInterpreter::Source & source, const StorageMetadataPtr & metadata_snapshot)
|
||||
{
|
||||
const MergeTreeData * merge_tree_data = dynamic_cast<const MergeTreeData *>(storage.get());
|
||||
const MergeTreeData * merge_tree_data = source.getMergeTreeData();
|
||||
if (!merge_tree_data)
|
||||
return {};
|
||||
|
||||
@ -333,21 +424,12 @@ static NameSet getKeyColumns(const StoragePtr & storage, const StorageMetadataPt
|
||||
return key_columns;
|
||||
}
|
||||
|
||||
static bool materializeTTLRecalculateOnly(const StoragePtr & storage)
|
||||
{
|
||||
auto storage_from_merge_tree_data_part = std::dynamic_pointer_cast<StorageFromMergeTreeDataPart>(storage);
|
||||
if (!storage_from_merge_tree_data_part)
|
||||
return false;
|
||||
|
||||
return storage_from_merge_tree_data_part->materializeTTLRecalculateOnly();
|
||||
}
|
||||
|
||||
static void validateUpdateColumns(
|
||||
const StoragePtr & storage,
|
||||
const MutationsInterpreter::Source & source,
|
||||
const StorageMetadataPtr & metadata_snapshot, const NameSet & updated_columns,
|
||||
const std::unordered_map<String, Names> & column_to_affected_materialized)
|
||||
{
|
||||
NameSet key_columns = getKeyColumns(storage, metadata_snapshot);
|
||||
NameSet key_columns = getKeyColumns(source, metadata_snapshot);
|
||||
|
||||
for (const String & column_name : updated_columns)
|
||||
{
|
||||
@ -364,7 +446,7 @@ static void validateUpdateColumns(
|
||||
/// Allow to override value of lightweight delete filter virtual column
|
||||
if (!found && column_name == LightweightDeleteDescription::FILTER_COLUMN.name)
|
||||
{
|
||||
if (!storage->supportsLightweightDelete())
|
||||
if (!source.supportsLightweightDelete())
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Lightweight delete is not supported for table");
|
||||
found = true;
|
||||
}
|
||||
@ -427,7 +509,7 @@ static std::optional<std::vector<ASTPtr>> getExpressionsOfUpdatedNestedSubcolumn
|
||||
return res;
|
||||
}
|
||||
|
||||
ASTPtr MutationsInterpreter::prepare(bool dry_run)
|
||||
void MutationsInterpreter::prepare(bool dry_run)
|
||||
{
|
||||
if (is_prepared)
|
||||
throw Exception("MutationsInterpreter is already prepared. It is a bug.", ErrorCodes::LOGICAL_ERROR);
|
||||
@ -441,14 +523,11 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run)
|
||||
NamesAndTypesList all_columns = columns_desc.getAllPhysical();
|
||||
|
||||
/// Add _row_exists column if it is physically present in the part
|
||||
if (auto part_storage = dynamic_pointer_cast<DB::StorageFromMergeTreeDataPart>(storage))
|
||||
{
|
||||
if (part_storage->hasLightweightDeletedMask())
|
||||
all_columns.push_back({LightweightDeleteDescription::FILTER_COLUMN});
|
||||
}
|
||||
if (source.hasLightweightDeleteMask())
|
||||
all_columns.push_back({LightweightDeleteDescription::FILTER_COLUMN});
|
||||
|
||||
NameSet updated_columns;
|
||||
bool materialize_ttl_recalculate_only = materializeTTLRecalculateOnly(storage);
|
||||
bool materialize_ttl_recalculate_only = source.materializeTTLRecalculateOnly();
|
||||
|
||||
for (const MutationCommand & command : commands)
|
||||
{
|
||||
@ -481,7 +560,7 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run)
|
||||
}
|
||||
}
|
||||
|
||||
validateUpdateColumns(storage, metadata_snapshot, updated_columns, column_to_affected_materialized);
|
||||
validateUpdateColumns(source, metadata_snapshot, updated_columns, column_to_affected_materialized);
|
||||
}
|
||||
|
||||
dependencies = getAllColumnDependencies(metadata_snapshot, updated_columns);
|
||||
@ -778,15 +857,10 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run)
|
||||
stages_copy.back().filters = stage.filters;
|
||||
}
|
||||
|
||||
const ASTPtr select_query = prepareInterpreterSelectQuery(stages_copy, /* dry_run = */ true);
|
||||
InterpreterSelectQuery interpreter{
|
||||
select_query, context, storage, metadata_snapshot,
|
||||
SelectQueryOptions().analyze(/* dry_run = */ false).ignoreLimits().ignoreProjections()};
|
||||
prepareMutationStages(stages_copy, true);
|
||||
|
||||
auto first_stage_header = interpreter.getSampleBlock();
|
||||
QueryPlan plan;
|
||||
auto source = std::make_shared<NullSource>(first_stage_header);
|
||||
plan.addStep(std::make_unique<ReadFromPreparedSource>(Pipe(std::move(source))));
|
||||
initQueryPlan(stages_copy.front(), plan);
|
||||
auto pipeline = addStreamsForLaterStages(stages_copy, plan);
|
||||
updated_header = std::make_unique<Block>(pipeline.getHeader());
|
||||
}
|
||||
@ -801,21 +875,18 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run)
|
||||
|
||||
is_prepared = true;
|
||||
|
||||
return prepareInterpreterSelectQuery(stages, dry_run);
|
||||
prepareMutationStages(stages, dry_run);
|
||||
}
|
||||
|
||||
ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector<Stage> & prepared_stages, bool dry_run)
|
||||
void MutationsInterpreter::prepareMutationStages(std::vector<Stage> & prepared_stages, bool dry_run)
|
||||
{
|
||||
auto storage_snapshot = storage->getStorageSnapshot(metadata_snapshot, context);
|
||||
auto storage_snapshot = source.getStorageSnapshot(metadata_snapshot, context);
|
||||
auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical).withExtendedObjects();
|
||||
auto all_columns = storage_snapshot->getColumns(options);
|
||||
|
||||
/// Add _row_exists column if it is present in the part
|
||||
if (auto part_storage = dynamic_pointer_cast<DB::StorageFromMergeTreeDataPart>(storage))
|
||||
{
|
||||
if (part_storage->hasLightweightDeletedMask())
|
||||
all_columns.push_back({LightweightDeleteDescription::FILTER_COLUMN});
|
||||
}
|
||||
if (source.hasLightweightDeleteMask())
|
||||
all_columns.push_back({LightweightDeleteDescription::FILTER_COLUMN});
|
||||
|
||||
/// Next, for each stage calculate columns changed by this and previous stages.
|
||||
for (size_t i = 0; i < prepared_stages.size(); ++i)
|
||||
@ -839,7 +910,7 @@ ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector<Stage> &
|
||||
|
||||
/// Now, calculate `expressions_chain` for each stage except the first.
|
||||
/// Do it backwards to propagate information about columns required as input for a stage to the previous stage.
|
||||
for (size_t i = prepared_stages.size() - 1; i > 0; --i)
|
||||
for (int64_t i = prepared_stages.size() - 1; i >= 0; --i)
|
||||
{
|
||||
auto & stage = prepared_stages[i];
|
||||
|
||||
@ -859,7 +930,7 @@ ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector<Stage> &
|
||||
/// e.g. ALTER referencing the same table in scalar subquery
|
||||
bool execute_scalar_subqueries = !dry_run;
|
||||
auto syntax_result = TreeRewriter(context).analyze(
|
||||
all_asts, all_columns, storage, storage_snapshot,
|
||||
all_asts, all_columns, source.getStorage(), storage_snapshot,
|
||||
false, true, execute_scalar_subqueries);
|
||||
|
||||
if (execute_scalar_subqueries && context->hasQueryContext())
|
||||
@ -897,6 +968,9 @@ ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector<Stage> &
|
||||
}
|
||||
}
|
||||
|
||||
if (i == 0 && actions_chain.steps.empty())
|
||||
actions_chain.lastStep(syntax_result->required_source_columns);
|
||||
|
||||
/// Remove all intermediate columns.
|
||||
actions_chain.addStep();
|
||||
actions_chain.getLastStep().required_output.clear();
|
||||
@ -908,49 +982,198 @@ ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector<Stage> &
|
||||
|
||||
actions_chain.finalize();
|
||||
|
||||
/// Propagate information about columns needed as input.
|
||||
for (const auto & column : actions_chain.steps.front()->getRequiredColumns())
|
||||
prepared_stages[i - 1].output_columns.insert(column.name);
|
||||
}
|
||||
|
||||
/// Execute first stage as a SELECT statement.
|
||||
|
||||
auto select = std::make_shared<ASTSelectQuery>();
|
||||
|
||||
select->setExpression(ASTSelectQuery::Expression::SELECT, std::make_shared<ASTExpressionList>());
|
||||
for (const auto & column_name : prepared_stages[0].output_columns)
|
||||
select->select()->children.push_back(std::make_shared<ASTIdentifier>(column_name));
|
||||
|
||||
/// Don't let select list be empty.
|
||||
if (select->select()->children.empty())
|
||||
select->select()->children.push_back(std::make_shared<ASTLiteral>(Field(0)));
|
||||
|
||||
if (!prepared_stages[0].filters.empty())
|
||||
{
|
||||
ASTPtr where_expression;
|
||||
if (prepared_stages[0].filters.size() == 1)
|
||||
where_expression = prepared_stages[0].filters[0];
|
||||
else
|
||||
if (i)
|
||||
{
|
||||
auto coalesced_predicates = std::make_shared<ASTFunction>();
|
||||
coalesced_predicates->name = "and";
|
||||
coalesced_predicates->arguments = std::make_shared<ASTExpressionList>();
|
||||
coalesced_predicates->children.push_back(coalesced_predicates->arguments);
|
||||
coalesced_predicates->arguments->children = prepared_stages[0].filters;
|
||||
where_expression = std::move(coalesced_predicates);
|
||||
/// Propagate information about columns needed as input.
|
||||
for (const auto & column : actions_chain.steps.front()->getRequiredColumns())
|
||||
prepared_stages[i - 1].output_columns.insert(column.name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// This structure re-implements adding virtual columns while reading from MergeTree part.
|
||||
/// It would be good to unify it with IMergeTreeSelectAlgorithm.
|
||||
struct VirtualColumns
|
||||
{
|
||||
struct ColumnAndPosition
|
||||
{
|
||||
ColumnWithTypeAndName column;
|
||||
size_t position;
|
||||
};
|
||||
|
||||
using Columns = std::vector<ColumnAndPosition>;
|
||||
|
||||
Columns virtuals;
|
||||
Names columns_to_read;
|
||||
|
||||
VirtualColumns(Names required_columns, const MergeTreeData::DataPartPtr & part) : columns_to_read(std::move(required_columns))
|
||||
{
|
||||
for (size_t i = 0; i < columns_to_read.size(); ++i)
|
||||
{
|
||||
if (columns_to_read[i] == LightweightDeleteDescription::FILTER_COLUMN.name)
|
||||
{
|
||||
LoadedMergeTreeDataPartInfoForReader part_info_reader(part);
|
||||
if (!part_info_reader.getColumns().contains(LightweightDeleteDescription::FILTER_COLUMN.name))
|
||||
{
|
||||
ColumnWithTypeAndName mask_column;
|
||||
mask_column.type = LightweightDeleteDescription::FILTER_COLUMN.type;
|
||||
mask_column.column = mask_column.type->createColumnConst(0, 1);
|
||||
mask_column.name = std::move(columns_to_read[i]);
|
||||
|
||||
virtuals.emplace_back(ColumnAndPosition{.column = std::move(mask_column), .position = i});
|
||||
}
|
||||
}
|
||||
else if (columns_to_read[i] == "_partition_id")
|
||||
{
|
||||
ColumnWithTypeAndName column;
|
||||
column.type = std::make_shared<DataTypeString>();
|
||||
column.column = column.type->createColumnConst(0, part->info.partition_id);
|
||||
column.name = std::move(columns_to_read[i]);
|
||||
|
||||
virtuals.emplace_back(ColumnAndPosition{.column = std::move(column), .position = i});
|
||||
}
|
||||
}
|
||||
|
||||
if (!virtuals.empty())
|
||||
{
|
||||
Names columns_no_virtuals;
|
||||
columns_no_virtuals.reserve(columns_to_read.size());
|
||||
size_t next_virtual = 0;
|
||||
for (size_t i = 0; i < columns_to_read.size(); ++i)
|
||||
{
|
||||
if (next_virtual < virtuals.size() && i == virtuals[next_virtual].position)
|
||||
++next_virtual;
|
||||
else
|
||||
columns_no_virtuals.emplace_back(std::move(columns_to_read[i]));
|
||||
}
|
||||
|
||||
columns_to_read.swap(columns_no_virtuals);
|
||||
}
|
||||
select->setExpression(ASTSelectQuery::Expression::WHERE, std::move(where_expression));
|
||||
}
|
||||
|
||||
return select;
|
||||
void addVirtuals(QueryPlan & plan)
|
||||
{
|
||||
auto dag = std::make_unique<ActionsDAG>(plan.getCurrentDataStream().header.getColumnsWithTypeAndName());
|
||||
|
||||
for (auto & column : virtuals)
|
||||
{
|
||||
const auto & adding_const = dag->addColumn(std::move(column.column));
|
||||
auto & outputs = dag->getOutputs();
|
||||
outputs.insert(outputs.begin() + column.position, &adding_const);
|
||||
}
|
||||
|
||||
auto step = std::make_unique<ExpressionStep>(plan.getCurrentDataStream(), std::move(dag));
|
||||
plan.addStep(std::move(step));
|
||||
}
|
||||
};
|
||||
|
||||
void MutationsInterpreter::Source::read(
|
||||
Stage & first_stage,
|
||||
QueryPlan & plan,
|
||||
const StorageMetadataPtr & snapshot_,
|
||||
const ContextPtr & context_,
|
||||
bool apply_deleted_mask_,
|
||||
bool can_execute_) const
|
||||
{
|
||||
auto required_columns = first_stage.expressions_chain.steps.front()->getRequiredColumns().getNames();
|
||||
auto storage_snapshot = getStorageSnapshot(snapshot_, context_);
|
||||
|
||||
if (!can_execute_)
|
||||
{
|
||||
auto header = storage_snapshot->getSampleBlockForColumns(required_columns);
|
||||
auto callback = []()
|
||||
{
|
||||
return DB::Exception(ErrorCodes::LOGICAL_ERROR, "Cannot execute a mutation because can_execute flag set to false");
|
||||
};
|
||||
|
||||
Pipe pipe(std::make_shared<ThrowingExceptionSource>(header, callback));
|
||||
|
||||
auto read_from_pipe = std::make_unique<ReadFromPreparedSource>(std::move(pipe));
|
||||
plan.addStep(std::move(read_from_pipe));
|
||||
return;
|
||||
}
|
||||
|
||||
if (data)
|
||||
{
|
||||
const auto & steps = first_stage.expressions_chain.steps;
|
||||
const auto & names = first_stage.filter_column_names;
|
||||
size_t num_filters = names.size();
|
||||
|
||||
ActionsDAGPtr filter;
|
||||
if (!first_stage.filter_column_names.empty())
|
||||
{
|
||||
|
||||
ActionsDAG::NodeRawConstPtrs nodes(num_filters);
|
||||
for (size_t i = 0; i < num_filters; ++i)
|
||||
nodes[i] = &steps[i]->actions()->findInOutputs(names[i]);
|
||||
|
||||
filter = ActionsDAG::buildFilterActionsDAG(nodes, {}, context_);
|
||||
}
|
||||
|
||||
VirtualColumns virtual_columns(std::move(required_columns), part);
|
||||
|
||||
createMergeTreeSequentialSource(
|
||||
plan, *data, storage_snapshot, part, std::move(virtual_columns.columns_to_read), apply_deleted_mask_, filter, context_,
|
||||
&Poco::Logger::get("MutationsInterpreter"));
|
||||
|
||||
virtual_columns.addVirtuals(plan);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto select = std::make_shared<ASTSelectQuery>();
|
||||
|
||||
select->setExpression(ASTSelectQuery::Expression::SELECT, std::make_shared<ASTExpressionList>());
|
||||
for (const auto & column_name : first_stage.output_columns)
|
||||
select->select()->children.push_back(std::make_shared<ASTIdentifier>(column_name));
|
||||
|
||||
/// Don't let select list be empty.
|
||||
if (select->select()->children.empty())
|
||||
select->select()->children.push_back(std::make_shared<ASTLiteral>(Field(0)));
|
||||
|
||||
if (!first_stage.filters.empty())
|
||||
{
|
||||
ASTPtr where_expression;
|
||||
if (first_stage.filters.size() == 1)
|
||||
where_expression = first_stage.filters[0];
|
||||
else
|
||||
{
|
||||
auto coalesced_predicates = std::make_shared<ASTFunction>();
|
||||
coalesced_predicates->name = "and";
|
||||
coalesced_predicates->arguments = std::make_shared<ASTExpressionList>();
|
||||
coalesced_predicates->children.push_back(coalesced_predicates->arguments);
|
||||
coalesced_predicates->arguments->children = first_stage.filters;
|
||||
where_expression = std::move(coalesced_predicates);
|
||||
}
|
||||
select->setExpression(ASTSelectQuery::Expression::WHERE, std::move(where_expression));
|
||||
}
|
||||
|
||||
SelectQueryInfo query_info;
|
||||
query_info.query = std::move(select);
|
||||
|
||||
size_t max_block_size = context_->getSettingsRef().max_block_size;
|
||||
size_t max_streams = 1;
|
||||
storage->read(plan, required_columns, storage_snapshot, query_info, context_, QueryProcessingStage::FetchColumns, max_block_size, max_streams);
|
||||
|
||||
if (!plan.isInitialized())
|
||||
{
|
||||
/// It may be possible when there is nothing to read from storage.
|
||||
auto header = storage_snapshot->getSampleBlockForColumns(required_columns);
|
||||
auto read_from_pipe = std::make_unique<ReadFromPreparedSource>(Pipe(std::make_shared<NullSource>(header)));
|
||||
plan.addStep(std::move(read_from_pipe));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void MutationsInterpreter::initQueryPlan(Stage & first_stage, QueryPlan & plan)
|
||||
{
|
||||
source.read(first_stage, plan, metadata_snapshot, context, apply_deleted_mask, can_execute);
|
||||
addCreatingSetsStep(plan, first_stage.analyzer->getPreparedSets(), context);
|
||||
}
|
||||
|
||||
QueryPipelineBuilder MutationsInterpreter::addStreamsForLaterStages(const std::vector<Stage> & prepared_stages, QueryPlan & plan) const
|
||||
{
|
||||
for (size_t i_stage = 1; i_stage < prepared_stages.size(); ++i_stage)
|
||||
for (const Stage & stage : prepared_stages)
|
||||
{
|
||||
const Stage & stage = prepared_stages[i_stage];
|
||||
|
||||
for (size_t i = 0; i < stage.expressions_chain.steps.size(); ++i)
|
||||
{
|
||||
const auto & step = stage.expressions_chain.steps[i];
|
||||
@ -988,14 +1211,11 @@ QueryPipelineBuilder MutationsInterpreter::addStreamsForLaterStages(const std::v
|
||||
|
||||
void MutationsInterpreter::validate()
|
||||
{
|
||||
if (!select_interpreter)
|
||||
select_interpreter = std::make_unique<InterpreterSelectQuery>(mutation_ast, context, storage, metadata_snapshot, select_limits);
|
||||
|
||||
const Settings & settings = context->getSettingsRef();
|
||||
|
||||
/// For Replicated* storages mutations cannot employ non-deterministic functions
|
||||
/// because that produces inconsistencies between replicas
|
||||
if (startsWith(storage->getName(), "Replicated") && !settings.allow_nondeterministic_mutations)
|
||||
if (startsWith(source.getStorage()->getName(), "Replicated") && !settings.allow_nondeterministic_mutations)
|
||||
{
|
||||
for (const auto & command : commands)
|
||||
{
|
||||
@ -1012,7 +1232,7 @@ void MutationsInterpreter::validate()
|
||||
}
|
||||
|
||||
QueryPlan plan;
|
||||
select_interpreter->buildQueryPlan(plan);
|
||||
initQueryPlan(stages.front(), plan);
|
||||
auto pipeline = addStreamsForLaterStages(stages, plan);
|
||||
}
|
||||
|
||||
@ -1021,23 +1241,8 @@ QueryPipelineBuilder MutationsInterpreter::execute()
|
||||
if (!can_execute)
|
||||
throw Exception("Cannot execute mutations interpreter because can_execute flag set to false", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
if (!select_interpreter)
|
||||
{
|
||||
/// Skip to apply deleted mask for MutateSomePartColumn cases when part has lightweight delete.
|
||||
if (!apply_deleted_mask)
|
||||
{
|
||||
auto context_for_reading = Context::createCopy(context);
|
||||
context_for_reading->setApplyDeletedMask(apply_deleted_mask);
|
||||
select_interpreter = std::make_unique<InterpreterSelectQuery>(mutation_ast, context_for_reading, storage, metadata_snapshot, select_limits);
|
||||
}
|
||||
else
|
||||
select_interpreter = std::make_unique<InterpreterSelectQuery>(mutation_ast, context, storage, metadata_snapshot, select_limits);
|
||||
}
|
||||
|
||||
|
||||
QueryPlan plan;
|
||||
select_interpreter->buildQueryPlan(plan);
|
||||
|
||||
initQueryPlan(stages.front(), plan);
|
||||
auto builder = addStreamsForLaterStages(stages, plan);
|
||||
|
||||
/// Sometimes we update just part of columns (for example UPDATE mutation)
|
||||
@ -1069,11 +1274,7 @@ const ColumnDependencies & MutationsInterpreter::getColumnDependencies() const
|
||||
|
||||
size_t MutationsInterpreter::evaluateCommandsSize()
|
||||
{
|
||||
for (const MutationCommand & command : commands)
|
||||
if (unlikely(!command.predicate && !command.partition)) /// The command touches all rows.
|
||||
return mutation_ast->size();
|
||||
|
||||
return std::max(prepareQueryAffectedAST(commands, storage, context)->size(), mutation_ast->size());
|
||||
return prepareQueryAffectedAST(commands, source.getStorage(), context)->size();
|
||||
}
|
||||
|
||||
std::optional<SortDescription> MutationsInterpreter::getStorageSortDescriptionIfPossible(const Block & header) const
|
||||
@ -1096,7 +1297,7 @@ std::optional<SortDescription> MutationsInterpreter::getStorageSortDescriptionIf
|
||||
|
||||
ASTPtr MutationsInterpreter::getPartitionAndPredicateExpressionForMutationCommand(const MutationCommand & command) const
|
||||
{
|
||||
return DB::getPartitionAndPredicateExpressionForMutationCommand(command, storage, context);
|
||||
return DB::getPartitionAndPredicateExpressionForMutationCommand(command, source.getStorage(), context);
|
||||
}
|
||||
|
||||
bool MutationsInterpreter::Stage::isAffectingAllColumns(const Names & storage_columns) const
|
||||
|
@ -19,7 +19,8 @@ using QueryPipelineBuilderPtr = std::unique_ptr<QueryPipelineBuilder>;
|
||||
|
||||
/// Return false if the data isn't going to be changed by mutations.
|
||||
bool isStorageTouchedByMutations(
|
||||
const StoragePtr & storage,
|
||||
MergeTreeData & storage,
|
||||
MergeTreeData::DataPartPtr source_part,
|
||||
const StorageMetadataPtr & metadata_snapshot,
|
||||
const std::vector<MutationCommand> & commands,
|
||||
ContextMutablePtr context_copy
|
||||
@ -35,6 +36,8 @@ ASTPtr getPartitionAndPredicateExpressionForMutationCommand(
|
||||
/// to this data.
|
||||
class MutationsInterpreter
|
||||
{
|
||||
struct Stage;
|
||||
|
||||
public:
|
||||
/// Storage to mutate, array of mutations commands and context. If you really want to execute mutation
|
||||
/// use can_execute = true, in other cases (validation, amount of commands) it can be false
|
||||
@ -47,8 +50,18 @@ public:
|
||||
bool return_all_columns_ = false,
|
||||
bool return_deleted_rows_ = false);
|
||||
|
||||
void validate();
|
||||
/// Special case for MergeTree
|
||||
MutationsInterpreter(
|
||||
MergeTreeData & storage_,
|
||||
MergeTreeData::DataPartPtr source_part_,
|
||||
const StorageMetadataPtr & metadata_snapshot_,
|
||||
MutationCommands commands_,
|
||||
ContextPtr context_,
|
||||
bool can_execute_,
|
||||
bool return_all_columns_ = false,
|
||||
bool return_deleted_rows_ = false);
|
||||
|
||||
void validate();
|
||||
size_t evaluateCommandsSize();
|
||||
|
||||
/// The resulting stream will return blocks containing only changed columns and columns, that we need to recalculate indices.
|
||||
@ -82,19 +95,60 @@ public:
|
||||
|
||||
void setApplyDeletedMask(bool apply) { apply_deleted_mask = apply; }
|
||||
|
||||
/// Internal class which represents a data part for MergeTree
|
||||
/// or just storage for other storages.
|
||||
/// The main idea is to create a dedicated reading from MergeTree part.
|
||||
/// Additionally we propagate some storage properties.
|
||||
struct Source
|
||||
{
|
||||
StorageSnapshotPtr getStorageSnapshot(const StorageMetadataPtr & snapshot_, const ContextPtr & context_) const;
|
||||
StoragePtr getStorage() const;
|
||||
const MergeTreeData * getMergeTreeData() const;
|
||||
|
||||
bool supportsLightweightDelete() const;
|
||||
bool hasLightweightDeleteMask() const;
|
||||
bool materializeTTLRecalculateOnly() const;
|
||||
|
||||
void read(
|
||||
Stage & first_stage,
|
||||
QueryPlan & plan,
|
||||
const StorageMetadataPtr & snapshot_,
|
||||
const ContextPtr & context_,
|
||||
bool apply_deleted_mask_,
|
||||
bool can_execute_) const;
|
||||
|
||||
explicit Source(StoragePtr storage_);
|
||||
Source(MergeTreeData & storage_, MergeTreeData::DataPartPtr source_part_);
|
||||
|
||||
private:
|
||||
StoragePtr storage;
|
||||
|
||||
/// Special case for MergeTree.
|
||||
MergeTreeData * data = nullptr;
|
||||
MergeTreeData::DataPartPtr part;
|
||||
};
|
||||
|
||||
private:
|
||||
ASTPtr prepare(bool dry_run);
|
||||
MutationsInterpreter(
|
||||
Source source_,
|
||||
const StorageMetadataPtr & metadata_snapshot_,
|
||||
MutationCommands commands_,
|
||||
ContextPtr context_,
|
||||
bool can_execute_,
|
||||
bool return_all_columns_,
|
||||
bool return_deleted_rows_);
|
||||
|
||||
struct Stage;
|
||||
void prepare(bool dry_run);
|
||||
|
||||
ASTPtr prepareInterpreterSelectQuery(std::vector<Stage> &prepared_stages, bool dry_run);
|
||||
void initQueryPlan(Stage & first_stage, QueryPlan & query_plan);
|
||||
void prepareMutationStages(std::vector<Stage> &prepared_stages, bool dry_run);
|
||||
QueryPipelineBuilder addStreamsForLaterStages(const std::vector<Stage> & prepared_stages, QueryPlan & plan) const;
|
||||
|
||||
std::optional<SortDescription> getStorageSortDescriptionIfPossible(const Block & header) const;
|
||||
|
||||
ASTPtr getPartitionAndPredicateExpressionForMutationCommand(const MutationCommand & command) const;
|
||||
|
||||
StoragePtr storage;
|
||||
Source source;
|
||||
StorageMetadataPtr metadata_snapshot;
|
||||
MutationCommands commands;
|
||||
ContextPtr context;
|
||||
@ -103,12 +157,6 @@ private:
|
||||
|
||||
bool apply_deleted_mask = true;
|
||||
|
||||
ASTPtr mutation_ast;
|
||||
|
||||
/// We have to store interpreter because it use own copy of context
|
||||
/// and some streams from execute method may use it.
|
||||
std::unique_ptr<InterpreterSelectQuery> select_interpreter;
|
||||
|
||||
/// A sequence of mutation commands is executed as a sequence of stages. Each stage consists of several
|
||||
/// filters, followed by updating values of some columns. Commands can reuse expressions calculated by the
|
||||
/// previous commands in the same stage, but at the end of each stage intermediate columns are thrown away
|
||||
|
@ -342,11 +342,14 @@ void ThreadStatus::detachQuery(bool exit_if_already_detached, bool thread_exits)
query_id.clear();
query_context.reset();

/// The memory of thread_group->finished_threads_counters_memory is temporarily moved to this vector, which is deallocated out of critical section.
std::vector<ThreadGroupStatus::ProfileEventsCountersAndMemory> move_to_temp;

/// Avoid leaking of ThreadGroupStatus::finished_threads_counters_memory
/// (this is in case someone uses system thread but did not call getProfileEventsCountersAndMemoryForThreads())
{
std::lock_guard guard(thread_group->mutex);
auto stats = std::move(thread_group->finished_threads_counters_memory);
move_to_temp = std::move(thread_group->finished_threads_counters_memory);
}

thread_group.reset();
@ -1146,7 +1146,7 @@ void TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select
required.insert(std::min_element(columns.begin(), columns.end())->name);
else if (!source_columns.empty())
/// If we have no information about columns sizes, choose a column of minimum size of its data type.
required.insert(ExpressionActions::getSmallestColumn(source_columns));
required.insert(ExpressionActions::getSmallestColumn(source_columns).name);
}
else if (is_select && storage_snapshot && !columns_context.has_array_join)
{
@ -81,6 +81,63 @@ void checkAccessRights(const TableNode & table_node, const Names & column_names,
|
||||
query_context->checkAccess(AccessType::SELECT, storage_id, column_names);
|
||||
}
|
||||
|
||||
NameAndTypePair chooseSmallestColumnToReadFromStorage(const StoragePtr & storage, const StorageSnapshotPtr & storage_snapshot)
|
||||
{
|
||||
/** We need to read at least one column to find the number of rows.
|
||||
* We will find a column with minimum <compressed_size, type_size, uncompressed_size>.
|
||||
* Because it is the column that is cheapest to read.
|
||||
*/
|
||||
class ColumnWithSize
|
||||
{
|
||||
public:
|
||||
ColumnWithSize(NameAndTypePair column_, ColumnSize column_size_)
|
||||
: column(std::move(column_))
|
||||
, compressed_size(column_size_.data_compressed)
|
||||
, uncompressed_size(column_size_.data_uncompressed)
|
||||
, type_size(column.type->haveMaximumSizeOfValue() ? column.type->getMaximumSizeOfValueInMemory() : 100)
|
||||
{
|
||||
}
|
||||
|
||||
bool operator<(const ColumnWithSize & rhs) const
|
||||
{
|
||||
return std::tie(compressed_size, type_size, uncompressed_size)
|
||||
< std::tie(rhs.compressed_size, rhs.type_size, rhs.uncompressed_size);
|
||||
}
|
||||
|
||||
NameAndTypePair column;
|
||||
size_t compressed_size = 0;
|
||||
size_t uncompressed_size = 0;
|
||||
size_t type_size = 0;
|
||||
};
|
||||
|
||||
std::vector<ColumnWithSize> columns_with_sizes;
|
||||
|
||||
auto column_sizes = storage->getColumnSizes();
|
||||
auto column_names_and_types = storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::AllPhysical).withSubcolumns());
|
||||
|
||||
if (!column_sizes.empty())
|
||||
{
|
||||
for (auto & column_name_and_type : column_names_and_types)
|
||||
{
|
||||
auto it = column_sizes.find(column_name_and_type.name);
|
||||
if (it == column_sizes.end())
|
||||
continue;
|
||||
|
||||
columns_with_sizes.emplace_back(column_name_and_type, it->second);
|
||||
}
|
||||
}
|
||||
|
||||
NameAndTypePair result;
|
||||
|
||||
if (!columns_with_sizes.empty())
|
||||
result = std::min_element(columns_with_sizes.begin(), columns_with_sizes.end())->column;
|
||||
else
|
||||
/// If we have no information about columns sizes, choose a column of minimum size of its data type
|
||||
result = ExpressionActions::getSmallestColumn(column_names_and_types);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
QueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expression,
|
||||
SelectQueryInfo & select_query_info,
|
||||
const SelectQueryOptions & select_query_options,
|
||||
@ -127,9 +184,7 @@ QueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expression,
|
||||
|
||||
if (columns_names.empty())
|
||||
{
|
||||
auto column_names_and_types = storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::All).withSubcolumns());
|
||||
auto additional_column_to_read = column_names_and_types.front();
|
||||
|
||||
auto additional_column_to_read = chooseSmallestColumnToReadFromStorage(storage, storage_snapshot);
|
||||
const auto & column_identifier = planner_context->getGlobalPlannerContext()->createColumnIdentifier(additional_column_to_read, table_expression);
|
||||
columns_names.push_back(additional_column_to_read.name);
|
||||
table_expression_data.addColumn(additional_column_to_read, column_identifier);
|
||||
|
@ -64,7 +64,6 @@ static MergeTreeReaderSettings getMergeTreeReaderSettings(
.save_marks_in_cache = true,
.checksum_on_read = settings.checksum_on_read,
.read_in_order = query_info.input_order_info != nullptr,
.apply_deleted_mask = context->applyDeletedMask(),
.use_asynchronous_read_from_pool = settings.allow_asynchronous_read_from_io_pool_for_merge_tree
&& (settings.max_streams_to_max_threads_ratio > 1 || settings.max_streams_for_merge_tree_reading > 1),
};
@ -1023,7 +1022,7 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl(
if (result.column_names_to_read.empty())
{
NamesAndTypesList available_real_columns = metadata_snapshot->getColumns().getAllPhysical();
result.column_names_to_read.push_back(ExpressionActions::getSmallestColumn(available_real_columns));
result.column_names_to_read.push_back(ExpressionActions::getSmallestColumn(available_real_columns).name);
}

// storage_snapshot->check(result.column_names_to_read);
src/Processors/Sources/ThrowingExceptionSource.h (new file, 32 lines)
@ -0,0 +1,32 @@
#pragma once
#include <Processors/ISource.h>


namespace DB
{

/// This source is throwing exception at the first attempt to read from it.
/// Can be used as a additional check that pipeline (or its part) is never executed.
class ThrowingExceptionSource : public ISource
{
public:

using CallBack = std::function<Exception()>;

explicit ThrowingExceptionSource(Block header, CallBack callback_)
: ISource(std::move(header))
, callback(std::move(callback_))
{}

String getName() const override { return "ThrowingExceptionSource"; }

protected:
Chunk generate() override
{
throw callback();
}

CallBack callback;
};

}
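The new ThrowingExceptionSource above exists so that a pipeline built only for header propagation fails loudly if anyone actually tries to execute it. A standalone analogue of that pattern, assuming nothing from ClickHouse's ISource/Chunk/Block API, might look like this:

// Sketch of a "must never be read" source using only the standard library (illustrative names).
#include <functional>
#include <stdexcept>
#include <vector>

class ThrowingSource
{
public:
    using Callback = std::function<std::runtime_error()>;

    explicit ThrowingSource(Callback callback_) : callback(std::move(callback_)) {}

    /// Any attempt to pull data raises the configured exception.
    std::vector<int> generate() { throw callback(); }

private:
    Callback callback;
};

// Usage: construct it with a callback describing why execution must never happen, e.g.
// ThrowingSource source([] { return std::runtime_error("pipeline must not be executed"); });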
@ -599,7 +599,7 @@ Pipe StorageHDFS::read(
{ return std::any_of(virtuals.begin(), virtuals.end(), [&](const NameAndTypePair & virtual_col){ return col == virtual_col.name; }); });

if (fetch_columns.empty())
fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()));
fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name);

columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns);
block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical());
@ -110,6 +110,8 @@ public:
/// The name of the table.
StorageID getStorageID() const;

virtual bool isMergeTree() const { return false; }

/// Returns true if the storage receives data from a remote server or servers.
virtual bool isRemote() const { return false; }

@ -47,7 +47,7 @@ bool ActiveDataPartSet::add(const MergeTreePartInfo & part_info, const String &
if (!part_info.contains(it->first))
{
if (!part_info.isDisjoint(it->first))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} intersects previous part {}. It is a bug or a result of manual intervention in the ZooKeeper data.", part_info.getPartName(), it->first.getPartName());
throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} intersects previous part {}. It is a bug or a result of manual intervention in the ZooKeeper data.", part_info.getPartNameForLogs(), it->first.getPartNameForLogs());
++it;
break;
}
@ -70,7 +70,7 @@ bool ActiveDataPartSet::add(const MergeTreePartInfo & part_info, const String &
}

if (it != part_info_to_name.end() && !part_info.isDisjoint(it->first))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} intersects part {}. It is a bug or a result of manual intervention in the ZooKeeper data.", name, it->first.getPartName());
throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} intersects part {}. It is a bug or a result of manual intervention in the ZooKeeper data.", name, it->first.getPartNameForLogs());

part_info_to_name.emplace(part_info, name);
return true;
@ -79,7 +79,7 @@ bool ActiveDataPartSet::add(const MergeTreePartInfo & part_info, const String &

bool ActiveDataPartSet::add(const MergeTreePartInfo & part_info, Strings * out_replaced_parts)
{
return add(part_info, part_info.getPartName(), out_replaced_parts);
return add(part_info, part_info.getPartNameAndCheckFormat(format_version), out_replaced_parts);
}

@ -19,7 +19,7 @@ bool DropPartsRanges::isAffectedByDropRange(const std::string & new_part_name, s
{
if (!drop_range.isDisjoint(entry_info))
{
postpone_reason = fmt::format("Has DROP RANGE affecting entry {} producing part {}. Will postpone it's execution.", drop_range.getPartName(), new_part_name);
postpone_reason = fmt::format("Has DROP RANGE affecting entry {} producing part {}. Will postpone it's execution.", drop_range.getPartNameForLogs(), new_part_name);
return true;
}
}
@ -81,7 +81,7 @@ void FutureMergedMutatedPart::assign(MergeTreeData::DataPartsVector parts_, Merg
name = part_info.getPartNameV0(min_date, max_date);
}
else
name = part_info.getPartName();
name = part_info.getPartNameV1();
}

void FutureMergedMutatedPart::updatePath(const MergeTreeData & storage, const IReservation * reservation)
@ -375,7 +375,7 @@ String IMergeTreeDataPart::getNewName(const MergeTreePartInfo & new_part_info) c
return new_part_info.getPartNameV0(min_date, max_date);
}
else
return new_part_info.getPartName();
return new_part_info.getPartNameV1();
}

std::optional<size_t> IMergeTreeDataPart::getColumnPosition(const String & column_name) const
@ -2054,7 +2054,7 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t
|
||||
MergeTreePartInfo range_info = part->info;
|
||||
range_info.level = static_cast<UInt32>(range_info.max_block - range_info.min_block);
|
||||
range_info.mutation = 0;
|
||||
independent_ranges_set.add(range_info, range_info.getPartName());
|
||||
independent_ranges_set.add(range_info, range_info.getPartNameV1());
|
||||
}
|
||||
|
||||
auto independent_ranges_infos = independent_ranges_set.getPartInfos();
|
||||
@ -2080,7 +2080,7 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t
|
||||
if (thread_group)
|
||||
CurrentThread::attachToIfDetached(thread_group);
|
||||
|
LOG_TRACE(log, "Removing {} parts in blocks range {}", batch.size(), range.getPartName());
LOG_TRACE(log, "Removing {} parts in blocks range {}", batch.size(), range.getPartNameForLogs());

for (const auto & part : batch)
{

@ -3405,7 +3405,7 @@ DataPartsVector MergeTreeData::grabActivePartsToRemoveForDropRange(
DataPartsVector parts_to_remove;

if (drop_range.min_block > drop_range.max_block)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid drop range: {}", drop_range.getPartName());
throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid drop range: {}", drop_range.getPartNameForLogs());

auto partition_range = getVisibleDataPartsVectorInPartition(txn, drop_range.partition_id, &lock);

@ -3437,7 +3437,7 @@ DataPartsVector MergeTreeData::grabActivePartsToRemoveForDropRange(
bool is_covered_by_min_max_block = part->info.min_block <= drop_range.min_block && part->info.max_block >= drop_range.max_block && part->info.getMutationVersion() >= drop_range.getMutationVersion();
if (is_covered_by_min_max_block)
{
LOG_INFO(log, "Skipping drop range for part {} because covering part {} already exists", drop_range.getPartName(), part->name);
LOG_INFO(log, "Skipping drop range for part {} because covering part {} already exists", drop_range.getPartNameForLogs(), part->name);
return {};
}
}

@ -3448,7 +3448,7 @@ DataPartsVector MergeTreeData::grabActivePartsToRemoveForDropRange(
{
/// Intersect left border
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected merged part {} intersecting drop range {}",
part->name, drop_range.getPartName());
part->name, drop_range.getPartNameForLogs());
}

continue;

@ -3462,7 +3462,7 @@ DataPartsVector MergeTreeData::grabActivePartsToRemoveForDropRange(
{
/// Intersect right border
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected merged part {} intersecting drop range {}",
part->name, drop_range.getPartName());
part->name, drop_range.getPartNameForLogs());
}

parts_to_remove.emplace_back(part);

@ -4241,8 +4241,8 @@ void MergeTreeData::movePartitionToDisk(const ASTPtr & partition, const String &
{
auto part_info = MergeTreePartInfo::fromPartName(partition_id, format_version);
parts.push_back(getActiveContainingPart(part_info));
if (!parts.back() || parts.back()->name != part_info.getPartName())
throw Exception("Part " + partition_id + " is not exists or not active", ErrorCodes::NO_SUCH_DATA_PART);
if (!parts.back() || parts.back()->name != part_info.getPartNameAndCheckFormat(format_version))
throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "Part {} is not exists or not active", partition_id);
}
else
parts = getVisibleDataPartsVectorInPartition(local_context, partition_id);

@ -4283,18 +4283,18 @@ void MergeTreeData::movePartitionToVolume(const ASTPtr & partition, const String
{
auto part_info = MergeTreePartInfo::fromPartName(partition_id, format_version);
parts.emplace_back(getActiveContainingPart(part_info));
if (!parts.back() || parts.back()->name != part_info.getPartName())
throw Exception("Part " + partition_id + " is not exists or not active", ErrorCodes::NO_SUCH_DATA_PART);
if (!parts.back() || parts.back()->name != part_info.getPartNameAndCheckFormat(format_version))
throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "Part {} is not exists or not active", partition_id);
}
else
parts = getVisibleDataPartsVectorInPartition(local_context, partition_id);

auto volume = getStoragePolicy()->getVolumeByName(name);
if (!volume)
throw Exception("Volume " + name + " does not exists on policy " + getStoragePolicy()->getName(), ErrorCodes::UNKNOWN_DISK);
throw Exception(ErrorCodes::UNKNOWN_DISK, "Volume {} does not exists on policy {}", name, getStoragePolicy()->getName());

if (parts.empty())
throw Exception("Nothing to move (check that the partition exists).", ErrorCodes::NO_SUCH_DATA_PART);
throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "Nothing to move (check that the partition exists).");

std::erase_if(parts, [&](auto part_ptr)
{

@ -4661,7 +4661,7 @@ void MergeTreeData::restorePartsFromBackup(RestorerFromBackup & restorer, const

void MergeTreeData::restorePartFromBackup(std::shared_ptr<RestoredPartsHolder> restored_parts_holder, const MergeTreePartInfo & part_info, const String & part_path_in_backup) const
{
String part_name = part_info.getPartName();
String part_name = part_info.getPartNameAndCheckFormat(format_version);
auto backup = restored_parts_holder->getBackup();

UInt64 total_size_of_part = 0;

@ -6052,6 +6052,10 @@ std::optional<ProjectionCandidate> MergeTreeData::getQueryProcessingStageWithAgg
if (settings.parallel_replicas_count > 1 || settings.max_parallel_replicas > 1)
return std::nullopt;

/// Cannot use projections in case of additional filter.
if (query_info.additional_filter_ast)
return std::nullopt;

auto query_ptr = query_info.original_query;
auto * select_query = query_ptr->as<ASTSelectQuery>();
if (!select_query)

@ -424,6 +424,8 @@ public:

StoragePolicyPtr getStoragePolicy() const override;

bool isMergeTree() const override { return true; }

bool supportsPrewhere() const override { return true; }

bool supportsFinal() const override;

@ -66,6 +66,13 @@ public:
size_t num_streams,
std::shared_ptr<PartitionIdToMaxBlock> max_block_numbers_to_read = nullptr) const;

static MarkRanges markRangesFromPKRange(
const MergeTreeData::DataPartPtr & part,
const StorageMetadataPtr & metadata_snapshot,
const KeyCondition & key_condition,
const Settings & settings,
Poco::Logger * log);

private:
const MergeTreeData & data;
Poco::Logger * log;

@ -78,13 +85,6 @@ private:
const Settings & settings,
Poco::Logger * log);

static MarkRanges markRangesFromPKRange(
const MergeTreeData::DataPartPtr & part,
const StorageMetadataPtr & metadata_snapshot,
const KeyCondition & key_condition,
const Settings & settings,
Poco::Logger * log);

static MarkRanges filterMarksUsingIndex(
MergeTreeIndexPtr index_helper,
MergeTreeIndexConditionPtr condition,

@ -368,7 +368,7 @@ MergeTreeDataWriter::TemporaryPart MergeTreeDataWriter::writeTempPartImpl(
part_name = new_part_info.getPartNameV0(min_date, max_date);
}
else
part_name = new_part_info.getPartName();
part_name = new_part_info.getPartNameV1();

std::string part_dir;
if (need_tmp_prefix)

@ -232,7 +232,7 @@ std::pair<MergeTreePartInfo, bool> MergeTreeDeduplicationLog::addPart(const std:
/// Create new record
MergeTreeDeduplicationLogRecord record;
record.operation = MergeTreeDeduplicationOp::ADD;
record.part_name = part_info.getPartName();
record.part_name = part_info.getPartNameAndCheckFormat(format_version);
record.block_id = block_id;
/// Write it to disk
writeRecord(record, *current_writer);

@ -269,7 +269,7 @@ void MergeTreeDeduplicationLog::dropPart(const MergeTreePartInfo & drop_part_inf
/// Create drop record
MergeTreeDeduplicationLogRecord record;
record.operation = MergeTreeDeduplicationOp::DROP;
record.part_name = part_info.getPartName();
record.part_name = part_info.getPartNameAndCheckFormat(format_version);
record.block_id = itr->key;
/// Write it to disk
writeRecord(record, *current_writer);

@ -167,7 +167,25 @@ bool MergeTreePartInfo::contains(const String & outer_part_name, const String &
}


String MergeTreePartInfo::getPartName() const
String MergeTreePartInfo::getPartNameAndCheckFormat(MergeTreeDataFormatVersion format_version) const
{
if (format_version == MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING)
return getPartNameV1();

/// We cannot just call getPartNameV0 because it requires extra arguments, but at least we can warn about it.
chassert(false); /// Catch it in CI. Feel free to remove this line.
throw Exception(ErrorCodes::BAD_DATA_PART_NAME, "Trying to get part name in new format for old format version. "
"Either some new feature is incompatible with deprecated *MergeTree definition syntax or it's a bug.");
}


String MergeTreePartInfo::getPartNameForLogs() const
{
/// We don't care about format version here
return getPartNameV1();
}

String MergeTreePartInfo::getPartNameV1() const
{
WriteBufferFromOwnString wb;

@ -103,7 +103,9 @@ struct MergeTreePartInfo
return level == MergeTreePartInfo::MAX_LEVEL || level == another_max_level;
}

String getPartName() const;
String getPartNameAndCheckFormat(MergeTreeDataFormatVersion format_version) const;
String getPartNameForLogs() const;
String getPartNameV1() const;
String getPartNameV0(DayNum left_date, DayNum right_date) const;
UInt64 getBlocksCount() const
{

@ -1,9 +1,14 @@
#include <Storages/MergeTree/MergeTreeSequentialSource.h>
#include <Storages/MergeTree/MergeTreeBlockReadUtils.h>
#include <Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h>
#include <Storages/MergeTree/MergeTreeDataSelectExecutor.h>
#include <Processors/Transforms/FilterTransform.h>
#include <Processors/QueryPlan/ISourceStep.h>
#include <QueryPipeline/QueryPipelineBuilder.h>
#include <QueryPipeline/Pipe.h>
#include <Interpreters/Context.h>
#include <Processors/Sources/NullSource.h>
#include <Processors/QueryPlan/FilterStep.h>

namespace DB
{

@ -25,6 +30,8 @@ public:
const StorageSnapshotPtr & storage_snapshot_,
MergeTreeData::DataPartPtr data_part_,
Names columns_to_read_,
std::optional<MarkRanges> mark_ranges_,
bool apply_deleted_mask,
bool read_with_direct_io_,
bool take_column_types_from_storage,
bool quiet = false);

@ -56,6 +63,8 @@ private:

Poco::Logger * log = &Poco::Logger::get("MergeTreeSequentialSource");

std::optional<MarkRanges> mark_ranges;

std::shared_ptr<MarkCache> mark_cache;
using MergeTreeReaderPtr = std::unique_ptr<IMergeTreeReader>;
MergeTreeReaderPtr reader;

@ -76,6 +85,8 @@ MergeTreeSequentialSource::MergeTreeSequentialSource(
const StorageSnapshotPtr & storage_snapshot_,
MergeTreeData::DataPartPtr data_part_,
Names columns_to_read_,
std::optional<MarkRanges> mark_ranges_,
bool apply_deleted_mask,
bool read_with_direct_io_,
bool take_column_types_from_storage,
bool quiet)

@ -85,6 +96,7 @@ MergeTreeSequentialSource::MergeTreeSequentialSource(
, data_part(std::move(data_part_))
, columns_to_read(std::move(columns_to_read_))
, read_with_direct_io(read_with_direct_io_)
, mark_ranges(std::move(mark_ranges_))
, mark_cache(storage.getContext()->getMarkCache())
{
if (!quiet)

@ -126,11 +138,15 @@ MergeTreeSequentialSource::MergeTreeSequentialSource(
MergeTreeReaderSettings reader_settings =
{
.read_settings = read_settings,
.save_marks_in_cache = false
.save_marks_in_cache = false,
.apply_deleted_mask = apply_deleted_mask,
};

if (!mark_ranges)
mark_ranges.emplace(MarkRanges{MarkRange(0, data_part->getMarksCount())});

reader = data_part->getReader(columns_for_reader, storage_snapshot->metadata,
MarkRanges{MarkRange(0, data_part->getMarksCount())},
*mark_ranges,
/* uncompressed_cache = */ nullptr, mark_cache.get(), reader_settings, {}, {});
}

@ -224,8 +240,10 @@ Pipe createMergeTreeSequentialSource(
if (need_to_filter_deleted_rows)
columns.emplace_back(LightweightDeleteDescription::FILTER_COLUMN.name);

bool apply_deleted_mask = false;

auto column_part_source = std::make_shared<MergeTreeSequentialSource>(
storage, storage_snapshot, data_part, columns, read_with_direct_io, take_column_types_from_storage, quiet);
storage, storage_snapshot, data_part, columns, std::optional<MarkRanges>{}, apply_deleted_mask, read_with_direct_io, take_column_types_from_storage, quiet);

Pipe pipe(std::move(column_part_source));

@ -242,4 +260,92 @@ Pipe createMergeTreeSequentialSource(
return pipe;
}

/// A Query Plan step to read from a single Merge Tree part
/// using Merge Tree Sequential Source (which reads strictly sequentially in a single thread).
/// This step is used for mutations because the usual reading is too tricky.
/// Previously, sequential reading was achieved by changing some settings like max_threads,
/// however, this approach lead to data corruption after some new settings were introduced.
class ReadFromPart final : public ISourceStep
{
public:
ReadFromPart(
const MergeTreeData & storage_,
const StorageSnapshotPtr & storage_snapshot_,
MergeTreeData::DataPartPtr data_part_,
Names columns_to_read_,
bool apply_deleted_mask_,
ActionsDAGPtr filter_,
ContextPtr context_,
Poco::Logger * log_)
: ISourceStep(DataStream{.header = storage_snapshot_->getSampleBlockForColumns(columns_to_read_)})
, storage(storage_)
, storage_snapshot(storage_snapshot_)
, data_part(std::move(data_part_))
, columns_to_read(std::move(columns_to_read_))
, apply_deleted_mask(apply_deleted_mask_)
, filter(std::move(filter_))
, context(std::move(context_))
, log(log_)
{
}

String getName() const override { return fmt::format("ReadFromPart({})", data_part->name); }

void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override
{
std::optional<MarkRanges> mark_ranges;

const auto & metadata_snapshot = storage_snapshot->metadata;
if (filter && metadata_snapshot->hasPrimaryKey())
{
const auto & primary_key = storage_snapshot->metadata->getPrimaryKey();
const Names & primary_key_column_names = primary_key.column_names;
KeyCondition key_condition(filter, context, primary_key_column_names, primary_key.expression, NameSet{});
LOG_DEBUG(log, "Key condition: {}", key_condition.toString());

if (!key_condition.alwaysFalse())
mark_ranges = MergeTreeDataSelectExecutor::markRangesFromPKRange(
data_part, metadata_snapshot, key_condition, context->getSettingsRef(), log);

if (mark_ranges && mark_ranges->empty())
{
pipeline.init(Pipe(std::make_unique<NullSource>(output_stream->header)));
return;
}
}

auto source = std::make_unique<MergeTreeSequentialSource>(
storage, storage_snapshot, data_part, columns_to_read, std::move(mark_ranges), apply_deleted_mask, false, true);

pipeline.init(Pipe(std::move(source)));
}

private:
const MergeTreeData & storage;
StorageSnapshotPtr storage_snapshot;
MergeTreeData::DataPartPtr data_part;
Names columns_to_read;
bool apply_deleted_mask;
ActionsDAGPtr filter;
ContextPtr context;
Poco::Logger * log;
};

void createMergeTreeSequentialSource(
QueryPlan & plan,
const MergeTreeData & storage,
const StorageSnapshotPtr & storage_snapshot,
MergeTreeData::DataPartPtr data_part,
Names columns_to_read,
bool apply_deleted_mask,
ActionsDAGPtr filter,
ContextPtr context,
Poco::Logger * log)
{
auto reading = std::make_unique<ReadFromPart>(
storage, storage_snapshot, std::move(data_part), std::move(columns_to_read), apply_deleted_mask, filter, std::move(context), log);

plan.addStep(std::move(reading));
}

}

@ -20,4 +20,17 @@ Pipe createMergeTreeSequentialSource(
bool quiet,
std::shared_ptr<std::atomic<size_t>> filtered_rows_count);

class QueryPlan;

void createMergeTreeSequentialSource(
QueryPlan & plan,
const MergeTreeData & storage,
const StorageSnapshotPtr & storage_snapshot,
MergeTreeData::DataPartPtr data_part,
Names columns_to_read,
bool apply_deleted_mask,
ActionsDAGPtr filter,
ContextPtr context,
Poco::Logger * log);

}

@ -156,7 +156,7 @@ void MergeTreeSink::finishDelayedChunk()
if (!res.second)
{
ProfileEvents::increment(ProfileEvents::DuplicatedInsertedBlocks);
LOG_INFO(storage.log, "Block with ID {} already exists as part {}; ignoring it", block_id, res.first.getPartName());
LOG_INFO(storage.log, "Block with ID {} already exists as part {}; ignoring it", block_id, res.first.getPartNameForLogs());
continue;
}
}

@ -714,8 +714,6 @@ struct MutationContext

FutureMergedMutatedPartPtr future_part;
MergeTreeData::DataPartPtr source_part;

StoragePtr storage_from_source_part;
StorageMetadataPtr metadata_snapshot;

MutationCommandsConstPtr commands;

@ -1478,10 +1476,9 @@ MutateTask::MutateTask(
ctx->storage_columns = metadata_snapshot_->getColumns().getAllPhysical();
ctx->txn = txn;
ctx->source_part = ctx->future_part->parts[0];
ctx->storage_from_source_part = std::make_shared<StorageFromMergeTreeDataPart>(ctx->source_part);
ctx->need_prefix = need_prefix_;

auto storage_snapshot = ctx->storage_from_source_part->getStorageSnapshot(ctx->metadata_snapshot, context_);
auto storage_snapshot = ctx->data->getStorageSnapshot(ctx->metadata_snapshot, context_);
extendObjectColumns(ctx->storage_columns, storage_snapshot->object_columns, /*with_subcolumns=*/ false);
}

@ -1554,7 +1551,7 @@ bool MutateTask::prepare()
}

if (ctx->source_part->isStoredOnDisk() && !isStorageTouchedByMutations(
ctx->storage_from_source_part, ctx->metadata_snapshot, ctx->commands_for_part, Context::createCopy(context_for_reading)))
*ctx->data, ctx->source_part, ctx->metadata_snapshot, ctx->commands_for_part, Context::createCopy(context_for_reading)))
{
NameSet files_to_copy_instead_of_hardlinks;
auto settings_ptr = ctx->data->getSettings();

@ -1597,7 +1594,7 @@ bool MutateTask::prepare()
if (!ctx->for_interpreter.empty())
{
ctx->interpreter = std::make_unique<MutationsInterpreter>(
ctx->storage_from_source_part, ctx->metadata_snapshot, ctx->for_interpreter, context_for_reading, true);
*ctx->data, ctx->source_part, ctx->metadata_snapshot, ctx->for_interpreter, context_for_reading, true);
ctx->materialized_indices = ctx->interpreter->grabMaterializedIndices();
ctx->materialized_projections = ctx->interpreter->grabMaterializedProjections();
ctx->mutation_kind = ctx->interpreter->getMutationKind();

@ -473,7 +473,7 @@ PartMovesBetweenShardsOrchestrator::Entry PartMovesBetweenShardsOrchestrator::st
log_entry.log_entry_id = attach_log_entry_barrier_path;
log_entry.part_checksum = part->checksums.getTotalChecksumHex();
log_entry.create_time = std::time(nullptr);
log_entry.new_part_name = part_info.getPartName();
log_entry.new_part_name = part_info.getPartNameAndCheckFormat(storage.format_version);

ops.emplace_back(zkutil::makeCreateRequest(attach_log_entry_barrier_path, log_entry.toString(), -1));
ops.emplace_back(zkutil::makeSetRequest(entry.to_shard + "/log", "", -1));

@ -1504,7 +1504,7 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry(
entry.znode_name,
entry.typeToString(),
entry.new_part_name,
info.getPartName());
info.getPartNameForLogs());
LOG_TRACE(log, fmt::runtime(out_postpone_reason));
return false;
}

@ -706,7 +706,7 @@ Pipe StorageFile::read(
});

if (fetch_columns.empty())
fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()));
fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name);
columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns);
}
else

@ -488,7 +488,7 @@ void ReadFromMerge::initializePipeline(QueryPipelineBuilder & pipeline, const Bu

column_names_as_aliases = alias_actions->getRequiredColumns().getNames();
if (column_names_as_aliases.empty())
column_names_as_aliases.push_back(ExpressionActions::getSmallestColumn(storage_metadata_snapshot->getColumns().getAllPhysical()));
column_names_as_aliases.push_back(ExpressionActions::getSmallestColumn(storage_metadata_snapshot->getColumns().getAllPhysical()).name);
}

auto source_pipeline = createSources(

@ -574,7 +574,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources(
{
/// If there are only virtual columns in query, you must request at least one other column.
if (real_column_names.empty())
real_column_names.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()));
real_column_names.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name);

QueryPlan plan;
if (StorageView * view = dynamic_cast<StorageView *>(storage.get()))

@ -1490,8 +1490,11 @@ String StorageReplicatedMergeTree::getChecksumsForZooKeeper(const MergeTreeDataP

MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::attachPartHelperFoundValidPart(const LogEntry& entry) const
{
if (format_version != MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING)
return {};

const MergeTreePartInfo actual_part_info = MergeTreePartInfo::fromPartName(entry.new_part_name, format_version);
const String part_new_name = actual_part_info.getPartName();
const String part_new_name = actual_part_info.getPartNameV1();

for (const DiskPtr & disk : getStoragePolicy()->getDisks())
{

@ -1502,7 +1505,7 @@ MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::attachPartHelperFo
if (!part_info || part_info->partition_id != actual_part_info.partition_id)
continue;

const String part_old_name = part_info->getPartName();
const String part_old_name = part_info->getPartNameV1();

const VolumePtr volume = std::make_shared<SingleDiskVolume>("volume_" + part_old_name, disk);

@ -3892,7 +3895,7 @@ void StorageReplicatedMergeTree::cleanLastPartNode(const String & partition_id)
bool StorageReplicatedMergeTree::partIsInsertingWithParallelQuorum(const MergeTreePartInfo & part_info) const
{
auto zookeeper = getZooKeeper();
return zookeeper->exists(fs::path(zookeeper_path) / "quorum" / "parallel" / part_info.getPartName());
return zookeeper->exists(fs::path(zookeeper_path) / "quorum" / "parallel" / part_info.getPartNameAndCheckFormat(format_version));
}


@ -3914,7 +3917,7 @@ bool StorageReplicatedMergeTree::partIsLastQuorumPart(const MergeTreePartInfo &
if (partition_it == parts_with_quorum.added_parts.end())
return false;

return partition_it->second == part_info.getPartName();
return partition_it->second == part_info.getPartNameAndCheckFormat(format_version);
}


@ -5230,7 +5233,7 @@ String getPartNamePossiblyFake(MergeTreeDataFormatVersion format_version, const
return part_info.getPartNameV0(left_date, right_date);
}

return part_info.getPartName();
return part_info.getPartNameV1();
}

bool StorageReplicatedMergeTree::getFakePartCoveringAllPartsInPartition(

@ -7725,7 +7728,7 @@ void StorageReplicatedMergeTree::enqueuePartForCheck(const String & part_name, t
if (queue.hasDropRange(MergeTreePartInfo::fromPartName(part_name, format_version), &covering_drop_range))
{
LOG_WARNING(log, "Do not enqueue part {} for check because it's covered by DROP_RANGE {} and going to be removed",
part_name, covering_drop_range.getPartName());
part_name, covering_drop_range.getPartNameForLogs());
return;
}
part_check_thread.enqueuePart(part_name, delay_to_check_seconds);

@ -1057,7 +1057,7 @@ Pipe StorageS3::read(
{ return std::any_of(virtuals.begin(), virtuals.end(), [&](const NameAndTypePair & virtual_col){ return col == virtual_col.name; }); });

if (fetch_columns.empty())
fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()));
fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name);

columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns);
block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical());

@ -29,6 +29,10 @@ from rerun_helper import RerunHelper
IMAGE_NAME = "clickhouse/sqlancer-test"


def get_pull_command(docker_image):
    return f"docker pull {docker_image}"


def get_run_command(download_url, workspace_path, image):
    return (
        f"docker run "

@ -92,6 +96,21 @@ if __name__ == "__main__":
    if not os.path.exists(workspace_path):
        os.makedirs(workspace_path)

    pull_command = get_pull_command(docker_image)

    logging.info("Going to pull image %s", pull_command)

    pull_log_path = os.path.join(workspace_path, "pull.log")
    with open(pull_log_path, "w", encoding="utf-8") as log:
        with subprocess.Popen(
            pull_command, shell=True, stderr=log, stdout=log
        ) as process:
            retcode = process.wait()
            if retcode == 0:
                logging.info("Pull successfully")
            else:
                logging.info("Pull failed")

    run_command = get_run_command(build_url, workspace_path, docker_image)
    logging.info("Going to run %s", run_command)

@ -124,6 +143,7 @@ if __name__ == "__main__":

    paths = [
        run_log_path,
        pull_log_path,
        os.path.join(workspace_path, "clickhouse-server.log"),
        os.path.join(workspace_path, "stderr.log"),
        os.path.join(workspace_path, "stdout.log"),

@ -1544,8 +1544,11 @@ def check_server_started(args):
            print(" OK")
            sys.stdout.flush()
            return True
        except (ConnectionError, http.client.ImproperConnectionState):
            print(".", end="")
        except (ConnectionError, http.client.ImproperConnectionState) as e:
            if args.hung_check:
                print("Connection error, will retry: ", str(e))
            else:
                print(".", end="")
            sys.stdout.flush()
            retry_count -= 1
            sleep(0.5)

@ -1,3 +1,10 @@
import pytest

# FIXME This test is too flaky
# https://github.com/ClickHouse/ClickHouse/issues/45160

pytestmark = pytest.mark.skip

import json
import os.path as p
import random

@ -9,7 +16,6 @@ from random import randrange
import math

import pika
import pytest
from google.protobuf.internal.encoder import _VarintBytes
from helpers.client import QueryRuntimeException
from helpers.cluster import ClickHouseCluster, check_rabbitmq_is_available

@ -120,11 +120,11 @@ SELECT toDayOfMonth(toDateTime(1412106600), 'Pacific/Pitcairn');
/* toDayOfWeek */

SELECT 'toDayOfWeek';
SELECT toDayOfWeek(toDateTime(1412106600), 'Asia/Istanbul');
SELECT toDayOfWeek(toDateTime(1412106600), 'Europe/Paris');
SELECT toDayOfWeek(toDateTime(1412106600), 'Europe/London');
SELECT toDayOfWeek(toDateTime(1412106600), 'Asia/Tokyo');
SELECT toDayOfWeek(toDateTime(1412106600), 'Pacific/Pitcairn');
SELECT toDayOfWeek(toDateTime(1412106600), 0, 'Asia/Istanbul');
SELECT toDayOfWeek(toDateTime(1412106600), 0, 'Europe/Paris');
SELECT toDayOfWeek(toDateTime(1412106600), 0, 'Europe/London');
SELECT toDayOfWeek(toDateTime(1412106600), 0, 'Asia/Tokyo');
SELECT toDayOfWeek(toDateTime(1412106600), 0, 'Pacific/Pitcairn');

/* toHour */

@ -7,14 +7,14 @@ import sys
import argparse

# Create SQL statement to verify dateTime64 is accepted as argument to functions taking DateTime.
FUNCTIONS="""
FUNCTIONS = """
toTimeZone(N, 'UTC')
toYear(N, 'Asia/Istanbul')
toQuarter(N, 'Asia/Istanbul')
toMonth(N, 'Asia/Istanbul')
toDayOfYear(N, 'Asia/Istanbul')
toDayOfMonth(N, 'Asia/Istanbul')
toDayOfWeek(N, 'Asia/Istanbul')
toDayOfWeek(N, 0, 'Asia/Istanbul')
toHour(N, 'Asia/Istanbul')
toMinute(N, 'Asia/Istanbul')
toSecond(N, 'Asia/Istanbul')

@ -90,68 +90,51 @@ formatDateTime(N, '%C %d %D %e %F %H %I %j %m %M %p %R %S %T %u %V %w %y %Y %%',
extra_ops = [
    # With same type:
    (
        ['N {op} N'],
        ["N {op} N"],
        {
            'op':
            [
                '- ', # does not work, but should it?
                '+ ', # does not work, but should it?
                '!=', '==', # equality and inequality supposed to take sub-second part in account
                '< ',
                '<=',
                '> ',
                '>='
            "op": [
                "- ", # does not work, but should it?
                "+ ", # does not work, but should it?
                "!=",
                "==", # equality and inequality supposed to take sub-second part in account
                "< ",
                "<=",
                "> ",
                ">=",
            ]
        }
        },
    ),
    # With other DateTime types:
    (
        [
            'N {op} {arg}',
            '{arg} {op} N'
        ],
        ["N {op} {arg}", "{arg} {op} N"],
        {
            'op':
            [
                '-', # does not work, but should it?
                '!=', '==',
            "op": [
                "-", # does not work, but should it?
                "!=",
                "==",
                # these are naturally expected to work, but they don't:
                '< ',
                '<=',
                '> ',
                '>='
                "< ",
                "<=",
                "> ",
                ">=",
            ],
            'arg': ['DT', 'D', 'DT64'],
        }
            "arg": ["DT", "D", "DT64"],
        },
    ),
    # With arithmetic types
    (
        [
            'N {op} {arg}',
            '{arg} {op} N'
        ],
        ["N {op} {arg}", "{arg} {op} N"],
        {
            'op':
            [
                '+ ',
                '- ',
                '==',
                '!=',
                '< ',
                '<=',
                '> ',
                '>='
            ],
            'arg':
            [
                'toUInt8(1)',
                'toInt8(-1)',
                'toUInt16(1)',
                'toInt16(-1)',
                'toUInt32(1)',
                'toInt32(-1)',
                'toUInt64(1)',
                'toInt64(-1)'
            "op": ["+ ", "- ", "==", "!=", "< ", "<=", "> ", ">="],
            "arg": [
                "toUInt8(1)",
                "toInt8(-1)",
                "toUInt16(1)",
                "toInt16(-1)",
                "toUInt32(1)",
                "toInt32(-1)",
                "toUInt64(1)",
                "toInt64(-1)",
            ],
        },
    ),

@ -167,14 +150,17 @@ for funcs, args in extra_ops:

# filter out empty lines and commented out lines
COMMENTED_OUT_LINE_RE = re.compile(r"^\s*#")
FUNCTIONS = list([f for f in FUNCTIONS if len(f) != 0 and COMMENTED_OUT_LINE_RE.match(f) == None])
TYPES = ['D', 'DT', 'DT64']
FUNCTIONS = list(
    [f for f in FUNCTIONS if len(f) != 0 and COMMENTED_OUT_LINE_RE.match(f) == None]
)
TYPES = ["D", "DT", "DT64"]


def escape_string(s):
    if sys.version_info[0] > 2:
        return s.encode('unicode_escape').decode('utf-8').replace("'", "\\'")
        return s.encode("unicode_escape").decode("utf-8").replace("'", "\\'")
    else:
        return s.encode('string-escape').decode('utf-8')
        return s.encode("string-escape").decode("utf-8")


def execute_functions_for_types(functions, types):

@ -186,18 +172,39 @@ def execute_functions_for_types(functions, types):
        WITH \
        toDateTime64('2019-09-16 19:20:11.234', 3, 'Europe/Minsk') as DT64, \
        toDateTime('2019-09-16 19:20:11', 'Europe/Minsk') as DT, \
        toDate('2019-09-16') as D, {X} as N".format(X=dt)
        print(("""{prologue} SELECT toTypeName(r), {func} as r FORMAT CSV;""".format(prologue=prologue, func=func)))
        toDate('2019-09-16') as D, {X} as N".format(
            X=dt
        )
        print(
            (
                """{prologue} SELECT toTypeName(r), {func} as r FORMAT CSV;""".format(
                    prologue=prologue, func=func
                )
            )
        )
    print("""SELECT '------------------------------------------';""")


def main():
    def parse_args():
        parser = argparse.ArgumentParser()
        parser.add_argument('--functions_re', type=re.compile, help="RE to enable functions", default=None)
        parser.add_argument('--types_re',
            type=lambda s: re.compile('^(' + s + ')$'),
            help="RE to enable types, supported types: " + ",".join(TYPES), default=None)
        parser.add_argument('--list_functions', action='store_true', help="List all functions to be tested and exit")
        parser.add_argument(
            "--functions_re",
            type=re.compile,
            help="RE to enable functions",
            default=None,
        )
        parser.add_argument(
            "--types_re",
            type=lambda s: re.compile("^(" + s + ")$"),
            help="RE to enable types, supported types: " + ",".join(TYPES),
            default=None,
        )
        parser.add_argument(
            "--list_functions",
            action="store_true",
            help="List all functions to be tested and exit",
        )
        return parser.parse_args()

    args = parse_args()

@ -223,5 +230,6 @@ def main():

    execute_functions_for_types(functions, types)

if __name__ == '__main__':

if __name__ == "__main__":
    exit(main())

@ -28,7 +28,7 @@ SELECT toDayOfMonth(N, \'Asia/Istanbul\')
"UInt8",16
"UInt8",16
------------------------------------------
SELECT toDayOfWeek(N, \'Asia/Istanbul\')
SELECT toDayOfWeek(N, 0, \'Asia/Istanbul\')
"UInt8",1
"UInt8",1
"UInt8",1

@ -1 +1,2 @@
0 0 0
3

@ -7,3 +7,9 @@ INSERT INTO t SELECT number % 10, number FROM numbers(10000);
SELECT count(), min(a), max(a) FROM t SETTINGS additional_table_filters = {'t' : '0'};

DROP TABLE t;

drop table if exists atf_p;
create table atf_p (x UInt64) engine = MergeTree order by tuple();
insert into atf_p select number from numbers(10);
select count() from atf_p settings additional_table_filters = {'atf_p': 'x <= 2'};
drop table atf_p;

@ -31,7 +31,7 @@ INSERT INTO test_table VALUES (0, 'Value');
SELECT 'Table access without table name qualification';

SELECT test_id FROM test_table; -- { serverError 47 }
SELECT test_id FROM test_unknown_table; -- { serverError 60 }
SELECT test_id FROM test_unknown_table; -- { serverError 47 }

DESCRIBE (SELECT id FROM test_table);
SELECT id FROM test_table;

@ -0,0 +1,81 @@
-- { echoOn }
EXPLAIN actions=1
(
    SELECT round(avg(log(2) * number), 6) AS k
    FROM numbers(10000000)
    GROUP BY number % 3, number % 2
)
SETTINGS allow_experimental_analyzer=1;
Expression ((Project names + Projection))
Actions: INPUT : 0 -> avg(number_0) Float64 : 0
COLUMN Const(Float64) -> 0.6931471805599453_Float64 Float64 : 1
COLUMN Const(UInt8) -> 6_UInt8 UInt8 : 2
FUNCTION multiply(0.6931471805599453_Float64 :: 1, avg(number_0) :: 0) -> multiply(0.6931471805599453_Float64, avg(number_0)) Float64 : 3
FUNCTION round(multiply(0.6931471805599453_Float64, avg(number_0)) :: 3, 6_UInt8 :: 2) -> round(multiply(0.6931471805599453_Float64, avg(number_0)), 6_UInt8) Float64 : 0
ALIAS round(multiply(0.6931471805599453_Float64, avg(number_0)), 6_UInt8) :: 0 -> k Float64 : 2
Positions: 2
Aggregating
Keys: modulo(number_0, 3_UInt8), modulo(number_0, 2_UInt8)
Aggregates:
avg(number_0)
Function: avg(UInt64) → Float64
Arguments: number_0
Expression ((Before GROUP BY + Change column names to column identifiers))
Actions: INPUT : 0 -> number UInt64 : 0
COLUMN Const(UInt8) -> 3_UInt8 UInt8 : 1
COLUMN Const(UInt8) -> 2_UInt8 UInt8 : 2
ALIAS number :: 0 -> number_0 UInt64 : 3
FUNCTION modulo(number_0 : 3, 3_UInt8 :: 1) -> modulo(number_0, 3_UInt8) UInt8 : 0
FUNCTION modulo(number_0 : 3, 2_UInt8 :: 2) -> modulo(number_0, 2_UInt8) UInt8 : 1
Positions: 0 1 3
ReadFromStorage (SystemNumbers)
EXPLAIN actions=1
(
    SELECT round(log(2) * avg(number), 6) AS k
    FROM numbers(10000000)
    GROUP BY number % 3, number % 2
)
SETTINGS allow_experimental_analyzer=1;
Expression ((Project names + Projection))
Actions: INPUT : 0 -> avg(number_0) Float64 : 0
COLUMN Const(Float64) -> 0.6931471805599453_Float64 Float64 : 1
COLUMN Const(UInt8) -> 6_UInt8 UInt8 : 2
FUNCTION multiply(0.6931471805599453_Float64 :: 1, avg(number_0) :: 0) -> multiply(0.6931471805599453_Float64, avg(number_0)) Float64 : 3
FUNCTION round(multiply(0.6931471805599453_Float64, avg(number_0)) :: 3, 6_UInt8 :: 2) -> round(multiply(0.6931471805599453_Float64, avg(number_0)), 6_UInt8) Float64 : 0
ALIAS round(multiply(0.6931471805599453_Float64, avg(number_0)), 6_UInt8) :: 0 -> k Float64 : 2
Positions: 2
Aggregating
Keys: modulo(number_0, 3_UInt8), modulo(number_0, 2_UInt8)
Aggregates:
avg(number_0)
Function: avg(UInt64) → Float64
Arguments: number_0
Expression ((Before GROUP BY + Change column names to column identifiers))
Actions: INPUT : 0 -> number UInt64 : 0
COLUMN Const(UInt8) -> 3_UInt8 UInt8 : 1
COLUMN Const(UInt8) -> 2_UInt8 UInt8 : 2
ALIAS number :: 0 -> number_0 UInt64 : 3
FUNCTION modulo(number_0 : 3, 3_UInt8 :: 1) -> modulo(number_0, 3_UInt8) UInt8 : 0
FUNCTION modulo(number_0 : 3, 2_UInt8 :: 2) -> modulo(number_0, 2_UInt8) UInt8 : 1
Positions: 0 1 3
ReadFromStorage (SystemNumbers)
SELECT round(avg(log(2) * number), 6) AS k
FROM numbers(10000000)
GROUP BY number % 3, number % 2
SETTINGS allow_experimental_analyzer=1;
3465734.516505
3465735.209653
3465735.9028
3465736.595947
3465735.209653
3465735.9028
SELECT round(log(2) * avg(number), 6) AS k
FROM numbers(10000000)
GROUP BY number % 3, number % 2
SETTINGS allow_experimental_analyzer=0;
3465734.516505
3465735.209653
3465735.9028
3465736.595947
3465735.209653
3465735.9028

@ -0,0 +1,26 @@
-- { echoOn }
EXPLAIN actions=1
(
    SELECT round(avg(log(2) * number), 6) AS k
    FROM numbers(10000000)
    GROUP BY number % 3, number % 2
)
SETTINGS allow_experimental_analyzer=1;

EXPLAIN actions=1
(
    SELECT round(log(2) * avg(number), 6) AS k
    FROM numbers(10000000)
    GROUP BY number % 3, number % 2
)
SETTINGS allow_experimental_analyzer=1;

SELECT round(avg(log(2) * number), 6) AS k
FROM numbers(10000000)
GROUP BY number % 3, number % 2
SETTINGS allow_experimental_analyzer=1;

SELECT round(log(2) * avg(number), 6) AS k
FROM numbers(10000000)
GROUP BY number % 3, number % 2
SETTINGS allow_experimental_analyzer=0;

@ -16,5 +16,5 @@ c1 Nullable(Float64)
c1 Nullable(Float64)
c1 Array(Nullable(Float64))
c1 Array(Nullable(Float64))
c1 Array(Nullable(Float64))
c1 Array(Nullable(Float64))
c1 Nullable(String)
c1 Nullable(String)

@ -0,0 +1,4 @@
SET allow_experimental_analyzer = 1;

SELECT toUInt64(NULL) AS x FROM (SELECT 1) HAVING x IN
(SELECT NULL FROM (SELECT x IN (SELECT x IN (SELECT 1), x IN (SELECT 1) FROM (SELECT 1 WHERE x IN (SELECT NULL FROM (SELECT NULL))))));

@ -0,0 +1,12 @@
c1 Nullable(UInt64)
c1 Array(Nullable(UInt64))
c1 Nullable(UInt64)
c1 Nullable(UInt64)
c1 Array(Nullable(UInt64))
c1 Nullable(UInt64)
number Nullable(UInt64)
number Array(Nullable(UInt64))
number Array(Nullable(UInt64))
number Nullable(UInt64)
number Nullable(UInt64)
number Nullable(UInt64)

18 tests/queries/0_stateless/02517_infer_uint64_in_case_of_int64_overflow.sh Executable file

@ -0,0 +1,18 @@
#!/usr/bin/env bash

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

echo -ne "18446744073709551615" | $CLICKHOUSE_LOCAL --table=test --input-format=CSV -q "desc test";
echo -ne '"[18446744073709551615, 10, 11]"' | $CLICKHOUSE_LOCAL --table=test --input-format=CSV -q "desc test";
echo -ne "18446744073709551615\n10\n11" | $CLICKHOUSE_LOCAL --table=test --input-format=CSV -q "desc test";
echo -ne "18446744073709551615" | $CLICKHOUSE_LOCAL --table=test --input-format=TSV -q "desc test";
echo -ne "[18446744073709551615, 10, 11]" | $CLICKHOUSE_LOCAL --table=test --input-format=TSV -q "desc test";
echo -ne "18446744073709551615\n10\n11" | $CLICKHOUSE_LOCAL --table=test --input-format=TSV -q "desc test";
echo -ne '{"number" : 18446744073709551615}' | $CLICKHOUSE_LOCAL --table=test --input-format=JSONEachRow -q "desc test";
echo -ne '{"number" : [18446744073709551615, 10, 11]}'| $CLICKHOUSE_LOCAL --table=test --input-format=JSONEachRow -q "desc test";
echo -ne '{"number" : [18446744073709551615, true, 11]}'| $CLICKHOUSE_LOCAL --table=test --input-format=JSONEachRow -q "desc test";
echo -ne '{"number" : 18446744073709551615}, {"number" : 10}, {"number" : 11}' | $CLICKHOUSE_LOCAL --table=test --input-format=JSONEachRow -q "desc test";
echo -ne '{"number" : 18446744073709551615}, {"number" : false}, {"number" : 11}' | $CLICKHOUSE_LOCAL --table=test --input-format=JSONEachRow -q "desc test";
echo -ne '{"number" : "18446744073709551615"}' | $CLICKHOUSE_LOCAL --table=test --input-format=JSONEachRow -q "desc test";

@ -0,0 +1 @@
1

@ -0,0 +1,15 @@
SET allow_experimental_analyzer = 1;

DROP TABLE IF EXISTS test_table;
CREATE TABLE test_table
(
    c0 String ALIAS c1,
    c1 String,
    c2 String,
) ENGINE = MergeTree ORDER BY c1;

INSERT INTO test_table VALUES ('a', 'b');

SELECT MAX(1) FROM test_table;

DROP TABLE test_table;

@ -0,0 +1,7 @@
1 7
1 7
0 6
1 0
2 1
1 7
0 6

10 tests/queries/0_stateless/02521_to_custom_day_of_week.sql Normal file

@ -0,0 +1,10 @@

with toDate('2023-01-09') as date_mon, date_mon - 1 as date_sun select toDayOfWeek(date_mon), toDayOfWeek(date_sun);
with toDate('2023-01-09') as date_mon, date_mon - 1 as date_sun select toDayOfWeek(date_mon, 0), toDayOfWeek(date_sun, 0);
with toDate('2023-01-09') as date_mon, date_mon - 1 as date_sun select toDayOfWeek(date_mon, 1), toDayOfWeek(date_sun, 1);
with toDate('2023-01-09') as date_mon, date_mon - 1 as date_sun select toDayOfWeek(date_mon, 2), toDayOfWeek(date_sun, 2);
with toDate('2023-01-09') as date_mon, date_mon - 1 as date_sun select toDayOfWeek(date_mon, 3), toDayOfWeek(date_sun, 3);
with toDate('2023-01-09') as date_mon, date_mon - 1 as date_sun select toDayOfWeek(date_mon, 4), toDayOfWeek(date_sun, 4);
with toDate('2023-01-09') as date_mon, date_mon - 1 as date_sun select toDayOfWeek(date_mon, 5), toDayOfWeek(date_sun, 5);

select toDayOfWeek(today(), -1); -- { serverError 43 }
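
A minimal usage sketch of the optional mode argument of toDayOfWeek introduced above; the expected results are taken from the reference output of this test (2023-01-09 is a Monday, 2023-01-08 is a Sunday), and the mode descriptions in the comments are inferred from those values rather than from documentation:

SELECT
    toDayOfWeek(toDate('2023-01-09')),     -- 1 (default: Monday = 1 .. Sunday = 7)
    toDayOfWeek(toDate('2023-01-09'), 1),  -- 0 (mode 1: Monday = 0 .. Sunday = 6)
    toDayOfWeek(toDate('2023-01-08'), 2),  -- 0 (mode 2: week starts on Sunday, Sunday = 0)
    toDayOfWeek(toDate('2023-01-08'), 3);  -- 1 (mode 3: week starts on Sunday, Sunday = 1)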